diff options
Diffstat (limited to 'thirdparty')
| -rw-r--r-- | thirdparty/README.md | 29 | ||||
| -rw-r--r-- | thirdparty/basis_universal/basisu_comp.cpp | 1206 | ||||
| -rw-r--r-- | thirdparty/basis_universal/basisu_etc.cpp | 1074 | ||||
| -rw-r--r-- | thirdparty/basis_universal/basisu_pvrtc1_4.cpp | 269 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/apg_bmp.c | 541 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/apg_bmp.h | 123 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_astc_decomp.cpp (renamed from thirdparty/basis_universal/basisu_astc_decomp.cpp) | 15 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_astc_decomp.h (renamed from thirdparty/basis_universal/basisu_astc_decomp.h) | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_backend.cpp (renamed from thirdparty/basis_universal/basisu_backend.cpp) | 305 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_backend.h (renamed from thirdparty/basis_universal/basisu_backend.h) | 35 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_basis_file.cpp (renamed from thirdparty/basis_universal/basisu_basis_file.cpp) | 105 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_basis_file.h (renamed from thirdparty/basis_universal/basisu_basis_file.h) | 4 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_bc7enc.cpp | 1984 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_bc7enc.h | 131 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_comp.cpp | 2113 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_comp.h (renamed from thirdparty/basis_universal/basisu_comp.h) | 259 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_enc.cpp (renamed from thirdparty/basis_universal/basisu_enc.cpp) | 837 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_enc.h (renamed from thirdparty/basis_universal/basisu_enc.h) | 399 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_etc.cpp | 1593 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_etc.h (renamed from thirdparty/basis_universal/basisu_etc.h) | 158 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_frontend.cpp (renamed from thirdparty/basis_universal/basisu_frontend.cpp) | 897 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_frontend.h (renamed from thirdparty/basis_universal/basisu_frontend.h) | 46 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_global_selector_palette_helpers.cpp (renamed from thirdparty/basis_universal/basisu_global_selector_palette_helpers.cpp) | 0 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_global_selector_palette_helpers.h (renamed from thirdparty/basis_universal/basisu_global_selector_palette_helpers.h) | 6 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp (renamed from thirdparty/basis_universal/basisu_gpu_texture.cpp) | 625 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_gpu_texture.h (renamed from thirdparty/basis_universal/basisu_gpu_texture.h) | 24 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_kernels_declares.h | 25 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_kernels_imp.h | 584 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp | 161 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_miniz.h | 2514 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp | 564 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h (renamed from thirdparty/basis_universal/basisu_pvrtc1_4.h) | 100 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_resample_filters.cpp (renamed from thirdparty/basis_universal/basisu_resample_filters.cpp) | 24 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_resampler.cpp (renamed from thirdparty/basis_universal/basisu_resampler.cpp) | 0 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_resampler.h (renamed from thirdparty/basis_universal/basisu_resampler.h) | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_resampler_filters.h (renamed from thirdparty/basis_universal/basisu_resampler_filters.h) | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_ssim.cpp (renamed from thirdparty/basis_universal/basisu_ssim.cpp) | 0 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_ssim.h (renamed from thirdparty/basis_universal/basisu_ssim.h) | 0 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp | 4189 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/basisu_uastc_enc.h | 140 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/cppspmd_flow.h | 590 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/cppspmd_math.h | 725 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/cppspmd_math_declares.h | 89 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/cppspmd_sse.h | 2118 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/cppspmd_type_aliases.h | 47 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/jpgd.cpp | 3241 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/jpgd.h | 347 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/lodepng.cpp (renamed from thirdparty/basis_universal/lodepng.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/encoder/lodepng.h (renamed from thirdparty/basis_universal/lodepng.h) | 0 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu.h | 152 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_containers.h | 1908 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_containers_impl.h | 311 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_file_headers.h | 45 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_global_selector_cb.h | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_global_selector_palette.h | 8 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder.cpp | 9646 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder.h | 758 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h | 84 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc_0_255.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_55.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_56.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m6.inc | 4383 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc | 2 | ||||
| -rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h | 297 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_filter.cpp | 56 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_for.cpp | 48 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_for_for.cpp | 63 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.cpp | 85 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_map.cpp | 47 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_partition.cpp | 53 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.cpp | 48 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_reduce.cpp | 49 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_set.cpp | 43 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/algorithms/parallel_sort.cpp | 50 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/math/AVX2NEON.h | 986 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/math/SSE2NEON.h | 1753 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/math/constants.cpp | 61 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h | 49 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/builders/primrefgen.h | 28 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/common/instance_stack.h | 199 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/common/scene_curves.h | 341 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h | 21 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h | 21 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h | 21 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h | 21 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h | 21 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h | 22 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h | 493 | ||||
| -rw-r--r-- | thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h | 508 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_any_of.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_any_of.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_filter.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_filter.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_for.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_for.h) | 73 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_for_for.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_for_for.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_map.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_map.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_partition.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_partition.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_prefix_sum.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_reduce.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_set.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_set.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/algorithms/parallel_sort.h (renamed from thirdparty/embree-aarch64/common/algorithms/parallel_sort.h) | 9 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/parsestream.h (renamed from thirdparty/embree-aarch64/common/lexers/parsestream.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/stream.h (renamed from thirdparty/embree-aarch64/common/lexers/stream.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/streamfilters.h (renamed from thirdparty/embree-aarch64/common/lexers/streamfilters.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/stringstream.cpp (renamed from thirdparty/embree-aarch64/common/lexers/stringstream.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/stringstream.h (renamed from thirdparty/embree-aarch64/common/lexers/stringstream.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/tokenstream.cpp (renamed from thirdparty/embree-aarch64/common/lexers/tokenstream.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/lexers/tokenstream.h (renamed from thirdparty/embree-aarch64/common/lexers/tokenstream.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/affinespace.h (renamed from thirdparty/embree-aarch64/common/math/affinespace.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/bbox.h (renamed from thirdparty/embree-aarch64/common/math/bbox.h) | 8 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/col3.h (renamed from thirdparty/embree-aarch64/common/math/col3.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/col4.h (renamed from thirdparty/embree-aarch64/common/math/col4.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/color.h (renamed from thirdparty/embree-aarch64/common/math/color.h) | 44 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/constants.cpp | 27 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/constants.h (renamed from thirdparty/embree-aarch64/common/math/constants.h) | 60 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/interval.h (renamed from thirdparty/embree-aarch64/common/math/interval.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/lbbox.h (renamed from thirdparty/embree-aarch64/common/math/lbbox.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/linearspace2.h (renamed from thirdparty/embree-aarch64/common/math/linearspace2.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/linearspace3.h (renamed from thirdparty/embree-aarch64/common/math/linearspace3.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/math.h (renamed from thirdparty/embree-aarch64/common/math/math.h) | 140 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/obbox.h (renamed from thirdparty/embree-aarch64/common/math/obbox.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/quaternion.h (renamed from thirdparty/embree-aarch64/common/math/quaternion.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/range.h (renamed from thirdparty/embree-aarch64/common/math/range.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/transcendental.h (renamed from thirdparty/embree-aarch64/common/math/transcendental.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec2.h (renamed from thirdparty/embree-aarch64/common/math/vec2.h) | 8 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec2fa.h (renamed from thirdparty/embree-aarch64/common/math/vec2fa.h) | 28 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec3.h (renamed from thirdparty/embree-aarch64/common/math/vec3.h) | 26 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec3ba.h (renamed from thirdparty/embree-aarch64/common/math/vec3ba.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec3fa.h (renamed from thirdparty/embree-aarch64/common/math/vec3fa.h) | 129 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec3ia.h (renamed from thirdparty/embree-aarch64/common/math/vec3ia.h) | 40 | ||||
| -rw-r--r-- | thirdparty/embree/common/math/vec4.h (renamed from thirdparty/embree-aarch64/common/math/vec4.h) | 23 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/arm/emulation.h | 50 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/arm/sse2neon.h | 6996 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/avx.h (renamed from thirdparty/embree-aarch64/common/simd/avx.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/avx512.h (renamed from thirdparty/embree-aarch64/common/simd/avx512.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/simd.h (renamed from thirdparty/embree-aarch64/common/simd/simd.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/sse.cpp (renamed from thirdparty/embree-aarch64/common/simd/sse.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/sse.h (renamed from thirdparty/embree-aarch64/common/simd/sse.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/varying.h (renamed from thirdparty/embree-aarch64/common/simd/varying.h) | 71 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboold4_avx.h (renamed from thirdparty/embree-aarch64/common/simd/vboold4_avx.h) | 29 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboold4_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vboold4_avx512.h) | 18 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboold8_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vboold8_avx512.h) | 31 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboolf16_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vboolf16_avx512.h) | 31 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboolf4_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vboolf4_avx512.h) | 18 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboolf4_sse2.h (renamed from thirdparty/embree-aarch64/common/simd/vboolf4_sse2.h) | 51 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboolf8_avx.h (renamed from thirdparty/embree-aarch64/common/simd/vboolf8_avx.h) | 23 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vboolf8_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vboolf8_avx512.h) | 18 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vdouble4_avx.h (renamed from thirdparty/embree-aarch64/common/simd/vdouble4_avx.h) | 39 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vdouble8_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vdouble8_avx512.h) | 39 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vfloat16_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vfloat16_avx512.h) | 206 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vfloat4_sse2.h (renamed from thirdparty/embree-aarch64/common/simd/vfloat4_sse2.h) | 425 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vfloat8_avx.h (renamed from thirdparty/embree-aarch64/common/simd/vfloat8_avx.h) | 231 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vint16_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vint16_avx512.h) | 56 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vint4_sse2.h (renamed from thirdparty/embree-aarch64/common/simd/vint4_sse2.h) | 243 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vint8_avx.h (renamed from thirdparty/embree-aarch64/common/simd/vint8_avx.h) | 98 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vint8_avx2.h (renamed from thirdparty/embree-aarch64/common/simd/vint8_avx2.h) | 68 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vllong4_avx2.h (renamed from thirdparty/embree-aarch64/common/simd/vllong4_avx2.h) | 40 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vllong8_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vllong8_avx512.h) | 59 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vuint16_avx512.h (renamed from thirdparty/embree-aarch64/common/simd/vuint16_avx512.h) | 55 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vuint4_sse2.h (renamed from thirdparty/embree-aarch64/common/simd/vuint4_sse2.h) | 149 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vuint8_avx.h (renamed from thirdparty/embree-aarch64/common/simd/vuint8_avx.h) | 99 | ||||
| -rw-r--r-- | thirdparty/embree/common/simd/vuint8_avx2.h (renamed from thirdparty/embree-aarch64/common/simd/vuint8_avx2.h) | 67 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/alloc.cpp (renamed from thirdparty/embree-aarch64/common/sys/alloc.cpp) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/alloc.h (renamed from thirdparty/embree-aarch64/common/sys/alloc.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/array.h (renamed from thirdparty/embree-aarch64/common/sys/array.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/atomic.h (renamed from thirdparty/embree-aarch64/common/sys/atomic.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/barrier.cpp (renamed from thirdparty/embree-aarch64/common/sys/barrier.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/barrier.h (renamed from thirdparty/embree-aarch64/common/sys/barrier.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/condition.cpp (renamed from thirdparty/embree-aarch64/common/sys/condition.cpp) | 16 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/condition.h (renamed from thirdparty/embree-aarch64/common/sys/condition.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/filename.cpp (renamed from thirdparty/embree-aarch64/common/sys/filename.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/filename.h (renamed from thirdparty/embree-aarch64/common/sys/filename.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/intrinsics.h (renamed from thirdparty/embree-aarch64/common/sys/intrinsics.h) | 346 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/library.cpp (renamed from thirdparty/embree-aarch64/common/sys/library.cpp) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/library.h (renamed from thirdparty/embree-aarch64/common/sys/library.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/mutex.cpp (renamed from thirdparty/embree-aarch64/common/sys/mutex.cpp) | 3 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/mutex.h (renamed from thirdparty/embree-aarch64/common/sys/mutex.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/platform.h (renamed from thirdparty/embree-aarch64/common/sys/platform.h) | 31 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/ref.h (renamed from thirdparty/embree-aarch64/common/sys/ref.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/regression.cpp (renamed from thirdparty/embree-aarch64/common/sys/regression.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/regression.h (renamed from thirdparty/embree-aarch64/common/sys/regression.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/string.cpp (renamed from thirdparty/embree-aarch64/common/sys/string.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/string.h (renamed from thirdparty/embree-aarch64/common/sys/string.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/sysinfo.cpp (renamed from thirdparty/embree-aarch64/common/sys/sysinfo.cpp) | 160 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/sysinfo.h (renamed from thirdparty/embree-aarch64/common/sys/sysinfo.h) | 34 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/thread.cpp (renamed from thirdparty/embree-aarch64/common/sys/thread.cpp) | 115 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/thread.h (renamed from thirdparty/embree-aarch64/common/sys/thread.h) | 5 | ||||
| -rw-r--r-- | thirdparty/embree/common/sys/vector.h (renamed from thirdparty/embree-aarch64/common/sys/vector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/tasking/taskscheduler.h (renamed from thirdparty/embree-aarch64/common/tasking/taskscheduler.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/common/tasking/taskschedulerinternal.cpp (renamed from thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp) | 16 | ||||
| -rw-r--r-- | thirdparty/embree/common/tasking/taskschedulerinternal.h (renamed from thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h) | 13 | ||||
| -rw-r--r-- | thirdparty/embree/common/tasking/taskschedulerppl.h (renamed from thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/common/tasking/taskschedulertbb.h (renamed from thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h) | 8 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_buffer.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_buffer.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_builder.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_builder.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_common.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_common.h) | 26 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_config.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_config.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_device.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_device.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_geometry.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_geometry.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_quaternion.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_quaternion.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_ray.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_ray.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/include/embree3/rtcore_scene.h (renamed from thirdparty/embree-aarch64/include/embree3/rtcore_scene.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/bvh_builder_hair.h (renamed from thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/bvh_builder_morton.h (renamed from thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/bvh_builder_msmblur.h (renamed from thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h (renamed from thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/bvh_builder_sah.h (renamed from thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_binning.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h) | 478 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_binning_array_aligned.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h) | 9 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_binning_array_unaligned.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_openmerge_array.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_spatial.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_spatial_array.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h) | 8 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_strand_array.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/heuristic_timesplit_array.h (renamed from thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/priminfo.h (renamed from thirdparty/embree-aarch64/kernels/builders/priminfo.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/primrefgen.cpp (renamed from thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp) | 156 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/primrefgen.h | 34 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/primrefgen_presplit.h (renamed from thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/builders/splitter.h (renamed from thirdparty/embree-aarch64/kernels/builders/splitter.h) | 24 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh.cpp) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh4_factory.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp) | 334 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh4_factory.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh8_factory.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp) | 284 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh8_factory.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h) | 3 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_morton.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_sah.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp) | 119 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_sah_mb.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_twolevel.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_twolevel.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_builder_twolevel_internal.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_collider.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_collider.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_factory.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector1.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp) | 29 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector1.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h) | 5 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp) | 20 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h) | 5 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector_stream.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h) | 115 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_intersector_stream_filters.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_aabb.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_aabb_mb.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_aabb_mb4d.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_base.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_obb.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_obb_mb.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_qaabb.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_node_ref.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_refit.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_refit.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_rotate.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_rotate.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_statistics.cpp (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_statistics.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_traverser1.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h) | 242 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/bvh_traverser_stream.h (renamed from thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h) | 21 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/node_intersector.h (renamed from thirdparty/embree-aarch64/kernels/bvh/node_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/node_intersector1.h (renamed from thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h) | 651 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/node_intersector_frustum.h (renamed from thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h) | 108 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/node_intersector_packet.h (renamed from thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h) | 94 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/bvh/node_intersector_packet_stream.h (renamed from thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h) | 98 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/accel.h (renamed from thirdparty/embree-aarch64/kernels/common/accel.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/accelinstance.h (renamed from thirdparty/embree-aarch64/kernels/common/accelinstance.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/acceln.cpp (renamed from thirdparty/embree-aarch64/kernels/common/acceln.cpp) | 8 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/acceln.h (renamed from thirdparty/embree-aarch64/kernels/common/acceln.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/accelset.cpp (renamed from thirdparty/embree-aarch64/kernels/common/accelset.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/accelset.h (renamed from thirdparty/embree-aarch64/kernels/common/accelset.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/alloc.cpp (renamed from thirdparty/embree-aarch64/kernels/common/alloc.cpp) | 5 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/alloc.h (renamed from thirdparty/embree-aarch64/kernels/common/alloc.h) | 76 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/buffer.h (renamed from thirdparty/embree-aarch64/kernels/common/buffer.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/builder.h (renamed from thirdparty/embree-aarch64/kernels/common/builder.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/context.h (renamed from thirdparty/embree-aarch64/kernels/common/context.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/default.h (renamed from thirdparty/embree-aarch64/kernels/common/default.h) | 7 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/device.cpp (renamed from thirdparty/embree-aarch64/kernels/common/device.cpp) | 27 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/device.h (renamed from thirdparty/embree-aarch64/kernels/common/device.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/geometry.cpp (renamed from thirdparty/embree-aarch64/kernels/common/geometry.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/geometry.h (renamed from thirdparty/embree-aarch64/kernels/common/geometry.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/hit.h (renamed from thirdparty/embree-aarch64/kernels/common/hit.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/instance_stack.h | 179 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/isa.h (renamed from thirdparty/embree-aarch64/kernels/common/isa.h) | 137 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/motion_derivative.h (renamed from thirdparty/embree-aarch64/kernels/common/motion_derivative.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/point_query.h (renamed from thirdparty/embree-aarch64/kernels/common/point_query.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/primref.h (renamed from thirdparty/embree-aarch64/kernels/common/primref.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/primref_mb.h (renamed from thirdparty/embree-aarch64/kernels/common/primref_mb.h) | 12 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/profile.h (renamed from thirdparty/embree-aarch64/kernels/common/profile.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/ray.h (renamed from thirdparty/embree-aarch64/kernels/common/ray.h) | 28 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/rtcore.cpp (renamed from thirdparty/embree-aarch64/kernels/common/rtcore.cpp) | 87 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/rtcore.h (renamed from thirdparty/embree-aarch64/kernels/common/rtcore.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/rtcore_builder.cpp (renamed from thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene.cpp (renamed from thirdparty/embree-aarch64/kernels/common/scene.cpp) | 153 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene.h (renamed from thirdparty/embree-aarch64/kernels/common/scene.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_curves.h | 688 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_grid_mesh.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h) | 83 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_instance.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_instance.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_line_segments.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_line_segments.h) | 40 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_points.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_points.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_quad_mesh.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h) | 62 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_subdiv_mesh.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h) | 8 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_triangle_mesh.cpp (renamed from thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp) | 61 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_triangle_mesh.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h) | 58 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/scene_user_geometry.h (renamed from thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/stack_item.h (renamed from thirdparty/embree-aarch64/kernels/common/stack_item.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/stat.cpp (renamed from thirdparty/embree-aarch64/kernels/common/stat.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/stat.h (renamed from thirdparty/embree-aarch64/kernels/common/stat.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/state.cpp (renamed from thirdparty/embree-aarch64/kernels/common/state.cpp) | 44 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/state.h (renamed from thirdparty/embree-aarch64/kernels/common/state.h) | 3 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/common/vector.h (renamed from thirdparty/embree-aarch64/kernels/common/vector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/config.h (renamed from thirdparty/embree-aarch64/kernels/config.h) | 0 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/cone.h (renamed from thirdparty/embree-aarch64/kernels/geometry/cone.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/coneline_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h) | 14 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/conelinei_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h) | 42 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curveNi.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curveNi.h) | 130 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curveNi_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curveNi_mb.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h) | 164 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curveNi_mb_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curveNv.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curveNv.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curveNv_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector_distance.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector_oriented.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector_precalculations.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector_ribbon.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h) | 14 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector_sweep.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/curve_intersector_virtual.h (renamed from thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h) | 210 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/cylinder.h (renamed from thirdparty/embree-aarch64/kernels/geometry/cylinder.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/disc_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/disci_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h) | 114 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/filter.h (renamed from thirdparty/embree-aarch64/kernels/geometry/filter.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/grid_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/grid_soa.h (renamed from thirdparty/embree-aarch64/kernels/geometry/grid_soa.h) | 14 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/grid_soa_intersector1.h (renamed from thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/grid_soa_intersector_packet.h (renamed from thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/instance.h (renamed from thirdparty/embree-aarch64/kernels/geometry/instance.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/instance_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/intersector_epilog.h (renamed from thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h) | 141 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/intersector_iterators.h (renamed from thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h) | 18 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/line_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/line_intersector.h) | 16 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/linei.h (renamed from thirdparty/embree-aarch64/kernels/geometry/linei.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/linei_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h) | 42 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/object.h (renamed from thirdparty/embree-aarch64/kernels/geometry/object.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/object_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/object_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/plane.h (renamed from thirdparty/embree-aarch64/kernels/geometry/plane.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/pointi.h (renamed from thirdparty/embree-aarch64/kernels/geometry/pointi.h) | 7 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/primitive.h (renamed from thirdparty/embree-aarch64/kernels/geometry/primitive.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/primitive4.cpp (renamed from thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quad_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quad_intersector_moeller.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h) | 162 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quad_intersector_pluecker.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h) | 139 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quadi.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quadi.h) | 12 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quadi_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quadv.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quadv.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/quadv_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/roundline_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h) | 35 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/roundlinei_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h) | 57 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/sphere_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/spherei_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h) | 58 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subdivpatch1.h (renamed from thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subdivpatch1_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h) | 52 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subgrid.h (renamed from thirdparty/embree-aarch64/kernels/geometry/subgrid.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subgrid_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h) | 73 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subgrid_intersector_moeller.h | 382 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subgrid_intersector_pluecker.h | 367 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/subgrid_mb_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h) | 38 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/triangle.h (renamed from thirdparty/embree-aarch64/kernels/geometry/triangle.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/triangle_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h) | 22 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/triangle_intersector_moeller.h (renamed from thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h) | 244 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/triangle_intersector_pluecker.h (renamed from thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h) | 250 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/triangle_intersector_woop.h (renamed from thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/triangle_triangle_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/trianglei.h (renamed from thirdparty/embree-aarch64/kernels/geometry/trianglei.h) | 10 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/trianglei_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h) | 90 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/trianglev.h (renamed from thirdparty/embree-aarch64/kernels/geometry/trianglev.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/trianglev_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h) | 42 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/trianglev_mb.h (renamed from thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/geometry/trianglev_mb_intersector.h (renamed from thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h) | 98 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/hash.h (renamed from thirdparty/embree-aarch64/kernels/hash.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/bezier_curve.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h) | 6 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/bezier_patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/bilinear_patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/bspline_curve.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h) | 7 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/bspline_patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/catmullclark_coefficients.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/catmullclark_patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/catmullclark_ring.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/catmullrom_curve.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h) | 7 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/feature_adaptive_eval.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/feature_adaptive_eval_grid.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/feature_adaptive_eval_simd.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/gregory_patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/gregory_patch_dense.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/gridrange.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/gridrange.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/half_edge.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/half_edge.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/hermite_curve.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h) | 3 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/linear_bezier_patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/patch.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/patch.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/patch_eval.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/patch_eval_grid.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/patch_eval_simd.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/subdivpatch1base.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/tessellation.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/tessellation.h) | 2 | ||||
| -rw-r--r-- | thirdparty/embree/kernels/subdiv/tessellation_cache.h (renamed from thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h) | 4 | ||||
| -rw-r--r-- | thirdparty/embree/patches/godot-changes-android.patch | 103 | ||||
| -rw-r--r-- | thirdparty/embree/patches/godot-changes-misc.patch | 105 | ||||
| -rw-r--r-- | thirdparty/embree/patches/godot-changes-noexcept.patch (renamed from thirdparty/embree-aarch64/patches/godot-changes.patch) | 170 | ||||
| -rw-r--r-- | thirdparty/embree/patches/godot-changes-ubsan.patch | 24 | ||||
| -rw-r--r-- | thirdparty/enet/godot.cpp | 52 | ||||
| -rw-r--r-- | thirdparty/fonts/OpenSans_SemiBold.ttf | bin | 0 -> 100820 bytes | |||
| -rw-r--r-- | thirdparty/meshoptimizer/meshoptimizer.h | 5 | ||||
| -rw-r--r-- | thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch | 262 | ||||
| -rw-r--r-- | thirdparty/meshoptimizer/simplifier.cpp | 168 | ||||
| -rw-r--r-- | thirdparty/misc/patches/polypartition-godot-types.patch | 14 | ||||
| -rw-r--r-- | thirdparty/misc/polypartition.cpp | 10 | ||||
| -rw-r--r-- | thirdparty/misc/smolv.cpp | 2108 | ||||
| -rw-r--r-- | thirdparty/misc/smolv.h | 169 | ||||
| -rw-r--r-- | thirdparty/oidn/core/transfer_function.cpp | 107 | ||||
| -rw-r--r-- | thirdparty/oidn/patches/godot-changes-c58c5216.patch | 68 |
426 files changed, 52549 insertions, 20562 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index 605b298ac1..03a2ddf5e4 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -8,13 +8,12 @@ readability. ## basis_universal - Upstream: https://github.com/BinomialLLC/basis_universal -- Version: git (895ee8ee7e04f22267f8d16d46de04d5a01d63ac, 2020) +- Version: git (ba1c3e40f1d434ebaf9a167b44e9b11d2bf0f765, 2021) - License: Apache 2.0 Files extracted from upstream source: -- `.cpp` and `.h` files in root folder except for `basisu_tool.cpp` (contains `main` and can cause link error) -- `.cpp`, `.h` and `.inc` files in `transcoder/`, keeping folder structure +- `encoder/` and `transcoder/` folders - `LICENSE` @@ -62,10 +61,10 @@ Files extracted from upstream source: Extracted from .zip provided. Extracted license and header only. -## embree-aarch64 +## embree -- Upstream: https://github.com/lighttransport/embree-aarch64 -- Version: 3.12.1 (6ef362f99af80c9dfe8dd2bfc582d9067897edc6, 2020) +- Upstream: https://github.com/embree/embree +- Version: 3.13.0 (7c53133eb21424f7f0ae1e25bf357e358feaf6ab, 2021) - License: Apache 2.0 Files extracted from upstream: @@ -73,8 +72,8 @@ Files extracted from upstream: - All cpp files listed in `modules/raycast/godot_update_embree.py` - All header files in the directories listed in `modules/raycast/godot_update_embree.py` -The `modules/raycast/godot_update_embree.py`script can be used to pull the -relevant files from the latest Embree-aarch64 release and apply some automatic changes. +The `modules/raycast/godot_update_embree.py` script can be used to pull the +relevant files from the latest Embree release and apply some automatic changes. Some changes have been made in order to remove exceptions and fix minor build errors. They are marked with `// -- GODOT start --` and `// -- GODOT end --` @@ -138,6 +137,10 @@ Files extracted from upstream source: * Upstream: https://android.googlesource.com/platform/frameworks/base/+/master/data/fonts/ * Version: ? (pre-2014 commit when DroidSansJapanese.ttf was obsoleted) * License: Apache 2.0 +- `OpenSans_SemiBold.ttf`: + * Upstream: https://fonts.google.com/specimen/Open+Sans + * Version: 1.10 (downloaded from Google Fonts in February 2021) + * License: Apache 2.0 - `Tamsyn*.png`: * Upstream: http://www.fial.com/~scott/tamsyn-font/ * Version: 1.11 (2015) @@ -364,7 +367,7 @@ File extracted from upstream release tarball: ## meshoptimizer - Upstream: https://github.com/zeux/meshoptimizer -- Version: 0.16 (95893c0566646434dd675b708d293fcb2d526d08, 2021) +- Version: git (f5d83e879c48f8664783a69b4f50711d27549b66, 2021) - License: MIT Files extracted from upstream repository: @@ -372,6 +375,9 @@ Files extracted from upstream repository: - All files in `src/`. - `LICENSE.md`. +An [experimental upstream feature](https://github.com/zeux/meshoptimizer/tree/simplify-attr), +has been backported, see patch in `patches` directory. + ## miniupnpc @@ -463,6 +469,10 @@ Collection of single-file libraries used in Godot components. * Version: git (2f625846a775501fb69456567409a8b12f10ea25, 2012) * License: BSD-3-Clause * Modifications: use `const char*` instead of `char*` for input string +- `smolv.h` + * Upstream: https://github.com/aras-p/smol-v + * Version: git (4b52c165c13763051a18e80ffbc2ee436314ceb2, 2020) + * License: Public Domain or MIT - `stb_rect_pack.h` * Upstream: https://github.com/nothings/stb * Version: 1.00 (2bb4a0accd4003c1db4c24533981e01b1adfd656, 2019) @@ -725,3 +735,4 @@ Files extracted from upstream source: - lib/{common/,compress/,decompress/,zstd.h} - LICENSE + diff --git a/thirdparty/basis_universal/basisu_comp.cpp b/thirdparty/basis_universal/basisu_comp.cpp deleted file mode 100644 index 1e4679311c..0000000000 --- a/thirdparty/basis_universal/basisu_comp.cpp +++ /dev/null @@ -1,1206 +0,0 @@ -// basisu_comp.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "basisu_comp.h" -#include "basisu_enc.h" -#include <unordered_set> - -#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 -#define DEBUG_CROP_TEXTURE_TO_64x64 (0) -#define DEBUG_RESIZE_TEXTURE (0) -#define DEBUG_EXTRACT_SINGLE_BLOCK (0) - -namespace basisu -{ - basis_compressor::basis_compressor() : - m_total_blocks(0), - m_auto_global_sel_pal(false), - m_basis_file_size(0), - m_basis_bits_per_texel(0), - m_any_source_image_has_alpha(false) - { - debug_printf("basis_compressor::basis_compressor\n"); - } - - bool basis_compressor::init(const basis_compressor_params ¶ms) - { - debug_printf("basis_compressor::init\n"); - - m_params = params; - - if (m_params.m_debug) - { - debug_printf("basis_compressor::init:\n"); - -#define PRINT_BOOL_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<int>(m_params.v), m_params.v.was_changed()); -#define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast<int>(m_params.v), m_params.v.was_changed()); -#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed()); -#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast<float>(m_params.v), m_params.v.was_changed()); - - debug_printf("Has global selector codebook: %i\n", m_params.m_pSel_codebook != nullptr); - - debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i\n", - (uint32_t)m_params.m_source_images.size(), (uint32_t)m_params.m_source_filenames.size(), (uint32_t)m_params.m_source_alpha_filenames.size()); - - PRINT_BOOL_VALUE(m_y_flip); - PRINT_BOOL_VALUE(m_debug); - PRINT_BOOL_VALUE(m_debug_images); - PRINT_BOOL_VALUE(m_global_sel_pal); - PRINT_BOOL_VALUE(m_auto_global_sel_pal); - PRINT_BOOL_VALUE(m_compression_level); - PRINT_BOOL_VALUE(m_no_hybrid_sel_cb); - PRINT_BOOL_VALUE(m_perceptual); - PRINT_BOOL_VALUE(m_no_endpoint_rdo); - PRINT_BOOL_VALUE(m_no_selector_rdo); - PRINT_BOOL_VALUE(m_read_source_images); - PRINT_BOOL_VALUE(m_write_output_basis_files); - PRINT_BOOL_VALUE(m_compute_stats); - PRINT_BOOL_VALUE(m_check_for_alpha) - PRINT_BOOL_VALUE(m_force_alpha) - PRINT_BOOL_VALUE(m_seperate_rg_to_color_alpha); - PRINT_BOOL_VALUE(m_multithreading); - PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); - - PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh); - - PRINT_INT_VALUE(m_global_pal_bits); - PRINT_INT_VALUE(m_global_mod_bits); - - PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); - PRINT_FLOAT_VALUE(m_selector_rdo_thresh); - - PRINT_BOOL_VALUE(m_mip_gen); - PRINT_BOOL_VALUE(m_mip_renormalize); - PRINT_BOOL_VALUE(m_mip_wrapping); - PRINT_BOOL_VALUE(m_mip_srgb); - PRINT_FLOAT_VALUE(m_mip_premultiplied); - PRINT_FLOAT_VALUE(m_mip_scale); - PRINT_INT_VALUE(m_mip_smallest_dimension); - debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str()); - - debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters); - debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters); - debug_printf("m_quality_level: %i\n", m_params.m_quality_level); - - debug_printf("m_tex_type: %u\n", m_params.m_tex_type); - debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); - debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); - -#undef PRINT_BOOL_VALUE -#undef PRINT_INT_VALUE -#undef PRINT_UINT_VALUE -#undef PRINT_FLOAT_VALUE - } - - if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) - { - assert(0); - return false; - } - - return true; - } - - basis_compressor::error_code basis_compressor::process() - { - debug_printf("basis_compressor::process\n"); - - if (!read_source_images()) - return cECFailedReadingSourceImages; - - if (!validate_texture_type_constraints()) - return cECFailedValidating; - - if (!process_frontend()) - return cECFailedFrontEnd; - - if (!extract_frontend_texture_data()) - return cECFailedFontendExtract; - - if (!process_backend()) - return cECFailedBackend; - - if (!create_basis_file_and_transcode()) - return cECFailedCreateBasisFile; - - if (!write_output_files_and_compute_stats()) - return cECFailedWritingOutput; - - return cECSuccess; - } - - bool basis_compressor::generate_mipmaps(const image &img, std::vector<image> &mips, bool has_alpha) - { - debug_printf("basis_compressor::generate_mipmaps\n"); - - uint32_t total_levels = 1; - uint32_t w = img.get_width(), h = img.get_height(); - while (maximum<uint32_t>(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) - { - w = maximum(w >> 1U, 1U); - h = maximum(h >> 1U, 1U); - total_levels++; - } - -#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN - // Requires stb_image_resize - stbir_filter filter = STBIR_FILTER_DEFAULT; - if (m_params.m_mip_filter == "box") - filter = STBIR_FILTER_BOX; - else if (m_params.m_mip_filter == "triangle") - filter = STBIR_FILTER_TRIANGLE; - else if (m_params.m_mip_filter == "cubic") - filter = STBIR_FILTER_CUBICBSPLINE; - else if (m_params.m_mip_filter == "catmull") - filter = STBIR_FILTER_CATMULLROM; - else if (m_params.m_mip_filter == "mitchell") - filter = STBIR_FILTER_MITCHELL; - - for (uint32_t level = 1; level < total_levels; level++) - { - const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level); - const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level); - - image &level_img = *enlarge_vector(mips, 1); - level_img.resize(level_width, level_height); - - int result = stbir_resize_uint8_generic( - (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), - (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), - has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, - m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, - nullptr); - - if (result == 0) - { - error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); - return false; - } - - if (m_params.m_mip_renormalize) - level_img.renormalize_normal_map(); - } -#else - for (uint32_t level = 1; level < total_levels; level++) - { - const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level); - const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level); - - image &level_img = *enlarge_vector(mips, 1); - level_img.resize(level_width, level_height); - - bool status = image_resample(img, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); - if (!status) - { - error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); - return false; - } - - if (m_params.m_mip_renormalize) - level_img.renormalize_normal_map(); - } -#endif - - return true; - } - - bool basis_compressor::read_source_images() - { - debug_printf("basis_compressor::read_source_images\n"); - - const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size(); - if (!total_source_files) - return false; - - m_stats.resize(0); - m_slice_descs.resize(0); - m_slice_images.resize(0); - - m_total_blocks = 0; - uint32_t total_macroblocks = 0; - - m_any_source_image_has_alpha = false; - - std::vector<image> source_images; - std::vector<std::string> source_filenames; - - // First load all source images, and determine if any have an alpha channel. - for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) - { - const char *pSource_filename = ""; - - image file_image; - - if (m_params.m_read_source_images) - { - pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); - - // Load the source image - if (!load_png(pSource_filename, file_image)) - { - error_printf("Failed reading source image: %s\n", pSource_filename); - return false; - } - - printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); - - // Optionally load another image and put a grayscale version of it into the alpha channel. - if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) - { - const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); - - image alpha_data; - - if (!load_png(pSource_alpha_image, alpha_data)) - { - error_printf("Failed reading source image: %s\n", pSource_alpha_image); - return false; - } - - printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); - - alpha_data.crop(file_image.get_width(), file_image.get_height()); - - for (uint32_t y = 0; y < file_image.get_height(); y++) - for (uint32_t x = 0; x < file_image.get_width(); x++) - file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); - } - } - else - { - file_image = m_params.m_source_images[source_file_index]; - } - - if (m_params.m_seperate_rg_to_color_alpha) - { - // Used for XY normal maps in RG - puts X in color, Y in alpha - for (uint32_t y = 0; y < file_image.get_height(); y++) - for (uint32_t x = 0; x < file_image.get_width(); x++) - { - const color_rgba &c = file_image(x, y); - file_image(x, y).set_noclamp_rgba(c.r, c.r, c.r, c.g); - } - } - - bool has_alpha = false; - if ((m_params.m_force_alpha) || (m_params.m_seperate_rg_to_color_alpha)) - has_alpha = true; - else if (!m_params.m_check_for_alpha) - file_image.set_alpha(255); - else if (file_image.has_alpha()) - has_alpha = true; - - if (has_alpha) - m_any_source_image_has_alpha = true; - - debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); - - if (m_params.m_y_flip) - file_image.flip_y(); - -#if DEBUG_EXTRACT_SINGLE_BLOCK - image block_image(4, 4); - const uint32_t block_x = 0; - const uint32_t block_y = 0; - block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); - file_image = block_image; -#endif - -#if DEBUG_CROP_TEXTURE_TO_64x64 - file_image.resize(64, 64); -#endif -#if DEBUG_RESIZE_TEXTURE - image temp_img((file_image.get_width() + 1) / 2, (file_image.get_height() + 1) / 2); - image_resample(file_image, temp_img, m_params.m_perceptual, "kaiser"); - temp_img.swap(file_image); -#endif - - if ((!file_image.get_width()) || (!file_image.get_height())) - { - error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); - return false; - } - - if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) - { - error_printf("basis_compressor::read_source_images: Source image is too large!\n"); - return false; - } - - source_images.push_back(file_image); - source_filenames.push_back(pSource_filename); - } - - debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); - - for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) - { - image &file_image = source_images[source_file_index]; - const std::string &source_filename = source_filenames[source_file_index]; - - std::vector<image> slices; - - slices.reserve(32); - slices.push_back(file_image); - - if (m_params.m_mip_gen) - { - if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha)) - return false; - } - - uint_vec mip_indices(slices.size()); - for (uint32_t i = 0; i < slices.size(); i++) - mip_indices[i] = i; - - if (m_any_source_image_has_alpha) - { - // If source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. - std::vector<image> alpha_slices; - uint_vec new_mip_indices; - - alpha_slices.reserve(slices.size() * 2); - - for (uint32_t i = 0; i < slices.size(); i++) - { - image lvl_rgb(slices[i]); - image lvl_a(lvl_rgb); - - for (uint32_t y = 0; y < lvl_a.get_height(); y++) - { - for (uint32_t x = 0; x < lvl_a.get_width(); x++) - { - uint8_t a = lvl_a(x, y).a; - lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); - } - } - - lvl_rgb.set_alpha(255); - - alpha_slices.push_back(lvl_rgb); - new_mip_indices.push_back(i); - - alpha_slices.push_back(lvl_a); - new_mip_indices.push_back(i); - } - - slices.swap(alpha_slices); - mip_indices.swap(new_mip_indices); - } - - assert(slices.size() == mip_indices.size()); - - for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) - { - const bool is_alpha_slice = m_any_source_image_has_alpha && ((slice_index & 1) != 0); - - image &slice_image = slices[slice_index]; - const uint32_t orig_width = slice_image.get_width(); - const uint32_t orig_height = slice_image.get_height(); - - // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. - slice_image.crop_dup_borders(slice_image.get_block_width(4) * 4, slice_image.get_block_height(4) * 4); - - if (m_params.m_debug_images) - { - save_png(string_format("basis_debug_source_image_%u_%u.png", source_file_index, slice_index).c_str(), slice_image); - } - - enlarge_vector(m_stats, 1); - enlarge_vector(m_slice_images, 1); - enlarge_vector(m_slice_descs, 1); - - const uint32_t dest_image_index = (uint32_t)m_stats.size() - 1; - - m_stats[dest_image_index].m_filename = source_filename.c_str(); - m_stats[dest_image_index].m_width = orig_width; - m_stats[dest_image_index].m_height = orig_height; - - m_slice_images[dest_image_index] = slice_image; - - debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height()); - - basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; - - slice_desc.m_first_block_index = m_total_blocks; - - slice_desc.m_orig_width = orig_width; - slice_desc.m_orig_height = orig_height; - - slice_desc.m_width = slice_image.get_width(); - slice_desc.m_height = slice_image.get_height(); - - slice_desc.m_num_blocks_x = slice_image.get_block_width(4); - slice_desc.m_num_blocks_y = slice_image.get_block_height(4); - - slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; - slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; - - slice_desc.m_source_file_index = source_file_index; - - slice_desc.m_mip_index = mip_indices[slice_index]; - - slice_desc.m_alpha = is_alpha_slice; - slice_desc.m_iframe = false; - if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) - { - slice_desc.m_iframe = (source_file_index == 0); - } - - m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y; - - } // slice_index - - } // source_file_index - - debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks); - - // Make sure we don't have too many slices - if (m_slice_descs.size() > BASISU_MAX_SLICES) - { - error_printf("Too many slices!\n"); - return false; - } - - // Basic sanity check on the slices - for (uint32_t i = 1; i < m_slice_descs.size(); i++) - { - const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; - const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; - - // Make sure images are in order - int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; - if (image_delta > 1) - return false; - - // Make sure mipmap levels are in order - if (!image_delta) - { - int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index; - if (level_delta > 1) - return false; - } - } - - printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size()); - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; - - printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", - i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); - - if (m_any_source_image_has_alpha) - { - // Alpha slices must be at odd slice indices - if (slice_desc.m_alpha) - { - if ((i & 1) == 0) - return false; - - const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; - - // Make sure previous slice has this image's color data - if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index) - return false; - if (prev_slice_desc.m_alpha) - return false; - if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index) - return false; - if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x) - return false; - if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y) - return false; - } - else if (i & 1) - return false; - } - else if (slice_desc.m_alpha) - { - return false; - } - - if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) - return false; - if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) - { - if (!slice_desc.m_iframe) - return false; - } - } - - return true; - } - - // Do some basic validation for 2D arrays, cubemaps, video, and volumes. - bool basis_compressor::validate_texture_type_constraints() - { - debug_printf("basis_compressor::validate_texture_type_constraints\n"); - - // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). - if (m_params.m_tex_type == basist::cBASISTexType2D) - return true; - - uint32_t total_basis_images = 0; - - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - total_basis_images = maximum<uint32_t>(total_basis_images, slice_desc.m_source_file_index + 1); - } - - if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) - { - // For cubemaps, validate that the total # of Basis images is a multiple of 6. - if ((total_basis_images % 6) != 0) - { - error_printf("basis_compressor::validate_texture_type_constraints: For cubemaps the total number of input images is not a multiple of 6!\n"); - return false; - } - } - - // Now validate that all the mip0's have the same dimensions, and that each image has the same # of mipmap levels. - uint_vec image_mipmap_levels(total_basis_images); - - int width = -1, height = -1; - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); - - if (slice_desc.m_mip_index != 0) - continue; - - if (width < 0) - { - width = slice_desc.m_orig_width; - height = slice_desc.m_orig_height; - } - else if ((width != (int)slice_desc.m_orig_width) || (height != (int)slice_desc.m_orig_height)) - { - error_printf("basis_compressor::validate_texture_type_constraints: The source image resolutions are not all equal!\n"); - return false; - } - } - - for (size_t i = 1; i < image_mipmap_levels.size(); i++) - { - if (image_mipmap_levels[0] != image_mipmap_levels[i]) - { - error_printf("basis_compressor::validate_texture_type_constraints: Each image must have the same number of mipmap levels!\n"); - return false; - } - } - - return true; - } - - bool basis_compressor::process_frontend() - { - debug_printf("basis_compressor::process_frontend\n"); - - m_source_blocks.resize(m_total_blocks); - - for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; - const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; - - const image &source_image = m_slice_images[slice_index]; - - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); - } - -#if 0 - // TODO - basis_etc1_pack_params pack_params; - pack_params.m_quality = cETCQualityMedium; - pack_params.m_perceptual = m_params.m_perceptual; - pack_params.m_use_color4 = false; - - pack_etc1_block_context pack_context; - - std::unordered_set<uint64_t> endpoint_hash; - std::unordered_set<uint32_t> selector_hash; - - for (uint32_t i = 0; i < m_source_blocks.size(); i++) - { - etc_block blk; - pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context); - - const color_rgba c0(blk.get_block_color(0, false)); - endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16)); - - const color_rgba c1(blk.get_block_color(1, false)); - endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16)); - - selector_hash.insert(blk.get_raw_selector_bits()); - } - - const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size(); - const uint32_t total_unique_selectors = (uint32_t)selector_hash.size(); - - if (m_params.m_debug) - { - debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors); - } -#endif - - const double total_texels = m_total_blocks * 16.0f; - - int endpoint_clusters = m_params.m_max_endpoint_clusters; - int selector_clusters = m_params.m_max_selector_clusters; - - if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters) - { - error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters); - return false; - } - if (selector_clusters > basisu_frontend::cMaxSelectorClusters) - { - error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); - return false; - } - - if (m_params.m_quality_level != -1) - { - const float quality = saturate(m_params.m_quality_level / 255.0f); - - const float bits_per_endpoint_cluster = 14.0f; - const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f - int max_endpoints = static_cast<int>((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); - - const float mid = 128.0f / 255.0f; - - float color_endpoint_quality = quality; - - const float endpoint_split_point = 0.5f; - if (color_endpoint_quality <= mid) - { - color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f)); - - max_endpoints = clamp<int>(max_endpoints, 256, 3072); - max_endpoints = minimum<uint32_t>(max_endpoints, m_total_blocks); - - if (max_endpoints < 64) - max_endpoints = 64; - endpoint_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(32, static_cast<float>(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); - } - else - { - color_endpoint_quality = powf((color_endpoint_quality - mid) / (1.0f - mid), 1.6f); - - max_endpoints = clamp<int>(max_endpoints, 256, 8192); - max_endpoints = minimum<uint32_t>(max_endpoints, m_total_blocks); - - if (max_endpoints < 3072) - max_endpoints = 3072; - endpoint_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(3072, static_cast<float>(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); - } - - float bits_per_selector_cluster = m_params.m_global_sel_pal ? 21.0f : 14.0f; - - const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f - int max_selectors = static_cast<int>((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster); - max_selectors = clamp<int>(max_selectors, 256, basisu_frontend::cMaxSelectorClusters); - max_selectors = minimum<uint32_t>(max_selectors, m_total_blocks); - - float color_selector_quality = quality; - //color_selector_quality = powf(color_selector_quality, 1.65f); - color_selector_quality = powf(color_selector_quality, 2.62f); - - if (max_selectors < 96) - max_selectors = 96; - selector_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(96, static_cast<float>(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters); - - debug_printf("Max endpoints: %u, max selectors: %u\n", endpoint_clusters, selector_clusters); - - if (m_params.m_quality_level >= 223) - { - if (!m_params.m_selector_rdo_thresh.was_changed()) - { - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= .25f; - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= .25f; - } - } - else if (m_params.m_quality_level >= 192) - { - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= .5f; - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= .5f; - } - else if (m_params.m_quality_level >= 160) - { - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= .75f; - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= .75f; - } - else if (m_params.m_quality_level >= 129) - { - float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); - - if (!m_params.m_endpoint_rdo_thresh.was_changed()) - m_params.m_endpoint_rdo_thresh *= lerp<float>(1.0f, .75f, l); - - if (!m_params.m_selector_rdo_thresh.was_changed()) - m_params.m_selector_rdo_thresh *= lerp<float>(1.0f, .75f, l); - } - } - - m_auto_global_sel_pal = false; - if (!m_params.m_global_sel_pal && m_params.m_auto_global_sel_pal) - { - const float bits_per_selector_cluster = 31.0f; - double selector_codebook_bpp_est = (bits_per_selector_cluster * selector_clusters) / total_texels; - debug_printf("selector_codebook_bpp_est: %f\n", selector_codebook_bpp_est); - const float force_global_sel_pal_bpp_threshold = .15f; - if ((total_texels <= 128.0f*128.0f) && (selector_codebook_bpp_est > force_global_sel_pal_bpp_threshold)) - { - m_auto_global_sel_pal = true; - debug_printf("Auto global selector palette enabled\n"); - } - } - - basisu_frontend::params p; - p.m_num_source_blocks = m_total_blocks; - p.m_pSource_blocks = &m_source_blocks[0]; - p.m_max_endpoint_clusters = endpoint_clusters; - p.m_max_selector_clusters = selector_clusters; - p.m_perceptual = m_params.m_perceptual; - p.m_debug_stats = m_params.m_debug; - p.m_debug_images = m_params.m_debug_images; - p.m_compression_level = m_params.m_compression_level; - p.m_tex_type = m_params.m_tex_type; - p.m_multithreaded = m_params.m_multithreading; - p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks; - p.m_pJob_pool = m_params.m_pJob_pool; - - if ((m_params.m_global_sel_pal) || (m_auto_global_sel_pal)) - { - p.m_pGlobal_sel_codebook = m_params.m_pSel_codebook; - p.m_num_global_sel_codebook_pal_bits = m_params.m_global_pal_bits; - p.m_num_global_sel_codebook_mod_bits = m_params.m_global_mod_bits; - p.m_use_hybrid_selector_codebooks = !m_params.m_no_hybrid_sel_cb; - p.m_hybrid_codebook_quality_thresh = m_params.m_hybrid_sel_cb_quality_thresh; - } - - if (!m_frontend.init(p)) - { - error_printf("basisu_frontend::init() failed!\n"); - return false; - } - - m_frontend.compress(); - - if (m_params.m_debug_images) - { - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - char filename[1024]; -#ifdef _WIN32 - sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#else - snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#endif - m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); - -#ifdef _WIN32 - sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#else - snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#endif - m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); - } - } - - return true; - } - - bool basis_compressor::extract_frontend_texture_data() - { - debug_printf("basis_compressor::extract_frontend_texture_data\n"); - - m_frontend_output_textures.resize(m_slice_descs.size()); - m_best_etc1s_images.resize(m_slice_descs.size()); - m_best_etc1s_images_unpacked.resize(m_slice_descs.size()); - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; - - const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; - const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; - - const uint32_t width = num_blocks_x * 4; - const uint32_t height = num_blocks_y * 4; - - m_frontend_output_textures[i].init(texture_format::cETC1, width, height); - - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); - -#if 0 - if (m_params.m_debug_images) - { - char filename[1024]; - sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i); - write_etc1_vis_images(m_frontend_output_textures[i], filename); - } -#endif - - m_best_etc1s_images[i].init(texture_format::cETC1, width, height); - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); - - m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); - } - - return true; - } - - bool basis_compressor::process_backend() - { - debug_printf("basis_compressor::process_backend\n"); - - basisu_backend_params backend_params; - backend_params.m_debug = m_params.m_debug; - backend_params.m_debug_images = m_params.m_debug_images; - backend_params.m_etc1s = true; - backend_params.m_compression_level = m_params.m_compression_level; - - if (!m_params.m_no_endpoint_rdo) - backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; - - if (!m_params.m_no_selector_rdo) - backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; - - backend_params.m_use_global_sel_codebook = (m_frontend.get_params().m_pGlobal_sel_codebook != NULL); - backend_params.m_global_sel_codebook_pal_bits = m_frontend.get_params().m_num_global_sel_codebook_pal_bits; - backend_params.m_global_sel_codebook_mod_bits = m_frontend.get_params().m_num_global_sel_codebook_mod_bits; - backend_params.m_use_hybrid_sel_codebooks = m_frontend.get_params().m_use_hybrid_selector_codebooks; - - m_backend.init(&m_frontend, backend_params, m_slice_descs, m_params.m_pSel_codebook); - uint32_t total_packed_bytes = m_backend.encode(); - - if (!total_packed_bytes) - { - error_printf("basis_compressor::encode() failed!\n"); - return false; - } - - debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes); - - return true; - } - - bool basis_compressor::create_basis_file_and_transcode() - { - debug_printf("basis_compressor::create_basis_file_and_transcode\n"); - - const basisu_backend_output &encoded_output = m_backend.get_output(); - - if (!m_basis_file.init(encoded_output, m_params.m_tex_type, m_params.m_userdata0, m_params.m_userdata1, m_params.m_y_flip, m_params.m_us_per_frame)) - { - error_printf("basis_compressor::write_output_files_and_compute_stats: basisu_backend:init() failed!\n"); - return false; - } - - const uint8_vec &comp_data = m_basis_file.get_compressed_data(); - - m_output_basis_file = comp_data; - - // Verify the compressed data by transcoding it to ETC1/BC1 and validating the CRC's. - basist::basisu_transcoder decoder(m_params.m_pSel_codebook); - if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true)) - { - error_printf("decoder.validate_file_checksums() failed!\n"); - return false; - } - - m_decoded_output_textures.resize(m_slice_descs.size()); - m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); - - m_decoded_output_textures_bc1.resize(m_slice_descs.size()); - m_decoded_output_textures_unpacked_bc1.resize(m_slice_descs.size()); - - interval_timer tm; - tm.start(); - - if (!decoder.start_transcoding(&comp_data[0], (uint32_t)comp_data.size())) - { - error_printf("decoder.start_transcoding() failed!\n"); - return false; - } - - debug_printf("basisu_comppressor::start_transcoding() took %3.3fms\n", tm.get_elapsed_ms()); - - uint32_t total_orig_pixels = 0; - uint32_t total_texels = 0; - - double total_time_etc1 = 0; - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - gpu_image decoded_texture; - decoded_texture.init(texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); - - tm.start(); - - if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast<etc_block *>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cETC1, 8)) - { - error_printf("Transcoding failed to ETC1 on slice %u!\n", i); - return false; - } - - total_time_etc1 += tm.get_elapsed_secs(); - - uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0); - if (image_crc16 != m_backend.get_output().m_slice_image_crcs[i]) - { - error_printf("Decoded image data CRC check failed on slice %u!\n", i); - return false; - } - debug_printf("Decoded image data CRC check succeeded on slice %i\n", i); - - m_decoded_output_textures[i] = decoded_texture; - - total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height; - total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height; - } - - tm.start(); - - basist::basisu_transcoder_init(); - - debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); - - double total_time_bc1 = 0; - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - gpu_image decoded_texture; - decoded_texture.init(texture_format::cBC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); - - tm.start(); - - if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, - reinterpret_cast<etc_block *>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC1, 8)) - { - error_printf("Transcoding failed to BC1 on slice %u!\n", i); - return false; - } - - total_time_bc1 += tm.get_elapsed_secs(); - - m_decoded_output_textures_bc1[i] = decoded_texture; - } - - for (uint32_t i = 0; i < m_slice_descs.size(); i++) - { - m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); - m_decoded_output_textures_bc1[i].unpack(m_decoded_output_textures_unpacked_bc1[i]); - } - - debug_printf("Transcoded to ETC1 in %3.3fms, %f texels/sec\n", total_time_etc1 * 1000.0f, total_orig_pixels / total_time_etc1); - - debug_printf("Transcoded to BC1 in %3.3fms, %f texels/sec\n", total_time_bc1 * 1000.0f, total_orig_pixels / total_time_bc1); - - debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); - - m_output_blocks.resize(0); - - uint32_t total_orig_texels = 0; - for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; - - const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - - assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); - - memcpy(enlarge_vector(m_output_blocks, total_blocks), m_decoded_output_textures[slice_index].get_ptr(), sizeof(etc_block) * total_blocks); - } - - m_basis_file_size = (uint32_t)comp_data.size(); - m_basis_bits_per_texel = (comp_data.size() * 8.0f) / total_orig_texels; - - return true; - } - - bool basis_compressor::write_output_files_and_compute_stats() - { - debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); - - if (m_params.m_write_output_basis_files) - { - const uint8_vec &comp_data = m_basis_file.get_compressed_data(); - - const std::string& basis_filename = m_params.m_out_filename; - - if (!write_vec_to_file(basis_filename.c_str(), comp_data)) - { - error_printf("Failed writing output data to file \"%s\"\n", basis_filename.c_str()); - return false; - } - - printf("Wrote output .basis file \"%s\"\n", basis_filename.c_str()); - } - - m_stats.resize(m_slice_descs.size()); - - uint32_t total_orig_texels = 0; - - for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) - { - const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - - total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; - - if (m_params.m_compute_stats) - { - printf("Slice: %u\n", slice_index); - - image_stats &s = m_stats[slice_index]; - - // TODO: We used to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back. - - image_metrics em; - - // ---- .basis ETC1S stats - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); - em.print(".basis ETC1S 709 Luma: "); - - s.m_basis_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr); - s.m_basis_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); - em.print(".basis ETC1S 601 Luma: "); - - s.m_basis_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); - em.print(".basis ETC1S RGB Avg: "); - - s.m_basis_etc1s_rgb_avg_psnr = em.m_psnr; - - if (m_slice_descs.size() == 1) - { - debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_etc1s_luma_709_psnr / ((m_backend.get_output().get_output_size_estimate() * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); - } - - // ---- .basis BC1 stats - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 0); - em.print(".basis BC1 709 Luma: "); - - s.m_basis_bc1_luma_709_psnr = static_cast<float>(em.m_psnr); - s.m_basis_bc1_luma_709_ssim = static_cast<float>(em.m_ssim); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 0, true, true); - em.print(".basis BC1 601 Luma: "); - - s.m_basis_bc1_luma_601_psnr = static_cast<float>(em.m_psnr); - - em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc1[slice_index], 0, 3); - em.print(".basis BC1 RGB Avg: "); - - s.m_basis_bc1_rgb_avg_psnr = static_cast<float>(em.m_psnr); - - // ---- Nearly best possible ETC1S stats - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0); - em.print("Unquantized ETC1S 709 Luma: "); - - s.m_best_luma_709_psnr = static_cast<float>(em.m_psnr); - s.m_best_luma_709_ssim = static_cast<float>(em.m_ssim); - - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true); - em.print("Unquantized ETC1S 601 Luma: "); - - s.m_best_luma_601_psnr = static_cast<float>(em.m_psnr); - - em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); - em.print("Unquantized ETC1S RGB Avg: "); - - s.m_best_rgb_avg_psnr = static_cast<float>(em.m_psnr); - } - - if (m_frontend.get_params().m_debug_images) - { - std::string out_basename; - if (m_params.m_out_filename.size()) - string_get_filename(m_params.m_out_filename.c_str(), out_basename); - else if (m_params.m_source_filenames.size()) - string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); - - string_remove_extension(out_basename); - out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); - - // Write "best" ETC1S debug images - { - gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); - best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image); - - image best_etc1s_unpacked; - best_etc1s_gpu_image.unpack(best_etc1s_unpacked); - save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); - } - - // Write decoded ETC1S debug images - { - gpu_image decoded_etc1s(m_decoded_output_textures[slice_index]); - decoded_etc1s.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_decoded_etc1s.ktx").c_str(), decoded_etc1s); - - image temp(m_decoded_output_textures_unpacked[slice_index]); - temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); - save_png(out_basename + "_decoded_etc1s.png", temp); - } - - // Write decoded BC1 debug images - { - gpu_image decoded_bc1(m_decoded_output_textures_bc1[slice_index]); - decoded_bc1.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); - write_compressed_texture_file((out_basename + "_decoded_bc1.ktx").c_str(), decoded_bc1); - - image temp(m_decoded_output_textures_unpacked_bc1[slice_index]); - temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); - save_png(out_basename + "_decoded_bc1.png", temp); - } - } - } - - return true; - } - -} // namespace basisu diff --git a/thirdparty/basis_universal/basisu_etc.cpp b/thirdparty/basis_universal/basisu_etc.cpp deleted file mode 100644 index 244f1d2e6b..0000000000 --- a/thirdparty/basis_universal/basisu_etc.cpp +++ /dev/null @@ -1,1074 +0,0 @@ -// basis_etc.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "basisu_etc.h" - -#define BASISU_DEBUG_ETC_ENCODER 0 -#define BASISU_DEBUG_ETC_ENCODER_DEEPER 0 - -namespace basisu -{ - const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; - - static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = - { - { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, - { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, - { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, - { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, - { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, - { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, - { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, - { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, - { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, - { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, - { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, - { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, - { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, - { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, - { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, - { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, - { { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, - { { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, - { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, - { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, - { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, - { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, - { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, - { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, - { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, - { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, - { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, - { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, - { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, - { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, - { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 1, 0, 0, 7 } }, - { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, - { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } - }; - - const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = - { - { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, - { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } - }; - - const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; - const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - - // [flip][subblock][pixel_index] - const etc_coord2 g_etc1_pixel_coords[2][2][8] = - { - { - { - { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, - { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } - }, - { - { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, - { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } - } - }, - { - { - { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, - { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } - }, - { - { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, - { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } - }, - } - }; - - // [flip][subblock][pixel_index] - const uint32_t g_etc1_pixel_indices[2][2][8] = - { - { - { - 0 + 4 * 0, 0 + 4 * 1, 0 + 4 * 2, 0 + 4 * 3, - 1 + 4 * 0, 1 + 4 * 1, 1 + 4 * 2, 1 + 4 * 3 - }, - { - 2 + 4 * 0, 2 + 4 * 1, 2 + 4 * 2, 2 + 4 * 3, - 3 + 4 * 0, 3 + 4 * 1, 3 + 4 * 2, 3 + 4 * 3 - } - }, - { - { - 0 + 4 * 0, 1 + 4 * 0, 2 + 4 * 0, 3 + 4 * 0, - 0 + 4 * 1, 1 + 4 * 1, 2 + 4 * 1, 3 + 4 * 1 - }, - { - 0 + 4 * 2, 1 + 4 * 2, 2 + 4 * 2, 3 + 4 * 2, - 0 + 4 * 3, 1 + 4 * 3, 2 + 4 * 3, 3 + 4 * 3 - }, - } - }; - - uint16_t etc_block::pack_color5(const color_rgba& color, bool scaled, uint32_t bias) - { - return pack_color5(color.r, color.g, color.b, scaled, bias); - } - - uint16_t etc_block::pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) - { - if (scaled) - { - r = (r * 31U + bias) / 255U; - g = (g * 31U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } - - r = minimum(r, 31U); - g = minimum(g, 31U); - b = minimum(b, 31U); - - return static_cast<uint16_t>(b | (g << 5U) | (r << 10U)); - } - - color_rgba etc_block::unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) - { - uint32_t b = packed_color5 & 31U; - uint32_t g = (packed_color5 >> 5U) & 31U; - uint32_t r = (packed_color5 >> 10U) & 31U; - - if (scaled) - { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); - } - - void etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled) - { - result = unpack_color5(packed_color5, scaled, 255); - } - - void etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) - { - color_rgba c(unpack_color5(packed_color5, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; - } - - bool etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) - { - color_rgba_i16 dc(unpack_delta3(packed_delta3)); - - int b = (packed_color5 & 31U) + dc.b; - int g = ((packed_color5 >> 5U) & 31U) + dc.g; - int r = ((packed_color5 >> 10U) & 31U) + dc.r; - - bool success = true; - if (static_cast<uint32_t>(r | g | b) > 31U) - { - success = false; - r = clamp<int>(r, 0, 31); - g = clamp<int>(g, 0, 31); - b = clamp<int>(b, 0, 31); - } - - if (scaled) - { - b = (b << 3U) | (b >> 2U); - g = (g << 3U) | (g >> 2U); - r = (r << 3U) | (r >> 2U); - } - - result.set_noclamp_rgba(r, g, b, minimum(alpha, 255U)); - return success; - } - - bool etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) - { - color_rgba result; - const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); - r = result.r; - g = result.g; - b = result.b; - return success; - } - - uint16_t etc_block::pack_delta3(const color_rgba_i16& color) - { - return pack_delta3(color.r, color.g, color.b); - } - - uint16_t etc_block::pack_delta3(int r, int g, int b) - { - assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); - assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); - assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); - if (r < 0) r += 8; - if (g < 0) g += 8; - if (b < 0) b += 8; - return static_cast<uint16_t>(b | (g << 3) | (r << 6)); - } - - color_rgba_i16 etc_block::unpack_delta3(uint16_t packed_delta3) - { - int r = (packed_delta3 >> 6) & 7; - int g = (packed_delta3 >> 3) & 7; - int b = packed_delta3 & 7; - if (r >= 4) r -= 8; - if (g >= 4) g -= 8; - if (b >= 4) b -= 8; - return color_rgba_i16(r, g, b, 255); - } - - void etc_block::unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) - { - r = (packed_delta3 >> 6) & 7; - g = (packed_delta3 >> 3) & 7; - b = packed_delta3 & 7; - if (r >= 4) r -= 8; - if (g >= 4) g -= 8; - if (b >= 4) b -= 8; - } - - uint16_t etc_block::pack_color4(const color_rgba& color, bool scaled, uint32_t bias) - { - return pack_color4(color.r, color.g, color.b, scaled, bias); - } - - uint16_t etc_block::pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) - { - if (scaled) - { - r = (r * 15U + bias) / 255U; - g = (g * 15U + bias) / 255U; - b = (b * 15U + bias) / 255U; - } - - r = minimum(r, 15U); - g = minimum(g, 15U); - b = minimum(b, 15U); - - return static_cast<uint16_t>(b | (g << 4U) | (r << 8U)); - } - - color_rgba etc_block::unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) - { - uint32_t b = packed_color4 & 15U; - uint32_t g = (packed_color4 >> 4U) & 15U; - uint32_t r = (packed_color4 >> 8U) & 15U; - - if (scaled) - { - b = (b << 4U) | b; - g = (g << 4U) | g; - r = (r << 4U) | r; - } - - return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); - } - - void etc_block::unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) - { - color_rgba c(unpack_color4(packed_color4, scaled, 0)); - r = c.r; - g = c.g; - b = c.b; - } - - void etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx) - { - assert(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint32_t r, g, b; - unpack_color5(r, g, b, packed_color5, true); - - const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0, 255); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1, 255); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2, 255); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3, 255); - } - - bool etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx) - { - assert(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint32_t r, g, b; - bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); - - const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0, 255); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1, 255); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2, 255); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3, 255); - - return success; - } - - void etc_block::get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx) - { - assert(table_idx < cETC1IntenModifierValues); - const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; - - uint32_t r, g, b; - unpack_color4(r, g, b, packed_color4, true); - - const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); - - const int y0 = pInten_modifer_table[0]; - pDst[0].set(ir + y0, ig + y0, ib + y0, 255); - - const int y1 = pInten_modifer_table[1]; - pDst[1].set(ir + y1, ig + y1, ib + y1, 255); - - const int y2 = pInten_modifer_table[2]; - pDst[2].set(ir + y2, ig + y2, ib + y2, 255); - - const int y3 = pInten_modifer_table[3]; - pDst[3].set(ir + y3, ig + y3, ib + y3, 255); - } - - bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) - { - const bool diff_flag = block.get_diff_bit(); - const bool flip_flag = block.get_flip_bit(); - const uint32_t table_index0 = block.get_inten_table(0); - const uint32_t table_index1 = block.get_inten_table(1); - - color_rgba subblock_colors0[4]; - color_rgba subblock_colors1[4]; - - if (diff_flag) - { - const uint16_t base_color5 = block.get_base5_color(); - const uint16_t delta_color3 = block.get_delta3_color(); - etc_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); - - if (!etc_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) - return false; - } - else - { - const uint16_t base_color4_0 = block.get_base4_color(0); - etc_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); - - const uint16_t base_color4_1 = block.get_base4_color(1); - etc_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); - } - - if (preserve_alpha) - { - if (flip_flag) - { - for (uint32_t y = 0; y < 2; y++) - { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); - pDst += 4; - } - - for (uint32_t y = 2; y < 4; y++) - { - pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } - else - { - for (uint32_t y = 0; y < 4; y++) - { - pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); - pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); - pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); - pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); - pDst += 4; - } - } - } - else - { - if (flip_flag) - { - // 0000 - // 0000 - // 1111 - // 1111 - for (uint32_t y = 0; y < 2; y++) - { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors0[block.get_selector(2, y)]; - pDst[3] = subblock_colors0[block.get_selector(3, y)]; - pDst += 4; - } - - for (uint32_t y = 2; y < 4; y++) - { - pDst[0] = subblock_colors1[block.get_selector(0, y)]; - pDst[1] = subblock_colors1[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } - else - { - // 0011 - // 0011 - // 0011 - // 0011 - for (uint32_t y = 0; y < 4; y++) - { - pDst[0] = subblock_colors0[block.get_selector(0, y)]; - pDst[1] = subblock_colors0[block.get_selector(1, y)]; - pDst[2] = subblock_colors1[block.get_selector(2, y)]; - pDst[3] = subblock_colors1[block.get_selector(3, y)]; - pDst += 4; - } - } - } - - return true; - } - - inline int extend_6_to_8(uint32_t n) - { - return (n << 2) | (n >> 4); - } - - inline int extend_7_to_8(uint32_t n) - { - return (n << 1) | (n >> 6); - } - - inline int extend_4_to_8(uint32_t n) - { - return (n << 4) | n; - } - - uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const - { - color_rgba unpacked_block[16]; - - unpack_etc1(*this, unpacked_block); - - uint64_t total_error = 0; - - if (subblock_index < 0) - { - for (uint32_t i = 0; i < 16; i++) - total_error += color_distance(perceptual, pBlock_pixels[i], unpacked_block[i], false); - } - else - { - const bool flip_bit = get_flip_bit(); - - for (uint32_t i = 0; i < 8; i++) - { - const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; - - total_error += color_distance(perceptual, pBlock_pixels[idx], unpacked_block[idx], false); - } - } - - return total_error; - } - - void etc_block::get_subblock_pixels(color_rgba* pPixels, int subblock_index) const - { - if (subblock_index < 0) - unpack_etc1(*this, pPixels); - else - { - color_rgba unpacked_block[16]; - - unpack_etc1(*this, unpacked_block); - - const bool flip_bit = get_flip_bit(); - - for (uint32_t i = 0; i < 8; i++) - { - const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; - - pPixels[i] = unpacked_block[idx]; - } - } - } - - bool etc1_optimizer::compute() - { - assert(m_pResult->m_pSelectors); - - if ((m_pParams->m_pForce_selectors) || (m_pParams->m_pEval_solution_override)) - { - assert(m_pParams->m_quality >= cETCQualitySlow); - } - - const uint32_t n = m_pParams->m_num_src_pixels; - - if (m_pParams->m_cluster_fit) - { - if (m_pParams->m_quality == cETCQualityFast) - compute_internal_cluster_fit(4); - else if (m_pParams->m_quality == cETCQualityMedium) - compute_internal_cluster_fit(32); - else if (m_pParams->m_quality == cETCQualitySlow) - compute_internal_cluster_fit(64); - else - compute_internal_cluster_fit(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE); - } - else - compute_internal_neighborhood(m_br, m_bg, m_bb); - - if (!m_best_solution.m_valid) - { - m_pResult->m_error = UINT32_MAX; - return false; - } - - const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; - -#ifdef BASISU_BUILD_DEBUG - if (m_pParams->m_pEval_solution_override == nullptr) - { - color_rgba block_colors[4]; - m_best_solution.m_coords.get_block_colors(block_colors); - - const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - uint64_t actual_error = 0; - for (uint32_t i = 0; i < n; i++) - { - if ((m_pParams->m_perceptual) && (m_pParams->m_quality >= cETCQualitySlow)) - actual_error += color_distance(true, pSrc_pixels[i], block_colors[pSelectors[i]], false); - else - actual_error += color_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); - } - assert(actual_error == m_best_solution.m_error); - } -#endif - - m_pResult->m_error = m_best_solution.m_error; - - m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; - m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; - - m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; - memcpy(m_pResult->m_pSelectors, pSelectors, n); - m_pResult->m_n = n; - - return true; - } - - void etc1_optimizer::refine_solution(uint32_t max_refinement_trials) - { - // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. - // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors: - // The goal is: - // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 - // Rearranging this: - // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 - // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 - // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 - // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 - // So what this means: - // optimal_block_color = avg_input - avg_inten_delta - // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. - // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. - // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. - - const uint32_t n = m_pParams->m_num_src_pixels; - - for (uint32_t refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) - { - const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; - const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; - - int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; - const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); - for (uint32_t r = 0; r < n; r++) - { - const uint32_t s = *pSelectors++; - const int yd_temp = pInten_table[s]; - // Compute actual delta being applied to each pixel, taking into account clamping. - delta_sum_r += clamp<int>(base_color.r + yd_temp, 0, 255) - base_color.r; - delta_sum_g += clamp<int>(base_color.g + yd_temp, 0, 255) - base_color.g; - delta_sum_b += clamp<int>(base_color.b + yd_temp, 0, 255) - base_color.b; - } - - if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) - break; - - const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n; - const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n; - const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n; - const int br1 = clamp<int>(static_cast<uint32_t>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bg1 = clamp<int>(static_cast<uint32_t>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bb1 = clamp<int>(static_cast<uint32_t>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Refinement trial %u, avg_delta %f %f %f\n", refinement_trial, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); -#endif - - if (!evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution)) - break; - - } // refinement_trial - } - - void etc1_optimizer::compute_internal_neighborhood(int scan_r, int scan_g, int scan_b) - { - if (m_best_solution.m_error == 0) - return; - - const uint32_t n = m_pParams->m_num_src_pixels; - const int scan_delta_size = m_pParams->m_scan_delta_size; - - // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. - // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. - for (int zdi = 0; zdi < scan_delta_size; zdi++) - { - const int zd = m_pParams->m_pScan_deltas[zdi]; - const int mbb = scan_b + zd; - if (mbb < 0) continue; else if (mbb > m_limit) break; - - for (int ydi = 0; ydi < scan_delta_size; ydi++) - { - const int yd = m_pParams->m_pScan_deltas[ydi]; - const int mbg = scan_g + yd; - if (mbg < 0) continue; else if (mbg > m_limit) break; - - for (int xdi = 0; xdi < scan_delta_size; xdi++) - { - const int xd = m_pParams->m_pScan_deltas[xdi]; - const int mbr = scan_r + xd; - if (mbr < 0) continue; else if (mbr > m_limit) break; - - etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); - - if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) - continue; - - if (m_pParams->m_refinement) - { - refine_solution((m_pParams->m_quality == cETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2)); - } - - } // xdi - } // ydi - } // zdi - } - - void etc1_optimizer::compute_internal_cluster_fit(uint32_t total_perms_to_try) - { - if ((!m_best_solution.m_valid) || ((m_br != m_best_solution.m_coords.m_unscaled_color.r) || (m_bg != m_best_solution.m_coords.m_unscaled_color.g) || (m_bb != m_best_solution.m_coords.m_unscaled_color.b))) - { - evaluate_solution(etc1_solution_coordinates(m_br, m_bg, m_bb, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); - } - - if ((m_best_solution.m_error == 0) || (!m_best_solution.m_valid)) - return; - - for (uint32_t i = 0; i < total_perms_to_try; i++) - { - int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; - - const int *pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; - const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); - - const uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v; - - for (uint32_t q = 0; q < 4; q++) - { - const int yd_temp = pInten_table[q]; - - delta_sum_r += pNum_selectors[q] * (clamp<int>(base_color.r + yd_temp, 0, 255) - base_color.r); - delta_sum_g += pNum_selectors[q] * (clamp<int>(base_color.g + yd_temp, 0, 255) - base_color.g); - delta_sum_b += pNum_selectors[q] * (clamp<int>(base_color.b + yd_temp, 0, 255) - base_color.b); - } - - if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) - continue; - - const float avg_delta_r_f = static_cast<float>(delta_sum_r) / 8; - const float avg_delta_g_f = static_cast<float>(delta_sum_g) / 8; - const float avg_delta_b_f = static_cast<float>(delta_sum_b) / 8; - - const int br1 = clamp<int>(static_cast<uint32_t>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bg1 = clamp<int>(static_cast<uint32_t>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); - const int bb1 = clamp<int>(static_cast<uint32_t>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Second refinement trial %u, avg_delta %f %f %f\n", i, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); -#endif - - evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); - - if (m_best_solution.m_error == 0) - break; - } - } - - void etc1_optimizer::init(const params& params, results& result) - { - m_pParams = ¶ms; - m_pResult = &result; - - const uint32_t n = m_pParams->m_num_src_pixels; - - m_selectors.resize(n); - m_best_selectors.resize(n); - m_temp_selectors.resize(n); - m_trial_solution.m_selectors.resize(n); - m_best_solution.m_selectors.resize(n); - - m_limit = m_pParams->m_use_color4 ? 15 : 31; - - vec3F avg_color(0.0f); - - m_luma.resize(n); - m_sorted_luma_indices.resize(n); - m_sorted_luma.resize(n); - - for (uint32_t i = 0; i < n; i++) - { - const color_rgba& c = m_pParams->m_pSrc_pixels[i]; - const vec3F fc(c.r, c.g, c.b); - - avg_color += fc; - - m_luma[i] = static_cast<uint16_t>(c.r + c.g + c.b); - m_sorted_luma_indices[i] = i; - } - avg_color /= static_cast<float>(n); - m_avg_color = avg_color; - - m_br = clamp<int>(static_cast<uint32_t>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); - m_bg = clamp<int>(static_cast<uint32_t>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); - m_bb = clamp<int>(static_cast<uint32_t>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Avg block color: %u %u %u\n", m_br, m_bg, m_bb); -#endif - - if (m_pParams->m_quality <= cETCQualityMedium) - { - indirect_sort(n, &m_sorted_luma_indices[0], &m_luma[0]); - - m_pSorted_luma = &m_sorted_luma[0]; - m_pSorted_luma_indices = &m_sorted_luma_indices[0]; - - for (uint32_t i = 0; i < n; i++) - m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; - } - - m_best_solution.m_coords.clear(); - m_best_solution.m_valid = false; - m_best_solution.m_error = UINT64_MAX; - - m_solutions_tried.clear(); - } - - bool etc1_optimizer::evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) - { - uint32_t k = coords.m_unscaled_color.r | (coords.m_unscaled_color.g << 8) | (coords.m_unscaled_color.b << 16); - if (!m_solutions_tried.insert(k).second) - return false; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval solution: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); -#endif - - trial_solution.m_valid = false; - - if (m_pParams->m_constrain_against_base_color5) - { - const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; - const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; - const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; - - if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) - { -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); -#endif - return false; - } - } - - const color_rgba base_color(coords.get_scaled_color()); - - const uint32_t n = m_pParams->m_num_src_pixels; - assert(trial_solution.m_selectors.size() == n); - - trial_solution.m_error = UINT64_MAX; - - const uint8_t *pSelectors_to_use = m_pParams->m_pForce_selectors; - - for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) - { - const int* pInten_table = g_etc1_inten_tables[inten_table]; - - color_rgba block_colors[4]; - for (uint32_t s = 0; s < 4; s++) - { - const int yd = pInten_table[s]; - block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); - } - - uint64_t total_error = 0; - - const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - for (uint32_t c = 0; c < n; c++) - { - const color_rgba& src_pixel = *pSrc_pixels++; - - uint32_t best_selector_index = 0; - uint32_t best_error = 0; - - if (pSelectors_to_use) - { - best_selector_index = pSelectors_to_use[c]; - best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[best_selector_index], false); - } - else - { - best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[0], false); - - uint32_t trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[1], false); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 1; - } - - trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[2], false); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 2; - } - - trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[3], false); - if (trial_error < best_error) - { - best_error = trial_error; - best_selector_index = 3; - } - } - - m_temp_selectors[c] = static_cast<uint8_t>(best_selector_index); - - total_error += best_error; - if ((m_pParams->m_pEval_solution_override == nullptr) && (total_error >= trial_solution.m_error)) - break; - } - - if (m_pParams->m_pEval_solution_override) - { - if (!(*m_pParams->m_pEval_solution_override)(total_error, *m_pParams, block_colors, &m_temp_selectors[0], coords)) - return false; - } - - if (total_error < trial_solution.m_error) - { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - trial_solution.m_selectors.swap(m_temp_selectors); - trial_solution.m_valid = true; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); -#endif - - bool success = false; - if (pBest_solution) - { - if (trial_solution.m_error < pBest_solution->m_error) - { - *pBest_solution = trial_solution; - success = true; - } - } - - return success; - } - - bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) - { - uint32_t k = coords.m_unscaled_color.r | (coords.m_unscaled_color.g << 8) | (coords.m_unscaled_color.b << 16); - if (!m_solutions_tried.insert(k).second) - return false; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval solution fast: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); -#endif - - if (m_pParams->m_constrain_against_base_color5) - { - const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; - const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; - const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; - - if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) - { - trial_solution.m_valid = false; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); -#endif - return false; - } - } - - const color_rgba base_color(coords.get_scaled_color()); - - const uint32_t n = m_pParams->m_num_src_pixels; - assert(trial_solution.m_selectors.size() == n); - - trial_solution.m_error = UINT64_MAX; - - for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) - { - const int* pInten_table = g_etc1_inten_tables[inten_table]; - - uint32_t block_inten[4]; - color_rgba block_colors[4]; - for (uint32_t s = 0; s < 4; s++) - { - const int yd = pInten_table[s]; - color_rgba block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); - block_colors[s] = block_color; - block_inten[s] = block_color.r + block_color.g + block_color.b; - } - - // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. - // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. - // 0 1 2 3 - // 01 12 23 - const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; - - uint64_t total_error = 0; - const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) - { - if (block_inten[0] > m_pSorted_luma[n - 1]) - { - const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 0, n); - - for (uint32_t c = 0; c < n; c++) - total_error += color_distance(block_colors[0], pSrc_pixels[c], false); - } - else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) - { - if (m_pSorted_luma[0] > block_inten[3]) - { - const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); - if (min_error >= trial_solution.m_error) - continue; - } - - memset(&m_temp_selectors[0], 3, n); - - for (uint32_t c = 0; c < n; c++) - total_error += color_distance(block_colors[3], pSrc_pixels[c], false); - } - else - { - uint32_t cur_selector = 0, c; - for (c = 0; c < n; c++) - { - const uint32_t y = m_pSorted_luma[c]; - while ((y * 2) >= block_inten_midpoints[cur_selector]) - if (++cur_selector > 2) - goto done; - const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = static_cast<uint8_t>(cur_selector); - total_error += color_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); - } - done: - while (c < n) - { - const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; - m_temp_selectors[sorted_pixel_index] = 3; - total_error += color_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); - ++c; - } - } - - if (total_error < trial_solution.m_error) - { - trial_solution.m_error = total_error; - trial_solution.m_coords.m_inten_table = inten_table; - trial_solution.m_selectors.swap(m_temp_selectors); - trial_solution.m_valid = true; - if (!total_error) - break; - } - } - trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; - trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - -#if BASISU_DEBUG_ETC_ENCODER_DEEPER - printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); -#endif - - bool success = false; - if (pBest_solution) - { - if (trial_solution.m_error < pBest_solution->m_error) - { - *pBest_solution = trial_solution; - success = true; - } - } - - return success; - } - -} // namespace basisu diff --git a/thirdparty/basis_universal/basisu_pvrtc1_4.cpp b/thirdparty/basis_universal/basisu_pvrtc1_4.cpp deleted file mode 100644 index f0122fcb6c..0000000000 --- a/thirdparty/basis_universal/basisu_pvrtc1_4.cpp +++ /dev/null @@ -1,269 +0,0 @@ -// basisu_pvrtc1_4.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "basisu_pvrtc1_4.h" - -namespace basisu -{ - uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) - { - assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); - - uint32_t min_d = width, max_v = y; - if (height < width) - { - min_d = height; - max_v = x; - } - - // Interleave the XY LSB's - uint32_t shift_ofs = 0, swizzled = 0; - for (uint32_t s_bit = 1, d_bit = 1; s_bit < min_d; s_bit <<= 1, d_bit <<= 2, ++shift_ofs) - { - if (y & s_bit) swizzled |= d_bit; - if (x & s_bit) swizzled |= (2 * d_bit); - } - - max_v >>= shift_ofs; - - // OR in the rest of the bits from the largest dimension - swizzled |= (max_v << (2 * shift_ofs)); - - return swizzled; - } - - color_rgba pvrtc4_block::get_endpoint(uint32_t endpoint_index, bool unpack) const - { - assert(endpoint_index < 2); - const uint32_t packed = m_endpoints >> (endpoint_index * 16); - - uint32_t r, g, b, a; - if (packed & 0x8000) - { - // opaque 554 or 555 - if (!endpoint_index) - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = (packed >> 1) & 15; - - if (unpack) - { - b = (b << 1) | (b >> 3); - } - } - else - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = packed & 31; - } - - a = unpack ? 255 : 7; - } - else - { - // translucent 4433 or 4443 - if (!endpoint_index) - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = (packed >> 1) & 7; - - if (unpack) - { - a = (a << 1); - a = (a << 4) | a; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 2) | (b >> 1); - } - } - else - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = packed & 15; - - if (unpack) - { - a = (a << 1); - a = (a << 4) | a; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 1) | (b >> 3); - } - } - } - - if (unpack) - { - r = (r << 3) | (r >> 2); - g = (g << 3) | (g >> 2); - b = (b << 3) | (b >> 2); - } - - assert((r < 256) && (g < 256) && (b < 256) && (a < 256)); - - return color_rgba(r, g, b, a); - } - - color_rgba pvrtc4_block::get_endpoint_5554(uint32_t endpoint_index) const - { - assert(endpoint_index < 2); - const uint32_t packed = m_endpoints >> (endpoint_index * 16); - - uint32_t r, g, b, a; - if (packed & 0x8000) - { - // opaque 554 or 555 - if (!endpoint_index) - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = (packed >> 1) & 15; - - b = (b << 1) | (b >> 3); - } - else - { - r = (packed >> 10) & 31; - g = (packed >> 5) & 31; - b = packed & 31; - } - - a = 15; - } - else - { - // translucent 4433 or 4443 - if (!endpoint_index) - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = (packed >> 1) & 7; - - a = a << 1; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 2) | (b >> 1); - } - else - { - a = (packed >> 12) & 7; - r = (packed >> 8) & 15; - g = (packed >> 4) & 15; - b = packed & 15; - - a = a << 1; - - r = (r << 1) | (r >> 3); - g = (g << 1) | (g >> 3); - b = (b << 1) | (b >> 3); - } - } - - assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - - return color_rgba(r, g, b, a); - } - - bool pvrtc4_image::get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const - { - assert((x < m_width) && (y < m_height)); - - int block_x0 = (static_cast<int>(x) - 2) >> 2; - int block_x1 = block_x0 + 1; - int block_y0 = (static_cast<int>(y) - 2) >> 2; - int block_y1 = block_y0 + 1; - - block_x0 = posmod(block_x0, m_block_width); - block_x1 = posmod(block_x1, m_block_width); - block_y0 = posmod(block_y0, m_block_height); - block_y1 = posmod(block_y1, m_block_height); - - pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); - pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); - - if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) - { - for (uint32_t c = 0; c < 4; c++) - { - uint32_t m = (pColors[0][c] + pColors[3][c]) / 2; - pColors[1][c] = static_cast<uint8_t>(m); - pColors[2][c] = static_cast<uint8_t>(m); - } - pColors[2][3] = 0; - return true; - } - - for (uint32_t c = 0; c < 4; c++) - { - pColors[1][c] = static_cast<uint8_t>((pColors[0][c] * 5 + pColors[3][c] * 3) / 8); - pColors[2][c] = static_cast<uint8_t>((pColors[0][c] * 3 + pColors[3][c] * 5) / 8); - } - - return false; - } - - color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const - { - assert((x < m_width) && (y < m_height)); - - int block_x0 = (static_cast<int>(x) - 2) >> 2; - int block_x1 = block_x0 + 1; - int block_y0 = (static_cast<int>(y) - 2) >> 2; - int block_y1 = block_y0 + 1; - - block_x0 = posmod(block_x0, m_block_width); - block_x1 = posmod(block_x1, m_block_width); - block_y0 = posmod(block_y0, m_block_height); - block_y1 = posmod(block_y1, m_block_height); - - if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) - { - if (m == 0) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); - else if (m == 3) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); - - color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); - color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); - - return color_rgba((l[0] + h[0]) / 2, (l[1] + h[1]) / 2, (l[2] + h[2]) / 2, (m == 2) ? 0 : (l[3] + h[3]) / 2); - } - else - { - if (m == 0) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); - else if (m == 3) - return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); - - color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); - color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); - - if (m == 2) - return color_rgba((l[0] * 3 + h[0] * 5) / 8, (l[1] * 3 + h[1] * 5) / 8, (l[2] * 3 + h[2] * 5) / 8, (l[3] * 3 + h[3] * 5) / 8); - else - return color_rgba((l[0] * 5 + h[0] * 3) / 8, (l[1] * 5 + h[1] * 3) / 8, (l[2] * 5 + h[2] * 3) / 8, (l[3] * 5 + h[3] * 3) / 8); - } - } - -} // basisu diff --git a/thirdparty/basis_universal/encoder/apg_bmp.c b/thirdparty/basis_universal/encoder/apg_bmp.c new file mode 100644 index 0000000000..ef3d015e40 --- /dev/null +++ b/thirdparty/basis_universal/encoder/apg_bmp.c @@ -0,0 +1,541 @@ +/* +BMP File Reader/Writer Implementation +Anton Gerdelan +Version: 3 +Licence: see apg_bmp.h +C99 +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS 1 +#endif + +#include "apg_bmp.h" +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Maximum pixel dimensions of width or height of an image. Should accommodate max used in graphics APIs. + NOTE: 65536*65536 is the biggest number storable in 32 bits. + This needs to be multiplied by n_channels so actual memory indices are not uint32 but size_t to avoid overflow. + Note this will crash stb_image_write et al at maximum size which use 32bits, so reduce max size to accom. */ +#define _BMP_MAX_DIMS 65536 +#define _BMP_FILE_HDR_SZ 14 +#define _BMP_MIN_DIB_HDR_SZ 40 +#define _BMP_MIN_HDR_SZ ( _BMP_FILE_HDR_SZ + _BMP_MIN_DIB_HDR_SZ ) +#define _BMP_MAX_IMAGE_FILE_SIZE (1024ULL*1024ULL*1024ULL) + +#pragma pack( push, 1 ) // supported on GCC in addition to individual packing attribs +/* All BMP files, regardless of type, start with this file header */ +typedef struct _bmp_file_header_t { + char file_type[2]; + uint32_t file_sz; + uint16_t reserved1; + uint16_t reserved2; + uint32_t image_data_offset; +} _bmp_file_header_t; + +/* Following the file header is the BMP type header. this is the most commonly used format */ +typedef struct _bmp_dib_BITMAPINFOHEADER_t { + uint32_t this_header_sz; + int32_t w; // in older headers w & h these are shorts and may be unsigned + int32_t h; // + uint16_t n_planes; // must be 1 + uint16_t bpp; // bits per pixel. 1,4,8,16,24,32. + uint32_t compression_method; // 16 and 32-bit images must have a value of 3 here + uint32_t image_uncompressed_sz; // not consistently used in the wild, so ignored here. + int32_t horiz_pixels_per_meter; // not used. + int32_t vert_pixels_per_meter; // not used. + uint32_t n_colours_in_palette; // + uint32_t n_important_colours; // not used. + /* NOTE(Anton) a DIB header may end here at 40-bytes. be careful using sizeof() */ + /* if 'compression' value, above, is set to 3 ie the image is 16 or 32-bit, then these colour channel masks follow the headers. + these are big-endian order bit masks to assign bits of each pixel to different colours. bits used must be contiguous and not overlap. */ + uint32_t bitmask_r; + uint32_t bitmask_g; + uint32_t bitmask_b; +} _bmp_dib_BITMAPINFOHEADER_t; +#pragma pack( pop ) + +typedef enum _bmp_compression_t { + BI_RGB = 0, + BI_RLE8 = 1, + BI_RLE4 = 2, + BI_BITFIELDS = 3, + BI_JPEG = 4, + BI_PNG = 5, + BI_ALPHABITFIELDS = 6, + BI_CMYK = 11, + BI_CMYKRLE8 = 12, + BI_CMYRLE4 = 13 +} _bmp_compression_t; + +/* convenience struct and file->memory function */ +typedef struct _entire_file_t { + void* data; + size_t sz; +} _entire_file_t; + +/* +RETURNS +- true on success. record->data is allocated memory and must be freed by the caller. +- false on any error. Any allocated memory is freed if false is returned */ +static bool _read_entire_file( const char* filename, _entire_file_t* record ) { + FILE* fp = fopen( filename, "rb" ); + if ( !fp ) { return false; } + fseek( fp, 0L, SEEK_END ); + record->sz = (size_t)ftell( fp ); + + // Immediately bail on anything larger than _BMP_MAX_IMAGE_FILE_SIZE. + if (record->sz > _BMP_MAX_IMAGE_FILE_SIZE) { + fclose( fp ); + return false; + } + + record->data = malloc( record->sz ); + if ( !record->data ) { + fclose( fp ); + return false; + } + rewind( fp ); + size_t nr = fread( record->data, record->sz, 1, fp ); + fclose( fp ); + if ( 1 != nr ) { return false; } + return true; +} + +static bool _validate_file_hdr( _bmp_file_header_t* file_hdr_ptr, size_t file_sz ) { + if ( !file_hdr_ptr ) { return false; } + if ( file_hdr_ptr->file_type[0] != 'B' || file_hdr_ptr->file_type[1] != 'M' ) { return false; } + if ( file_hdr_ptr->image_data_offset > file_sz ) { return false; } + return true; +} + +static bool _validate_dib_hdr( _bmp_dib_BITMAPINFOHEADER_t* dib_hdr_ptr, size_t file_sz ) { + if ( !dib_hdr_ptr ) { return false; } + if ( _BMP_FILE_HDR_SZ + dib_hdr_ptr->this_header_sz > file_sz ) { return false; } + if ( ( 32 == dib_hdr_ptr->bpp || 16 == dib_hdr_ptr->bpp ) && ( BI_BITFIELDS != dib_hdr_ptr->compression_method && BI_ALPHABITFIELDS != dib_hdr_ptr->compression_method ) ) { + return false; + } + if ( BI_RGB != dib_hdr_ptr->compression_method && BI_BITFIELDS != dib_hdr_ptr->compression_method && BI_ALPHABITFIELDS != dib_hdr_ptr->compression_method ) { + return false; + } + // NOTE(Anton) using abs() in the if-statement was blowing up on large negative numbers. switched to labs() + if ( 0 == dib_hdr_ptr->w || 0 == dib_hdr_ptr->h || labs( dib_hdr_ptr->w ) > _BMP_MAX_DIMS || labs( dib_hdr_ptr->h ) > _BMP_MAX_DIMS ) { return false; } + + /* NOTE(Anton) if images reliably used n_colours_in_palette we could have done a palette/file size integrity check here. + because some always set 0 then we have to check every palette indexing as we read them */ + return true; +} + +/* NOTE(Anton) this could have ifdef branches on different compilers for the intrinsics versions for perf */ +static uint32_t _bitscan( uint32_t dword ) { + for ( uint32_t i = 0; i < 32; i++ ) { + if ( 1 & dword ) { return i; } + dword = dword >> 1; + } + return (uint32_t)-1; +} + +unsigned char* apg_bmp_read( const char* filename, int* w, int* h, unsigned int* n_chans ) { + if ( !filename || !w || !h || !n_chans ) { return NULL; } + + // read in the whole file into memory first - much faster than parsing on-the-fly + _entire_file_t record; + if ( !_read_entire_file( filename, &record ) ) { return NULL; } + if ( record.sz < _BMP_MIN_HDR_SZ ) { + free( record.data ); + return NULL; + } + + // grab and validate the first, file, header + _bmp_file_header_t* file_hdr_ptr = (_bmp_file_header_t*)record.data; + if ( !_validate_file_hdr( file_hdr_ptr, record.sz ) ) { + free( record.data ); + return NULL; + } + + // grad and validate the second, DIB, header + _bmp_dib_BITMAPINFOHEADER_t* dib_hdr_ptr = (_bmp_dib_BITMAPINFOHEADER_t*)( (uint8_t*)record.data + _BMP_FILE_HDR_SZ ); + if ( !_validate_dib_hdr( dib_hdr_ptr, record.sz ) ) { + free( record.data ); + return NULL; + } + + // bitmaps can have negative dims to indicate the image should be flipped + uint32_t width = *w = abs( dib_hdr_ptr->w ); + uint32_t height = *h = abs( dib_hdr_ptr->h ); + + // TODO(Anton) flip image memory at the end if this is true. because doing it per row was making me write bugs. + // bool vertically_flip = dib_hdr_ptr->h > 0 ? false : true; + + // channel count and palette are not well defined in the header so we make a good guess here + uint32_t n_dst_chans = 3, n_src_chans = 3; + bool has_palette = false; + switch ( dib_hdr_ptr->bpp ) { + case 32: n_dst_chans = n_src_chans = 4; break; // technically can be RGB but not supported + case 24: n_dst_chans = n_src_chans = 3; break; // technically can be RGBA but not supported + case 8: // seems to always use a BGR0 palette, even for greyscale + n_dst_chans = 3; + has_palette = true; + n_src_chans = 1; + break; + case 4: // always has a palette - needed for a MS-saved BMP + n_dst_chans = 3; + has_palette = true; + n_src_chans = 1; + break; + case 1: // 1-bpp means the palette has 3 colour channels with 2 colours i.e. monochrome but not always black & white + n_dst_chans = 3; + has_palette = true; + n_src_chans = 1; + break; + default: // this includes 2bpp and 16bpp + free( record.data ); + return NULL; + } // endswitch + *n_chans = n_dst_chans; + // NOTE(Anton) some image formats are not allowed a palette - could check for a bad header spec here also + if ( dib_hdr_ptr->n_colours_in_palette > 0 ) { has_palette = true; } + +#ifdef APG_BMP_DEBUG_OUTPUT + printf( "apg_bmp_debug: reading image\n|-filename `%s`\n|-dims %ux%u pixels\n|-bpp %u\n|-n_src_chans %u\n|-n_dst_chans %u\n", filename, *w, *h, + dib_hdr_ptr->bpp, n_src_chans, n_dst_chans ); +#endif + + uint32_t palette_offset = _BMP_FILE_HDR_SZ + dib_hdr_ptr->this_header_sz; + bool has_bitmasks = false; + if ( BI_BITFIELDS == dib_hdr_ptr->compression_method || BI_ALPHABITFIELDS == dib_hdr_ptr->compression_method ) { + has_bitmasks = true; + palette_offset += 12; + } + if ( palette_offset > record.sz ) { + free( record.data ); + return NULL; + } + + // work out if any padding how much to skip at end of each row + uint32_t unpadded_row_sz = width * n_src_chans; + // bit-encoded palette indices have different padding properties + if ( 4 == dib_hdr_ptr->bpp ) { + unpadded_row_sz = width % 2 > 0 ? width / 2 + 1 : width / 2; // find how many whole bytes required for this bit width + } + if ( 1 == dib_hdr_ptr->bpp ) { + unpadded_row_sz = width % 8 > 0 ? width / 8 + 1 : width / 8; // find how many whole bytes required for this bit width + } + uint32_t row_padding_sz = 0 == unpadded_row_sz % 4 ? 0 : 4 - ( unpadded_row_sz % 4 ); // NOTE(Anton) didn't expect operator precedence of - over % + + // another file size integrity check: partially validate source image data size + // 'image_data_offset' is by row padded to 4 bytes and is either colour data or palette indices. + if ( file_hdr_ptr->image_data_offset + ( unpadded_row_sz + row_padding_sz ) * height > record.sz ) { + free( record.data ); + return NULL; + } + + // find which bit number each colour channel starts at, so we can separate colours out + uint32_t bitshift_rgba[4] = {0, 0, 0, 0}; // NOTE(Anton) noticed this was int and not uint32_t so changed it. 17 Mar 2020 + uint32_t bitmask_a = 0; + if ( has_bitmasks ) { + bitmask_a = ~( dib_hdr_ptr->bitmask_r | dib_hdr_ptr->bitmask_g | dib_hdr_ptr->bitmask_b ); + bitshift_rgba[0] = _bitscan( dib_hdr_ptr->bitmask_r ); + bitshift_rgba[1] = _bitscan( dib_hdr_ptr->bitmask_g ); + bitshift_rgba[2] = _bitscan( dib_hdr_ptr->bitmask_b ); + bitshift_rgba[3] = _bitscan( bitmask_a ); + } + + // allocate memory for the output pixels block. cast to size_t in case width and height are both the max of 65536 and n_dst_chans > 1 + unsigned char* dst_img_ptr = malloc( (size_t)width * (size_t)height * (size_t)n_dst_chans ); + if ( !dst_img_ptr ) { + free( record.data ); + return NULL; + } + + uint8_t* palette_data_ptr = (uint8_t*)record.data + palette_offset; + uint8_t* src_img_ptr = (uint8_t*)record.data + file_hdr_ptr->image_data_offset; + size_t dst_stride_sz = width * n_dst_chans; + + // == 32-bpp -> 32-bit RGBA. == 32-bit and 16-bit require bitmasks + if ( 32 == dib_hdr_ptr->bpp ) { + // check source image has enough data in it to read from + if ( (size_t)file_hdr_ptr->image_data_offset + (size_t)height * (size_t)width * (size_t)n_src_chans > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = r * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + uint32_t pixel; + memcpy( &pixel, &src_img_ptr[src_byte_idx], 4 ); + // NOTE(Anton) the below assumes 32-bits is always RGBA 1 byte per channel. 10,10,10 RGB exists though and isn't handled. + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & dib_hdr_ptr->bitmask_r ) >> bitshift_rgba[0] ); + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & dib_hdr_ptr->bitmask_g ) >> bitshift_rgba[1] ); + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & dib_hdr_ptr->bitmask_b ) >> bitshift_rgba[2] ); + dst_img_ptr[dst_pixels_idx++] = ( uint8_t )( ( pixel & bitmask_a ) >> bitshift_rgba[3] ); + src_byte_idx += 4; + } + src_byte_idx += row_padding_sz; + } + + // == 8-bpp -> 24-bit RGB == + } else if ( 8 == dib_hdr_ptr->bpp && has_palette ) { + // validate indices (body of image data) fits in file + if ( file_hdr_ptr->image_data_offset + height * width > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + // "most palettes are 4 bytes in RGB0 order but 3 for..." - it was actually BRG0 in old images -- Anton + uint8_t index = src_img_ptr[src_byte_idx]; // 8-bit index value per pixel + + if ( palette_offset + index * 4 + 2 >= record.sz ) { + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[index * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[index * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[index * 4 + 0]; + src_byte_idx++; + } + src_byte_idx += row_padding_sz; + } + + // == 4-bpp (16-colour) -> 24-bit RGB == + } else if ( 4 == dib_hdr_ptr->bpp && has_palette ) { + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + if ( file_hdr_ptr->image_data_offset + src_byte_idx > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + // handle 2 pixels at a time + uint8_t pixel_duo = src_img_ptr[src_byte_idx]; + uint8_t a_index = ( 0xFF & pixel_duo ) >> 4; + uint8_t b_index = 0xF & pixel_duo; + + if ( palette_offset + a_index * 4 + 2 >= record.sz ) { // invalid src image + free( record.data ); + return dst_img_ptr; + } + if ( dst_pixels_idx + 3 > width * height * n_dst_chans ) { // done + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[a_index * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[a_index * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[a_index * 4 + 0]; + if ( ++c >= width ) { // advance a column + c = 0; + r++; + if ( r >= height ) { // done. no need to get second pixel. eg a 1x1 pixel image. + free( record.data ); + return dst_img_ptr; + } + dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + } + + if ( palette_offset + b_index * 4 + 2 >= record.sz ) { // invalid src image + free( record.data ); + return dst_img_ptr; + } + if ( dst_pixels_idx + 3 > width * height * n_dst_chans ) { // done. probably redundant check since checking r >= height. + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[b_index * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[b_index * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[b_index * 4 + 0]; + src_byte_idx++; + } + src_byte_idx += row_padding_sz; + } + + // == 1-bpp -> 24-bit RGB == + } else if ( 1 == dib_hdr_ptr->bpp && has_palette ) { + /* encoding method for monochrome is not well documented. + a 2x2 pixel image is stored as 4 1-bit palette indexes + the palette is stored as any 2 RGB0 colours (not necessarily B&W) + so for an image with indexes like so: + 1 1 + 0 1 + it is bit-encoded as follows, starting at MSB: + 01000000 00000000 00000000 00000000 (first byte val 64) + 11000000 00000000 00000000 00000000 (first byte val 192) + data is still split by row and each row padded to 4 byte multiples + */ + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + uint8_t bit_idx = 0; // used in monochrome + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + if ( 8 == bit_idx ) { // start reading from the next byte + src_byte_idx++; + bit_idx = 0; + } + if ( file_hdr_ptr->image_data_offset + src_byte_idx > record.sz ) { + free( record.data ); + return dst_img_ptr; + } + uint8_t pixel_oct = src_img_ptr[src_byte_idx]; + uint8_t bit = 128 >> bit_idx; + uint8_t masked = pixel_oct & bit; + uint8_t palette_idx = masked > 0 ? 1 : 0; + + if ( palette_offset + palette_idx * 4 + 2 >= record.sz ) { + free( record.data ); + return dst_img_ptr; + } + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[palette_idx * 4 + 2]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[palette_idx * 4 + 1]; + dst_img_ptr[dst_pixels_idx++] = palette_data_ptr[palette_idx * 4 + 0]; + bit_idx++; + } + src_byte_idx += ( row_padding_sz + 1 ); // 1bpp is special here + } + + // == 24-bpp -> 24-bit RGB == (but also should handle some other n_chans cases) + } else { + // NOTE(Anton) this only supports 1 byte per channel + if ( file_hdr_ptr->image_data_offset + height * width * n_dst_chans > record.sz ) { + free( record.data ); + free( dst_img_ptr ); + return NULL; + } + size_t src_byte_idx = 0; + for ( uint32_t r = 0; r < height; r++ ) { + size_t dst_pixels_idx = ( height - 1 - r ) * dst_stride_sz; + for ( uint32_t c = 0; c < width; c++ ) { + // re-orders from BGR to RGB + if ( n_dst_chans > 3 ) { dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx + 3]; } + if ( n_dst_chans > 2 ) { dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx + 2]; } + if ( n_dst_chans > 1 ) { dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx + 1]; } + dst_img_ptr[dst_pixels_idx++] = src_img_ptr[src_byte_idx]; + src_byte_idx += n_src_chans; + } + src_byte_idx += row_padding_sz; + } + } // endif bpp + + free( record.data ); + return dst_img_ptr; +} + +void apg_bmp_free( unsigned char* pixels_ptr ) { + if ( !pixels_ptr ) { return; } + free( pixels_ptr ); +} + +unsigned int apg_bmp_write( const char* filename, unsigned char* pixels_ptr, int w, int h, unsigned int n_chans ) { + if ( !filename || !pixels_ptr ) { return 0; } + if ( 0 == w || 0 == h ) { return 0; } + if ( labs( w ) > _BMP_MAX_DIMS || labs( h ) > _BMP_MAX_DIMS ) { return 0; } + if ( n_chans != 3 && n_chans != 4 ) { return 0; } + + uint32_t height = (uint32_t)labs( h ); + uint32_t width = (uint32_t)labs( w ); + // work out if any padding how much to skip at end of each row + const size_t unpadded_row_sz = width * n_chans; + const size_t row_padding_sz = 0 == unpadded_row_sz % 4 ? 0 : 4 - unpadded_row_sz % 4; + const size_t row_sz = unpadded_row_sz + row_padding_sz; + const size_t dst_pixels_padded_sz = row_sz * height; + + const size_t dib_hdr_sz = sizeof( _bmp_dib_BITMAPINFOHEADER_t ); + _bmp_file_header_t file_hdr; + { + file_hdr.file_type[0] = 'B'; + file_hdr.file_type[1] = 'M'; + file_hdr.file_sz = _BMP_FILE_HDR_SZ + (uint32_t)dib_hdr_sz + (uint32_t)dst_pixels_padded_sz; + file_hdr.reserved1 = 0; + file_hdr.reserved2 = 0; + file_hdr.image_data_offset = _BMP_FILE_HDR_SZ + (uint32_t)dib_hdr_sz; + } + _bmp_dib_BITMAPINFOHEADER_t dib_hdr; + { + dib_hdr.this_header_sz = _BMP_MIN_DIB_HDR_SZ; // NOTE: must be 40 and not include the bitmask memory in size here + dib_hdr.w = w; + dib_hdr.h = h; + dib_hdr.n_planes = 1; + dib_hdr.bpp = 3 == n_chans ? 24 : 32; + dib_hdr.compression_method = 3 == n_chans ? BI_RGB : BI_BITFIELDS; + dib_hdr.image_uncompressed_sz = 0; + dib_hdr.horiz_pixels_per_meter = 0; + dib_hdr.vert_pixels_per_meter = 0; + dib_hdr.n_colours_in_palette = 0; + dib_hdr.n_important_colours = 0; + // big-endian masks. only used in BI_BITFIELDS and BI_ALPHABITFIELDS ( 16 and 32-bit images ) + // important note: GIMP stores BMP data in this array order for 32-bit: [A][B][G][R] + dib_hdr.bitmask_r = 0xFF000000; + dib_hdr.bitmask_g = 0x00FF0000; + dib_hdr.bitmask_b = 0x0000FF00; + } + + uint8_t* dst_pixels_ptr = malloc( dst_pixels_padded_sz ); + if ( !dst_pixels_ptr ) { return 0; } + { + size_t dst_byte_idx = 0; + uint8_t padding[4] = {0, 0, 0, 0}; + uint8_t rgba[4] = {0, 0, 0, 0}; + uint8_t bgra[4] = {0, 0, 0, 0}; + + for ( uint32_t row = 0; row < height; row++ ) { + size_t src_byte_idx = ( height - 1 - row ) * n_chans * width; + for ( uint32_t col = 0; col < width; col++ ) { + for ( uint32_t chan = 0; chan < n_chans; chan++ ) { rgba[chan] = pixels_ptr[src_byte_idx++]; } + if ( 3 == n_chans ) { + bgra[0] = rgba[2]; + bgra[1] = rgba[1]; + bgra[2] = rgba[0]; + } else { + /* NOTE(Anton) RGBA with alpha channel would be better supported with an extended DIB header */ + bgra[0] = rgba[3]; + bgra[1] = rgba[2]; + bgra[2] = rgba[1]; + bgra[3] = rgba[0]; // alpha + } + memcpy( &dst_pixels_ptr[dst_byte_idx], bgra, n_chans ); + dst_byte_idx += (size_t)n_chans; + } // endfor col + if ( row_padding_sz > 0 ) { + memcpy( &dst_pixels_ptr[dst_byte_idx], padding, row_padding_sz ); + dst_byte_idx += row_padding_sz; + } + } // endfor row + } + { + FILE* fp = fopen( filename, "wb" ); + if ( !fp ) { + free( dst_pixels_ptr ); + return 0; + } + if ( 1 != fwrite( &file_hdr, _BMP_FILE_HDR_SZ, 1, fp ) ) { + free( dst_pixels_ptr ); + fclose( fp ); + return 0; + } + if ( 1 != fwrite( &dib_hdr, dib_hdr_sz, 1, fp ) ) { + free( dst_pixels_ptr ); + fclose( fp ); + return 0; + } + if ( 1 != fwrite( dst_pixels_ptr, dst_pixels_padded_sz, 1, fp ) ) { + free( dst_pixels_ptr ); + fclose( fp ); + return 0; + } + fclose( fp ); + } + free( dst_pixels_ptr ); + + return 1; +} diff --git a/thirdparty/basis_universal/encoder/apg_bmp.h b/thirdparty/basis_universal/encoder/apg_bmp.h new file mode 100644 index 0000000000..8cd73b62e0 --- /dev/null +++ b/thirdparty/basis_universal/encoder/apg_bmp.h @@ -0,0 +1,123 @@ +/* +BMP File Reader/Writer Implementation +Anton Gerdelan +Version: 3.1 18 March 2020. +Licence: see bottom of file. +C89 ( Implementation is C99 ) + +Contributors: +- Anton Gerdelan - Initial code. +- Saija Sorsa - Fuzz testing. + +Instructions: +- Just drop this header, and the matching .c file into your project. +- To get debug printouts during parsing define APG_BMP_DEBUG_OUTPUT. + +Advantages: +- The implementation is fast, simple, and supports more formats than most BMP reader libraries. +- The reader function is fuzzed with AFL https://lcamtuf.coredump.cx/afl/. +- The reader is robust to large files and malformed files, and will return any valid partial data in an image. +- Reader supports 32bpp (with alpha channel), 24bpp, 8bpp, 4bpp, and 1bpp monochrome BMP images. +- Reader handles indexed BMP images using a colour palette. +- Writer supports 32bpp RGBA and 24bpp uncompressed RGB images. + +Current Limitations: +- 16-bit images not supported (don't have any samples to test on). +- No support for interleaved channel bit layouts eg RGB101010 RGB555 RGB565. +- No support for compressed BMP images, although in practice these are not used. +- Output images with alpha channel are written in BITMAPINFOHEADER format. + For better alpha support in other apps the 124-bit v5 header could be used instead, + at the cost of some backward compatibility and bloat. + +To Do: +- FUZZING + - create a unique fuzz test set for (8,4,1 BPP). +- (maybe) FEATURE Flipping the image based on negative width and height in header, and/or function arguments. +- (maybe) PERF ifdef intrinsics/asm for bitscan. Platform-specific code so won't include unless necessary. +- (maybe) FEATURE Add parameter for padding output memory to eg 4-byte alignment or n channels. +- (maybe) FEATURE Improved apps support in alpha channel writing (using v5 header). +*/ + +#ifndef APG_BMP_H_ +#define APG_BMP_H_ + +#ifdef __cplusplus +extern "C" { +#endif /* CPP */ + +/* Reads a bitmap from a file, allocates memory for the raw image data, and returns it. +PARAMS + * w,h, - Retrieves the width and height of the BMP in pixels. + * n_chans - Retrieves the number of channels in the BMP. +RETURNS + * Tightly-packed pixel memory in RGBA order. The caller must call free() on the memory. + * NULL on any error. Any allocated memory is freed before returning NULL. */ +unsigned char* apg_bmp_read( const char* filename, int* w, int* h, unsigned int* n_chans ); + +/* Calls free() on memory created by apg_bmp_read */ +void apg_bmp_free( unsigned char* pixels_ptr ); + +/* Writes a bitmap to a file. +PARAMS + * filename - e.g."my_bitmap.bmp". Must not be NULL. + * pixels_ptr - Pointer to tightly-packed pixel memory in RGBA order. Must not be NULL. There must be abs(w)*abs(h)*n_chans bytes in the memory pointed to. + * w,h, - Width and height of the image in pixels. + * n_chans - The number of channels in the BMP. 3 or 4 supported for writing, which means RGB or RGBA memory, respectively. +RETURNS + * Zero on any error, non zero on success. */ +unsigned int apg_bmp_write( const char* filename, unsigned char* pixels_ptr, int w, int h, unsigned int n_chans ); + +#ifdef __cplusplus +} +#endif /* CPP */ + +#endif /*_APG_BMP_H_ */ + +/* +------------------------------------------------------------------------------------- +This software is available under two licences - you may use it under either licence. +------------------------------------------------------------------------------------- +FIRST LICENCE OPTION + +> Apache License +> Version 2.0, January 2004 +> http://www.apache.org/licenses/ +> Copyright 2019 Anton Gerdelan. +> Licensed under the Apache License, Version 2.0 (the "License"); +> you may not use this file except in compliance with the License. +> You may obtain a copy of the License at +> http://www.apache.org/licenses/LICENSE-2.0 +> Unless required by applicable law or agreed to in writing, software +> distributed under the License is distributed on an "AS IS" BASIS, +> WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +> See the License for the specific language governing permissions and +> limitations under the License. +------------------------------------------------------------------------------------- +SECOND LICENCE OPTION + +> This is free and unencumbered software released into the public domain. +> +> Anyone is free to copy, modify, publish, use, compile, sell, or +> distribute this software, either in source code form or as a compiled +> binary, for any purpose, commercial or non-commercial, and by any +> means. +> +> In jurisdictions that recognize copyright laws, the author or authors +> of this software dedicate any and all copyright interest in the +> software to the public domain. We make this dedication for the benefit +> of the public at large and to the detriment of our heirs and +> successors. We intend this dedication to be an overt act of +> relinquishment in perpetuity of all present and future rights to this +> software under copyright law. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +> IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +> OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +> ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +> OTHER DEALINGS IN THE SOFTWARE. +> +> For more information, please refer to <http://unlicense.org> +------------------------------------------------------------------------------------- +*/ diff --git a/thirdparty/basis_universal/basisu_astc_decomp.cpp b/thirdparty/basis_universal/encoder/basisu_astc_decomp.cpp index cc0a6ced7a..53bccfc515 100644 --- a/thirdparty/basis_universal/basisu_astc_decomp.cpp +++ b/thirdparty/basis_universal/encoder/basisu_astc_decomp.cpp @@ -50,6 +50,13 @@ typedef uint64_t deUint64; #define DE_ASSERT assert +#ifdef _MSC_VER +#pragma warning (disable:4505) // unreferenced local function has been removed +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + namespace basisu_astc { static bool inBounds(int v, int l, int h) @@ -150,7 +157,7 @@ namespace basisu_astc UVec4 asUint() const { - return UVec4(std::max(0, m_c[0]), std::max(0, m_c[1]), std::max(0, m_c[2]), std::max(0, m_c[3])); + return UVec4(basisu::maximum(0, m_c[0]), basisu::maximum(0, m_c[1]), basisu::maximum(0, m_c[2]), basisu::maximum(0, m_c[3])); } int32_t operator[] (uint32_t idx) const { assert(idx < 4); return m_c[idx]; } @@ -1256,7 +1263,7 @@ void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeigh const int numWeightsPerTexel = blockMode.isDualPlane ? 2 : 1; const deUint32 scaleX = (1024 + blockWidth/2) / (blockWidth-1); const deUint32 scaleY = (1024 + blockHeight/2) / (blockHeight-1); - DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= DE_LENGTH_OF_ARRAY(unquantizedWeights)); + DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= (int)DE_LENGTH_OF_ARRAY(unquantizedWeights)); for (int texelY = 0; texelY < blockHeight; texelY++) { for (int texelX = 0; texelX < blockWidth; texelX++) @@ -1548,3 +1555,7 @@ bool decompress(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth } // astc } // basisu_astc + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/thirdparty/basis_universal/basisu_astc_decomp.h b/thirdparty/basis_universal/encoder/basisu_astc_decomp.h index 6cd053b7b6..9ec2e46076 100644 --- a/thirdparty/basis_universal/basisu_astc_decomp.h +++ b/thirdparty/basis_universal/encoder/basisu_astc_decomp.h @@ -23,7 +23,7 @@ * \brief ASTC Utilities. *//*--------------------------------------------------------------------*/ -#include "transcoder/basisu.h" // to pick up the iterator debug level madness +#include "../transcoder/basisu.h" // to pick up the iterator debug level madness #include <vector> #include <stdint.h> diff --git a/thirdparty/basis_universal/basisu_backend.cpp b/thirdparty/basis_universal/encoder/basisu_backend.cpp index 3a689e58d7..19911fcbb4 100644 --- a/thirdparty/basis_universal/basisu_backend.cpp +++ b/thirdparty/basis_universal/encoder/basisu_backend.cpp @@ -1,5 +1,5 @@ // basisu_backend.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,11 @@ // #include "basisu_backend.h" +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + #define BASISU_FASTER_SELECTOR_REORDERING 0 #define BASISU_BACKEND_VERIFY(c) verify(c, __LINE__); @@ -176,64 +181,117 @@ namespace basisu void basisu_backend::reoptimize_and_sort_endpoints_codebook(uint32_t total_block_endpoints_remapped, uint_vec& all_endpoint_indices) { basisu_frontend& r = *m_pFront_end; - const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + //const bool is_video = r.get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; - if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0)) + if (m_params.m_used_global_codebooks) { - // We're changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed) - uint_vec new_block_endpoints(get_total_blocks()); - - for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + m_endpoint_remap_table_old_to_new.clear(); + m_endpoint_remap_table_old_to_new.resize(r.get_total_endpoint_clusters()); + for (uint32_t i = 0; i < r.get_total_endpoint_clusters(); i++) + m_endpoint_remap_table_old_to_new[i] = i; + } + else + { + //if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 0)) + if ((total_block_endpoints_remapped) && (m_params.m_compression_level > 1)) { - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; - const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + // We've changed the block endpoint indices, so we need to go and adjust the endpoint codebook (remove unused entries, optimize existing entries that have changed) + uint_vec new_block_endpoints(get_total_blocks()); - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) - new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index; - } + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; - int_vec old_to_new_endpoint_indices; - r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true); + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + new_block_endpoints[first_block_index + block_x + block_y * num_blocks_x] = m_slice_encoder_blocks[slice_index](block_x, block_y).m_endpoint_index; + } - create_endpoint_palette(); + int_vec old_to_new_endpoint_indices; + r.reoptimize_remapped_endpoints(new_block_endpoints, old_to_new_endpoint_indices, true); - for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) - { - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + create_endpoint_palette(); - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; - const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; - const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; - - for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; + const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { - const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; - encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); - m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index]; - } // block_x - } // block_y - } // slice_index + m.m_endpoint_index = old_to_new_endpoint_indices[m.m_endpoint_index]; + } // block_x + } // block_y + } // slice_index + + for (uint32_t i = 0; i < all_endpoint_indices.size(); i++) + all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]]; + + } //if (total_block_endpoints_remapped) + + // Sort endpoint codebook + palette_index_reorderer reorderer; + reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0); + m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); + } - for (uint32_t i = 0; i < all_endpoint_indices.size(); i++) - all_endpoint_indices[i] = old_to_new_endpoint_indices[all_endpoint_indices[i]]; + // For endpoints, old_to_new[] may not be bijective! + // Some "old" entries may be unused and don't get remapped into the "new" array. - } //if (total_block_endpoints_remapped) + m_old_endpoint_was_used.clear(); + m_old_endpoint_was_used.resize(r.get_total_endpoint_clusters()); + uint32_t first_old_entry_index = UINT32_MAX; - // Sort endpoint codebook - palette_index_reorderer reorderer; - reorderer.init((uint32_t)all_endpoint_indices.size(), &all_endpoint_indices[0], r.get_total_endpoint_clusters(), nullptr, nullptr, 0); - m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y; + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + const uint32_t old_endpoint_index = m.m_endpoint_index; + m_old_endpoint_was_used[old_endpoint_index] = true; + first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index); + } // block_x + } // block_y + } // slice_index + + debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index); + + m_new_endpoint_was_used.clear(); + m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters()); + + m_endpoint_remap_table_new_to_old.clear(); m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); - for (uint32_t i = 0; i < m_endpoint_remap_table_old_to_new.size(); i++) - m_endpoint_remap_table_new_to_old[m_endpoint_remap_table_old_to_new[i]] = i; + + // Set unused entries in the new array to point to the first used entry in the old array. + m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index); + + for (uint32_t old_index = 0; old_index < m_endpoint_remap_table_old_to_new.size(); old_index++) + { + if (m_old_endpoint_was_used[old_index]) + { + const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index]; + + m_new_endpoint_was_used[new_index] = true; + + m_endpoint_remap_table_new_to_old[new_index] = old_index; + } + } } void basisu_backend::sort_selector_codebook() @@ -242,7 +300,7 @@ namespace basisu m_selector_remap_table_new_to_old.resize(r.get_total_selector_clusters()); - if (m_params.m_compression_level == 0) + if ((m_params.m_compression_level == 0) || (m_params.m_used_global_codebooks)) { for (uint32_t i = 0; i < r.get_total_selector_clusters(); i++) m_selector_remap_table_new_to_old[i] = i; @@ -336,10 +394,10 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { const bool is_iframe = m_slices[slice_index].m_iframe; - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; const int prev_frame_slice_index = find_video_frame(slice_index, -1); @@ -393,6 +451,7 @@ namespace basisu BASISU_BACKEND_VERIFY(total_invalid_crs == 0); } + void basisu_backend::create_encoder_blocks() { basisu_frontend& r = *m_pFront_end; @@ -411,8 +470,8 @@ namespace basisu const bool is_iframe = m_slices[slice_index].m_iframe; const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; @@ -590,7 +649,7 @@ namespace basisu { for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; + //const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; const uint32_t width = m_slices[slice_index].m_width; const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; @@ -603,7 +662,7 @@ namespace basisu { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) { - const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); @@ -662,7 +721,7 @@ namespace basisu histogram selector_histogram(r.get_total_selector_clusters() + basist::MAX_SELECTOR_HISTORY_BUF_SIZE + 1); histogram selector_history_buf_rle_histogram(1 << basist::SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - std::vector<uint_vec> selector_syms(m_slices.size()); + basisu::vector<uint_vec> selector_syms(m_slices.size()); const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = r.get_total_selector_clusters(); const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX + basist::MAX_SELECTOR_HISTORY_BUF_SIZE; @@ -672,7 +731,7 @@ namespace basisu histogram delta_endpoint_histogram(r.get_total_endpoint_clusters()); histogram endpoint_pred_histogram(basist::ENDPOINT_PRED_TOTAL_SYMBOLS); - std::vector<uint_vec> endpoint_pred_syms(m_slices.size()); + basisu::vector<uint_vec> endpoint_pred_syms(m_slices.size()); uint32_t total_endpoint_indices_remapped = 0; @@ -680,11 +739,11 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1; - const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1; + //const int prev_frame_slice_index = is_video ? find_video_frame(slice_index, -1) : -1; + //const int next_frame_slice_index = is_video ? find_video_frame(slice_index, 1) : -1; const uint32_t first_block_index = m_slices[slice_index].m_first_block_index; - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; @@ -702,7 +761,7 @@ namespace basisu { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) { - const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; + //const uint32_t block_index = first_block_index + block_x + block_y * num_blocks_x; encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); @@ -723,6 +782,7 @@ namespace basisu } // block_x } // block_y + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) @@ -821,6 +881,10 @@ namespace basisu if (trial_idx == new_endpoint_index) continue; + // Skip it if this new endpoint palette entry is actually never used. + if (!m_new_endpoint_was_used[trial_idx]) + continue; + const etc1_endpoint_palette_entry& p = m_endpoint_palette[m_endpoint_remap_table_new_to_old[trial_idx]]; trial_etc_blk.set_block_color5_etc1s(p.m_color5); trial_etc_blk.set_inten_tables_etc1s(p.m_inten5); @@ -884,23 +948,32 @@ namespace basisu { const pixel_block& src_pixels = r.get_source_pixel_block(block_index); - etc_block etc_blk(r.get_output_block(block_index)); + const etc_block& etc_blk = r.get_output_block(block_index); color_rgba etc_blk_unpacked[16]; unpack_etc1(etc_blk, etc_blk_unpacked); uint64_t cur_err = 0; - for (uint32_t p = 0; p < 16; p++) - cur_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); - + if (r.get_params().m_perceptual) + { + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(true, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + } + else + { + for (uint32_t p = 0; p < 16; p++) + cur_err += color_distance(false, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); + } + uint64_t best_trial_err = UINT64_MAX; int best_trial_idx = 0; uint32_t best_trial_history_buf_idx = 0; - const float selector_remap_thresh = maximum(1.0f, m_params.m_selector_rdo_quality_thresh); //2.5f; const bool use_strict_search = (m_params.m_compression_level == 0) && (selector_remap_thresh == 1.0f); + const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh); + for (uint32_t j = 0; j < selector_history_buf.size(); j++) { const int trial_idx = selector_history_buf[j]; @@ -917,31 +990,43 @@ namespace basisu } else { - for (uint32_t sy = 0; sy < 4; sy++) - for (uint32_t sx = 0; sx < 4; sx++) - etc_blk.set_selector(sx, sy, m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](sx, sy)); + uint64_t trial_err = 0; + const uint64_t thresh_err = minimum(limit_err, best_trial_err); - // TODO: Optimize this - unpack_etc1(etc_blk, etc_blk_unpacked); + color_rgba block_colors[4]; + etc_blk.get_block_colors(block_colors, 0); - uint64_t trial_err = 0; - const uint64_t thresh_err = minimum((uint64_t)ceilf(cur_err * selector_remap_thresh), best_trial_err); - for (uint32_t p = 0; p < 16; p++) + const uint8_t* pSelectors = &m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](0, 0); + + if (r.get_params().m_perceptual) { - trial_err += color_distance(r.get_params().m_perceptual, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); - if (trial_err > thresh_err) - break; + for (uint32_t p = 0; p < 16; p++) + { + uint32_t sel = pSelectors[p]; + trial_err += color_distance(true, src_pixels.get_ptr()[p], block_colors[sel], false); + if (trial_err > thresh_err) + break; + } } - - if (trial_err <= cur_err * selector_remap_thresh) + else { - if (trial_err < best_trial_err) + for (uint32_t p = 0; p < 16; p++) { - best_trial_err = trial_err; - best_trial_idx = trial_idx; - best_trial_history_buf_idx = j; + uint32_t sel = pSelectors[p]; + trial_err += color_distance(false, src_pixels.get_ptr()[p], block_colors[sel], false); + if (trial_err > thresh_err) + break; } } + + if ((trial_err < best_trial_err) && (trial_err <= thresh_err)) + { + assert(trial_err <= limit_err); + + best_trial_err = trial_err; + best_trial_idx = trial_idx; + best_trial_history_buf_idx = j; + } } } @@ -1086,7 +1171,8 @@ namespace basisu total_selector_indices_remapped, total_selector_indices_remapped * 100.0f / get_total_blocks(), total_used_selector_history_buf, total_used_selector_history_buf * 100.0f / get_total_blocks()); - if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0)) + //if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 0)) + if ((total_endpoint_indices_remapped) && (m_params.m_compression_level > 1) && (!m_params.m_used_global_codebooks)) { int_vec unused; r.reoptimize_remapped_endpoints(block_endpoint_indices, unused, false, &block_selector_indices); @@ -1168,8 +1254,8 @@ namespace basisu for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) { - const uint32_t width = m_slices[slice_index].m_width; - const uint32_t height = m_slices[slice_index].m_height; + //const uint32_t width = m_slices[slice_index].m_width; + //const uint32_t height = m_slices[slice_index].m_height; const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x; const uint32_t num_blocks_y = m_slices[slice_index].m_num_blocks_y; @@ -1296,10 +1382,53 @@ namespace basisu { const basisu_frontend& r = *m_pFront_end; + // The endpoint indices may have been changed by the backend's RDO step, so go and figure out which ones are actually used again. + bool_vec old_endpoint_was_used(r.get_total_endpoint_clusters()); + uint32_t first_old_entry_index = UINT32_MAX; + + for (uint32_t slice_index = 0; slice_index < m_slices.size(); slice_index++) + { + const uint32_t num_blocks_x = m_slices[slice_index].m_num_blocks_x, num_blocks_y = m_slices[slice_index].m_num_blocks_y; + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + { + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + { + encoder_block& m = m_slice_encoder_blocks[slice_index](block_x, block_y); + const uint32_t old_endpoint_index = m.m_endpoint_index; + + old_endpoint_was_used[old_endpoint_index] = true; + first_old_entry_index = basisu::minimum(first_old_entry_index, old_endpoint_index); + } // block_x + } // block_y + } // slice_index + + debug_printf("basisu_backend::encode_endpoint_palette: first_old_entry_index: %u\n", first_old_entry_index); + // Maps NEW to OLD endpoints - uint_vec endpoint_remap_table_inv(r.get_total_endpoint_clusters()); + uint_vec endpoint_remap_table_new_to_old(r.get_total_endpoint_clusters()); + endpoint_remap_table_new_to_old.set_all(first_old_entry_index); + + bool_vec new_endpoint_was_used(r.get_total_endpoint_clusters()); + for (uint32_t old_endpoint_index = 0; old_endpoint_index < m_endpoint_remap_table_old_to_new.size(); old_endpoint_index++) - endpoint_remap_table_inv[m_endpoint_remap_table_old_to_new[old_endpoint_index]] = old_endpoint_index; + { + if (old_endpoint_was_used[old_endpoint_index]) + { + const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index]; + + new_endpoint_was_used[new_endpoint_index] = true; + + endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index; + } + } + + // TODO: Some new endpoint palette entries may actually be unused and aren't worth coding. Fix that. + + uint32_t total_unused_new_entries = 0; + for (uint32_t i = 0; i < new_endpoint_was_used.size(); i++) + if (!new_endpoint_was_used[i]) + total_unused_new_entries++; + debug_printf("basisu_backend::encode_endpoint_palette: total_unused_new_entries: %u out of %u\n", total_unused_new_entries, new_endpoint_was_used.size()); bool is_grayscale = true; for (uint32_t old_endpoint_index = 0; old_endpoint_index < (uint32_t)m_endpoint_palette.size(); old_endpoint_index++) @@ -1324,7 +1453,7 @@ namespace basisu for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++) { - const uint32_t old_endpoint_index = endpoint_remap_table_inv[new_endpoint_index]; + const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index]; int delta_inten = m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten; inten_delta_hist.inc(delta_inten & 7); @@ -1390,7 +1519,7 @@ namespace basisu for (uint32_t new_endpoint_index = 0; new_endpoint_index < r.get_total_endpoint_clusters(); new_endpoint_index++) { - const uint32_t old_endpoint_index = endpoint_remap_table_inv[new_endpoint_index]; + const uint32_t old_endpoint_index = endpoint_remap_table_new_to_old[new_endpoint_index]; int delta_inten = (m_endpoint_palette[old_endpoint_index].m_inten5 - prev_inten) & 7; coder.put_code(delta_inten, inten_delta_model); @@ -1644,9 +1773,11 @@ namespace basisu uint32_t basisu_backend::encode() { - const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; + //const bool is_video = m_pFront_end->get_params().m_tex_type == basist::cBASISTexTypeVideoFrames; m_output.m_slice_desc = m_slices; m_output.m_etc1s = m_params.m_etc1s; + m_output.m_uses_global_codebooks = m_params.m_used_global_codebooks; + m_output.m_srgb = m_pFront_end->get_params().m_perceptual; create_endpoint_palette(); create_selector_palette(); diff --git a/thirdparty/basis_universal/basisu_backend.h b/thirdparty/basis_universal/encoder/basisu_backend.h index 1c72fa8cc8..393dccd22f 100644 --- a/thirdparty/basis_universal/basisu_backend.h +++ b/thirdparty/basis_universal/encoder/basisu_backend.h @@ -1,5 +1,5 @@ // basisu_backend.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_enc.h" -#include "transcoder/basisu_transcoder_internal.h" -#include "transcoder/basisu_global_selector_palette.h" +#include "../transcoder/basisu_transcoder_internal.h" +#include "../transcoder/basisu_global_selector_palette.h" #include "basisu_frontend.h" namespace basisu @@ -49,7 +49,7 @@ namespace basisu } }; - typedef std::vector<encoder_block> encoder_block_vec; + typedef basisu::vector<encoder_block> encoder_block_vec; typedef vector2D<encoder_block> encoder_block_vec2D; struct etc1_endpoint_palette_entry @@ -69,7 +69,7 @@ namespace basisu } }; - typedef std::vector<etc1_endpoint_palette_entry> etc1_endpoint_palette_entry_vec; + typedef basisu::vector<etc1_endpoint_palette_entry> etc1_endpoint_palette_entry_vec; struct basisu_backend_params { @@ -84,6 +84,8 @@ namespace basisu uint32_t m_global_sel_codebook_mod_bits; bool m_use_hybrid_sel_codebooks; + bool m_used_global_codebooks; + basisu_backend_params() { clear(); @@ -102,6 +104,7 @@ namespace basisu m_global_sel_codebook_pal_bits = ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS; m_global_sel_codebook_mod_bits = basist::etc1_global_palette_entry_modifier::cTotalBits; m_use_hybrid_sel_codebooks = false; + m_used_global_codebooks = false; } }; @@ -111,10 +114,12 @@ namespace basisu { clear(); } + void clear() { clear_obj(*this); } + uint32_t m_first_block_index; uint32_t m_orig_width; @@ -135,11 +140,15 @@ namespace basisu bool m_iframe; }; - typedef std::vector<basisu_backend_slice_desc> basisu_backend_slice_desc_vec; + typedef basisu::vector<basisu_backend_slice_desc> basisu_backend_slice_desc_vec; struct basisu_backend_output { + basist::basis_tex_format m_tex_format; + bool m_etc1s; + bool m_uses_global_codebooks; + bool m_srgb; uint32_t m_num_endpoints; uint32_t m_num_selectors; @@ -150,7 +159,7 @@ namespace basisu basisu_backend_slice_desc_vec m_slice_desc; uint8_vec m_slice_image_tables; - std::vector<uint8_vec> m_slice_image_data; + basisu::vector<uint8_vec> m_slice_image_data; uint16_vec m_slice_image_crcs; basisu_backend_output() @@ -160,7 +169,10 @@ namespace basisu void clear() { + m_tex_format = basist::basis_tex_format::cETC1S; m_etc1s = false; + m_uses_global_codebooks = false; + m_srgb = true; m_num_endpoints = 0; m_num_selectors = 0; @@ -198,6 +210,7 @@ namespace basisu uint32_t encode(); const basisu_backend_output &get_output() const { return m_output; } + const basisu_backend_params& get_params() const { return m_params; } private: basisu_frontend *m_pFront_end; @@ -216,15 +229,17 @@ namespace basisu bool m_was_used; }; - typedef std::vector<etc1_global_selector_cb_entry_desc> etc1_global_selector_cb_entry_desc_vec; + typedef basisu::vector<etc1_global_selector_cb_entry_desc> etc1_global_selector_cb_entry_desc_vec; etc1_global_selector_cb_entry_desc_vec m_global_selector_palette_desc; - std::vector<encoder_block_vec2D> m_slice_encoder_blocks; + basisu::vector<encoder_block_vec2D> m_slice_encoder_blocks; // Maps OLD to NEW endpoint/selector indices uint_vec m_endpoint_remap_table_old_to_new; uint_vec m_endpoint_remap_table_new_to_old; + bool_vec m_old_endpoint_was_used; + bool_vec m_new_endpoint_was_used; uint_vec m_selector_remap_table_old_to_new; diff --git a/thirdparty/basis_universal/basisu_basis_file.cpp b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp index 3e6b1906b9..f4c77bef23 100644 --- a/thirdparty/basis_universal/basisu_basis_file.cpp +++ b/thirdparty/basis_universal/encoder/basisu_basis_file.cpp @@ -1,5 +1,5 @@ // basisu_basis_file.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "basisu_basis_file.h" -#include "transcoder/basisu_transcoder.h" +#include "../transcoder/basisu_transcoder.h" // The output file version. Keep in sync with BASISD_SUPPORTED_BASIS_VERSION. #define BASIS_FILE_VERSION (0x13) @@ -31,15 +31,26 @@ namespace basisu m_header.m_total_images = 0; for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) m_header.m_total_images = maximum<uint32_t>(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1); - - m_header.m_format = 0;// basist::block_format::cETC1; + + m_header.m_tex_format = (int)encoder_output.m_tex_format; m_header.m_flags = 0; if (encoder_output.m_etc1s) + { + assert(encoder_output.m_tex_format == basist::basis_tex_format::cETC1S); m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagETC1S; + } + else + { + assert(encoder_output.m_tex_format != basist::basis_tex_format::cETC1S); + } if (y_flipped) m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagYFlipped; + if (encoder_output.m_uses_global_codebooks) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagUsesGlobalCodebook; + if (encoder_output.m_srgb) + m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagSRGB; for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) { @@ -57,12 +68,26 @@ namespace basisu m_header.m_userdata1 = userdata1; m_header.m_total_endpoints = encoder_output.m_num_endpoints; - m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs; - m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size(); + if (!encoder_output.m_uses_global_codebooks) + { + m_header.m_endpoint_cb_file_ofs = m_endpoint_cb_file_ofs; + m_header.m_endpoint_cb_file_size = (uint32_t)encoder_output.m_endpoint_palette.size(); + } + else + { + assert(!m_endpoint_cb_file_ofs); + } m_header.m_total_selectors = encoder_output.m_num_selectors; - m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs; - m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size(); + if (!encoder_output.m_uses_global_codebooks) + { + m_header.m_selector_cb_file_ofs = m_selector_cb_file_ofs; + m_header.m_selector_cb_file_size = (uint32_t)encoder_output.m_selector_palette.size(); + } + else + { + assert(!m_selector_cb_file_ofs); + } m_header.m_tables_file_ofs = m_tables_file_ofs; m_header.m_tables_file_size = (uint32_t)encoder_output.m_slice_image_tables.size(); @@ -85,7 +110,7 @@ namespace basisu m_images_descs[i].m_level_index = slice_descs[i].m_mip_index; if (slice_descs[i].m_alpha) - m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsIsAlphaData; + m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsHasAlpha; if (slice_descs[i].m_iframe) m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsFrameIsIFrame; @@ -127,14 +152,26 @@ namespace basisu assert(m_comp_data.size() == m_slice_descs_file_ofs); append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&m_images_descs[0]), m_images_descs.size() * sizeof(m_images_descs[0])); - assert(m_comp_data.size() == m_endpoint_cb_file_ofs); - append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size()); + if (!encoder_output.m_uses_global_codebooks) + { + if (encoder_output.m_endpoint_palette.size()) + { + assert(m_comp_data.size() == m_endpoint_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_endpoint_palette[0]), encoder_output.m_endpoint_palette.size()); + } - assert(m_comp_data.size() == m_selector_cb_file_ofs); - append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size()); + if (encoder_output.m_selector_palette.size()) + { + assert(m_comp_data.size() == m_selector_cb_file_ofs); + append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_selector_palette[0]), encoder_output.m_selector_palette.size()); + } + } - assert(m_comp_data.size() == m_tables_file_ofs); - append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size()); + if (encoder_output.m_slice_image_tables.size()) + { + assert(m_comp_data.size() == m_tables_file_ofs); + append_vector(m_comp_data, reinterpret_cast<const uint8_t*>(&encoder_output.m_slice_image_tables[0]), encoder_output.m_slice_image_tables.size()); + } assert(m_comp_data.size() == m_first_image_file_ofs); for (uint32_t i = 0; i < slice_descs.size(); i++) @@ -163,8 +200,17 @@ namespace basisu const basisu_backend_slice_desc_vec &slice_descs = encoder_output.m_slice_desc; // The Basis file uses 32-bit fields for lots of stuff, so make sure it's not too large. - uint64_t check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + uint64_t check_size = 0; + if (!encoder_output.m_uses_global_codebooks) + { + check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + (uint64_t)encoder_output.m_endpoint_palette.size() + (uint64_t)encoder_output.m_selector_palette.size() + (uint64_t)encoder_output.m_slice_image_tables.size(); + } + else + { + check_size = (uint64_t)sizeof(basist::basis_file_header) + (uint64_t)sizeof(basist::basis_slice_desc) * slice_descs.size() + + (uint64_t)encoder_output.m_slice_image_tables.size(); + } if (check_size >= 0xFFFF0000ULL) { error_printf("basisu_file::init: File is too large!\n"); @@ -173,10 +219,29 @@ namespace basisu m_header_file_ofs = 0; m_slice_descs_file_ofs = sizeof(basist::basis_file_header); - m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); - m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size(); - m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size(); - m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size(); + if (encoder_output.m_tex_format == basist::basis_tex_format::cETC1S) + { + if (encoder_output.m_uses_global_codebooks) + { + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + } + else + { + m_endpoint_cb_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + m_selector_cb_file_ofs = m_endpoint_cb_file_ofs + (uint32_t)encoder_output.m_endpoint_palette.size(); + m_tables_file_ofs = m_selector_cb_file_ofs + (uint32_t)encoder_output.m_selector_palette.size(); + } + m_first_image_file_ofs = m_tables_file_ofs + (uint32_t)encoder_output.m_slice_image_tables.size(); + } + else + { + m_endpoint_cb_file_ofs = 0; + m_selector_cb_file_ofs = 0; + m_tables_file_ofs = 0; + m_first_image_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); + } uint64_t total_file_size = m_first_image_file_ofs; for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++) diff --git a/thirdparty/basis_universal/basisu_basis_file.h b/thirdparty/basis_universal/encoder/basisu_basis_file.h index df3abbdcfd..98498a0121 100644 --- a/thirdparty/basis_universal/basisu_basis_file.h +++ b/thirdparty/basis_universal/encoder/basisu_basis_file.h @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_file_headers.h" #include "basisu_backend.h" namespace basisu @@ -49,7 +49,7 @@ namespace basisu private: basist::basis_file_header m_header; - std::vector<basist::basis_slice_desc> m_images_descs; + basisu::vector<basist::basis_slice_desc> m_images_descs; uint8_vec m_comp_data; diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp new file mode 100644 index 0000000000..06aa7eb8b1 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.cpp @@ -0,0 +1,1984 @@ +// File: basisu_bc7enc.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_bc7enc.h" + +#ifdef _DEBUG +#define BC7ENC_CHECK_OVERALL_ERROR 1 +#else +#define BC7ENC_CHECK_OVERALL_ERROR 0 +#endif + +using namespace basist; + +namespace basisu +{ + +// Helpers +static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->m_c[0] = (uint8_t)clampi(r, 0, 255); pRes->m_c[1] = (uint8_t)clampi(g, 0, 255); pRes->m_c[2] = (uint8_t)clampi(b, 0, 255); pRes->m_c[3] = (uint8_t)clampi(a, 0, 255); return pRes; } +static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->m_c[0] = (uint8_t)r; pRes->m_c[1] = (uint8_t)g; pRes->m_c[2] = (uint8_t)b; pRes->m_c[3] = (uint8_t)a; return pRes; } +static inline bc7enc_bool color_quad_u8_notequals(const color_quad_u8 *pLHS, const color_quad_u8 *pRHS) { return (pLHS->m_c[0] != pRHS->m_c[0]) || (pLHS->m_c[1] != pRHS->m_c[1]) || (pLHS->m_c[2] != pRHS->m_c[2]) || (pLHS->m_c[3] != pRHS->m_c[3]); } +static inline bc7enc_vec4F*vec4F_set_scalar(bc7enc_vec4F*pV, float x) { pV->m_c[0] = x; pV->m_c[1] = x; pV->m_c[2] = x; pV->m_c[3] = x; return pV; } +static inline bc7enc_vec4F*vec4F_set(bc7enc_vec4F*pV, float x, float y, float z, float w) { pV->m_c[0] = x; pV->m_c[1] = y; pV->m_c[2] = z; pV->m_c[3] = w; return pV; } +static inline bc7enc_vec4F*vec4F_saturate_in_place(bc7enc_vec4F*pV) { pV->m_c[0] = saturate(pV->m_c[0]); pV->m_c[1] = saturate(pV->m_c[1]); pV->m_c[2] = saturate(pV->m_c[2]); pV->m_c[3] = saturate(pV->m_c[3]); return pV; } +static inline bc7enc_vec4F vec4F_saturate(const bc7enc_vec4F*pV) { bc7enc_vec4F res; res.m_c[0] = saturate(pV->m_c[0]); res.m_c[1] = saturate(pV->m_c[1]); res.m_c[2] = saturate(pV->m_c[2]); res.m_c[3] = saturate(pV->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_from_color(const color_quad_u8 *pC) { bc7enc_vec4F res; vec4F_set(&res, pC->m_c[0], pC->m_c[1], pC->m_c[2], pC->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_add(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] + pRHS->m_c[0], pLHS->m_c[1] + pRHS->m_c[1], pLHS->m_c[2] + pRHS->m_c[2], pLHS->m_c[3] + pRHS->m_c[3]); return res; } +static inline bc7enc_vec4F vec4F_sub(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] - pRHS->m_c[0], pLHS->m_c[1] - pRHS->m_c[1], pLHS->m_c[2] - pRHS->m_c[2], pLHS->m_c[3] - pRHS->m_c[3]); return res; } +static inline float vec4F_dot(const bc7enc_vec4F*pLHS, const bc7enc_vec4F*pRHS) { return pLHS->m_c[0] * pRHS->m_c[0] + pLHS->m_c[1] * pRHS->m_c[1] + pLHS->m_c[2] * pRHS->m_c[2] + pLHS->m_c[3] * pRHS->m_c[3]; } +static inline bc7enc_vec4F vec4F_mul(const bc7enc_vec4F*pLHS, float s) { bc7enc_vec4F res; vec4F_set(&res, pLHS->m_c[0] * s, pLHS->m_c[1] * s, pLHS->m_c[2] * s, pLHS->m_c[3] * s); return res; } +static inline bc7enc_vec4F* vec4F_normalize_in_place(bc7enc_vec4F*pV) { float s = pV->m_c[0] * pV->m_c[0] + pV->m_c[1] * pV->m_c[1] + pV->m_c[2] * pV->m_c[2] + pV->m_c[3] * pV->m_c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->m_c[0] *= s; pV->m_c[1] *= s; pV->m_c[2] *= s; pV->m_c[3] *= s; } return pV; } + +// Precomputed weight constants used during least fit determination. For each entry in g_bc7_weights[]: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w +const float g_bc7_weights1x[2 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights2x[4 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.107666f, 0.220459f, 0.451416f, 0.328125f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights3x[8 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.019775f, 0.120850f, 0.738525f, 0.140625f, 0.079102f, 0.202148f, 0.516602f, 0.281250f, 0.177979f, 0.243896f, 0.334229f, 0.421875f, 0.334229f, 0.243896f, 0.177979f, 0.578125f, 0.516602f, 0.202148f, + 0.079102f, 0.718750f, 0.738525f, 0.120850f, 0.019775f, 0.859375f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_bc7_weights4x[16 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.019775f, 0.120850f, 0.738525f, 0.140625f, 0.041260f, 0.161865f, 0.635010f, 0.203125f, 0.070557f, 0.195068f, 0.539307f, 0.265625f, 0.107666f, 0.220459f, + 0.451416f, 0.328125f, 0.165039f, 0.241211f, 0.352539f, 0.406250f, 0.219727f, 0.249023f, 0.282227f, 0.468750f, 0.282227f, 0.249023f, 0.219727f, 0.531250f, 0.352539f, 0.241211f, 0.165039f, 0.593750f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 0.539307f, 0.195068f, 0.070557f, 0.734375f, + 0.635010f, 0.161865f, 0.041260f, 0.796875f, 0.738525f, 0.120850f, 0.019775f, 0.859375f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights4x[16 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.015625f, 0.109375f, 0.765625f, 0.125000f, 0.035156f, 0.152344f, 0.660156f, 0.187500f, 0.070557f, 0.195068f, 0.539307f, 0.265625f, 0.107666f, 0.220459f, + 0.451416f, 0.328125f, 0.152588f, 0.238037f, 0.371338f, 0.390625f, 0.205322f, 0.247803f, 0.299072f, 0.453125f, 0.299072f, 0.247803f, 0.205322f, 0.546875f, 0.371338f, 0.238037f, 0.152588f, 0.609375f, 0.451416f, 0.220459f, 0.107666f, 0.671875f, 0.539307f, 0.195068f, 0.070557f, 0.734375f, + 0.660156f, 0.152344f, 0.035156f, 0.812500f, 0.765625f, 0.109375f, 0.015625f, 0.875000f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights5x[32 * 4] = { 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000977f, 0.030273f, 0.938477f, 0.031250f, 0.003906f, 0.058594f, 0.878906f, 0.062500f, 0.008789f, 0.084961f, 0.821289f, + 0.093750f, 0.015625f, 0.109375f, 0.765625f, 0.125000f, 0.024414f, 0.131836f, 0.711914f, 0.156250f, 0.035156f, 0.152344f, 0.660156f, 0.187500f, 0.047852f, 0.170898f, 0.610352f, 0.218750f, 0.062500f, 0.187500f, + 0.562500f, 0.250000f, 0.079102f, 0.202148f, 0.516602f, 0.281250f, 0.097656f, 0.214844f, 0.472656f, 0.312500f, 0.118164f, 0.225586f, 0.430664f, 0.343750f, 0.140625f, 0.234375f, 0.390625f, 0.375000f, 0.165039f, + 0.241211f, 0.352539f, 0.406250f, 0.191406f, 0.246094f, 0.316406f, 0.437500f, 0.219727f, 0.249023f, 0.282227f, 0.468750f, 0.282227f, 0.249023f, 0.219727f, 0.531250f, 0.316406f, 0.246094f, 0.191406f, 0.562500f, + 0.352539f, 0.241211f, 0.165039f, 0.593750f, 0.390625f, 0.234375f, 0.140625f, 0.625000f, 0.430664f, 0.225586f, 0.118164f, 0.656250f, 0.472656f, 0.214844f, 0.097656f, 0.687500f, 0.516602f, 0.202148f, 0.079102f, + 0.718750f, 0.562500f, 0.187500f, 0.062500f, 0.750000f, 0.610352f, 0.170898f, 0.047852f, 0.781250f, 0.660156f, 0.152344f, 0.035156f, 0.812500f, 0.711914f, 0.131836f, 0.024414f, 0.843750f, 0.765625f, 0.109375f, + 0.015625f, 0.875000f, 0.821289f, 0.084961f, 0.008789f, 0.906250f, 0.878906f, 0.058594f, 0.003906f, 0.937500f, 0.938477f, 0.030273f, 0.000977f, 0.968750f, 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +const float g_astc_weights_3levelsx[3 * 4] = { + 0.000000f, 0.000000f, 1.000000f, 0.000000f, + .5f * .5f, (1.0f - .5f) * .5f, (1.0f - .5f) * (1.0f - .5f), .5f, + 1.000000f, 0.000000f, 0.000000f, 1.000000f }; + +static endpoint_err g_bc7_mode_1_optimal_endpoints[256][2]; // [c][pbit] +static const uint32_t BC7ENC_MODE_1_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_4bit_3bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_4bit_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range7_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range13_4bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX = 2; + +static endpoint_err g_astc_range13_2bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX = 1; + +static endpoint_err g_astc_range11_5bit_optimal_endpoints[256]; // [c] +static const uint32_t BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX = 13; // not 1, which is optimal, because 26 losslessly maps to BC7 4-bit weights + +astc_quant_bin g_astc_sorted_order_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + +static uint8_t g_astc_nearest_sorted_index[BC7ENC_TOTAL_ASTC_RANGES][256]; + +static void astc_init() +{ + for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++) + { + if (!astc_is_valid_endpoint_range(range)) + continue; + + const uint32_t levels = astc_get_levels(range); + + uint32_t vals[256]; + // TODO + for (uint32_t i = 0; i < levels; i++) + vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; + + std::sort(vals, vals + levels); + + for (uint32_t i = 0; i < levels; i++) + { + uint32_t order = vals[i] & 0xFF; + uint32_t unq = vals[i] >> 8; + + g_astc_sorted_order_unquant[range][i].m_unquant = (uint8_t)unq; + g_astc_sorted_order_unquant[range][i].m_index = (uint8_t)order; + + } // i + +#if 0 + if (g_astc_bise_range_table[range][1] || g_astc_bise_range_table[range][2]) + { + printf("// Range: %u, Levels: %u, Bits: %u, Trits: %u, Quints: %u\n", range, levels, g_astc_bise_range_table[range][0], g_astc_bise_range_table[range][1], g_astc_bise_range_table[range][2]); + + printf("{"); + for (uint32_t i = 0; i < levels; i++) + { + printf("{%u,%u}", g_astc_sorted_order_unquant[range][i].m_index, g_astc_sorted_order_unquant[range][i].m_unquant); + if (i != (levels - 1)) + printf(","); + } + printf("}\n"); + } +#endif + +#if 0 + if (g_astc_bise_range_table[range][1] || g_astc_bise_range_table[range][2]) + { + printf("// Range: %u, Levels: %u, Bits: %u, Trits: %u, Quints: %u\n", range, levels, g_astc_bise_range_table[range][0], g_astc_bise_range_table[range][1], g_astc_bise_range_table[range][2]); + + printf("{"); + for (uint32_t i = 0; i < levels; i++) + { + printf("{%u,%u}", g_astc_unquant[range][i].m_index, g_astc_unquant[range][i].m_unquant); + if (i != (levels - 1)) + printf(","); + } + printf("}\n"); + } +#endif + + for (uint32_t i = 0; i < 256; i++) + { + uint32_t best_index = 0; + int best_err = INT32_MAX; + + for (uint32_t j = 0; j < levels; j++) + { + int err = g_astc_sorted_order_unquant[range][j].m_unquant - i; + if (err < 0) + err = -err; + if (err < best_err) + { + best_err = err; + best_index = j; + } + } + + g_astc_nearest_sorted_index[range][i] = (uint8_t)best_index; + } // i + } // range +} + +static inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w) +{ + // This is for linear values, not sRGB. + l = (l << 8) | l; + h = (h << 8) | h; + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; + return k >> 8; +} + +// Initialize the lookup table used for optimal single color compression in mode 1. Must be called before encoding. +void bc7enc_compress_block_init() +{ + astc_init(); + + // BC7 666.1 + for (int c = 0; c < 256; c++) + { + for (uint32_t lp = 0; lp < 2; lp++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 64; l++) + { + uint32_t low = ((l << 1) | lp) << 1; + low |= (low >> 7); + for (uint32_t h = 0; h < 64; h++) + { + uint32_t high = ((h << 1) | lp) << 1; + high |= (high >> 7); + const int k = (low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6; + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + g_bc7_mode_1_optimal_endpoints[c][lp] = best; + } // lp + } // c + + // ASTC [0,15] 3-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 16; l++) + { + uint32_t low = (l << 4) | l; + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t high = (h << 4) | h; + + const int k = astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_4bit_3bit_optimal_endpoints[c] = best; + + } // c + + // ASTC [0,15] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 16; l++) + { + uint32_t low = (l << 4) | l; + + for (uint32_t h = 0; h < 16; h++) + { + uint32_t high = (h << 4) | h; + + const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_4bit_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 7 [0,11] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 12; l++) + { + uint32_t low = g_astc_sorted_order_unquant[7][l].m_unquant; + + for (uint32_t h = 0; h < 12; h++) + { + uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant; + + const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range7_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 13 [0,47] 4-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 48; l++) + { + uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; + + for (uint32_t h = 0; h < 48; h++) + { + uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; + + const int k = astc_interpolate(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range13_4bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 13 [0,47] 2-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 48; l++) + { + uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; + + for (uint32_t h = 0; h < 48; h++) + { + uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; + + const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range13_2bit_optimal_endpoints[c] = best; + + } // c + + // ASTC range 11 [0,31] 5-bit + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + for (uint32_t l = 0; l < 32; l++) + { + uint32_t low = g_astc_sorted_order_unquant[11][l].m_unquant; + + for (uint32_t h = 0; h < 32; h++) + { + uint32_t high = g_astc_sorted_order_unquant[11][h].m_unquant; + + const int k = astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + const int err = (k - c) * (k - c); + + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_astc_range11_5bit_optimal_endpoints[c] = best; + + } // c +} + +static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors) +{ + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. + double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + double q00_a = 0.0f, q10_a = 0.0f, t_a = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + q10_a = t_a - q00_a; + + z01 = z10; + + double det = z00 * z11 - z01 * z10; + if (det != 0.0f) + det = 1.0f / det; + + double iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = (float)(iz00 * q00_a + iz01 * q10_a); pXh->m_c[3] = (float)(iz10 * q00_a + iz11 * q10_a); + + for (uint32_t c = 0; c < 4; c++) + { + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < N; i++) + { + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); + } + + if (lo_v == hi_v) + { + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; + } + } + } +} + +static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const bc7enc_vec4F*pSelector_weights, bc7enc_vec4F*pXl, bc7enc_vec4F*pXh, const color_quad_u8 *pColors) +{ + double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; + double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; + double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; + double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; + + for (uint32_t i = 0; i < N; i++) + { + const uint32_t sel = pSelectors[i]; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + } + + q10_r = t_r - q00_r; + q10_g = t_g - q00_g; + q10_b = t_b - q00_b; + + z01 = z10; + + double det = z00 * z11 - z01 * z10; + if (det != 0.0f) + det = 1.0f / det; + + double iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = 255.0f; pXh->m_c[3] = 255.0f; + + for (uint32_t c = 0; c < 3; c++) + { + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) + { + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < N; i++) + { + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); + } + + if (lo_v == hi_v) + { + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; + } + } + } +} + +static inline color_quad_u8 scale_color(const color_quad_u8* pC, const color_cell_compressor_params* pParams) +{ + color_quad_u8 results; + + if (pParams->m_astc_endpoint_range) + { + for (uint32_t i = 0; i < 4; i++) + { + results.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pC->m_c[i]].m_unquant; + } + } + else + { + const uint32_t n = pParams->m_comp_bits + (pParams->m_has_pbits ? 1 : 0); + assert((n >= 4) && (n <= 8)); + + for (uint32_t i = 0; i < 4; i++) + { + uint32_t v = pC->m_c[i] << (8 - n); + v |= (v >> n); + assert(v <= 255); + results.m_c[i] = (uint8_t)(v); + } + } + + return results; +} + +static inline uint64_t compute_color_distance_rgb(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +{ + int dr, dg, db; + + if (perceptual) + { + const int l1 = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + const int cr1 = ((int)pE1->m_c[0] << 9) - l1; + const int cb1 = ((int)pE1->m_c[2] << 9) - l1; + const int l2 = pE2->m_c[0] * 109 + pE2->m_c[1] * 366 + pE2->m_c[2] * 37; + const int cr2 = ((int)pE2->m_c[0] << 9) - l2; + const int cb2 = ((int)pE2->m_c[2] << 9) - l2; + dr = (l1 - l2) >> 8; + dg = (cr1 - cr2) >> 8; + db = (cb1 - cb2) >> 8; + } + else + { + dr = (int)pE1->m_c[0] - (int)pE2->m_c[0]; + dg = (int)pE1->m_c[1] - (int)pE2->m_c[1]; + db = (int)pE1->m_c[2] - (int)pE2->m_c[2]; + } + + return weights[0] * (uint32_t)(dr * dr) + weights[1] * (uint32_t)(dg * dg) + weights[2] * (uint32_t)(db * db); +} + +static inline uint64_t compute_color_distance_rgba(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +{ + int da = (int)pE1->m_c[3] - (int)pE2->m_c[3]; + return compute_color_distance_rgb(pE1, pE2, perceptual, weights) + (weights[3] * (uint32_t)(da * da)); +} + +static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + uint32_t best_err = UINT_MAX; + uint32_t best_p = 0; + + for (uint32_t p = 0; p < 2; p++) + { + uint32_t err = g_bc7_mode_1_optimal_endpoints[r][p].m_error + g_bc7_mode_1_optimal_endpoints[g][p].m_error + g_bc7_mode_1_optimal_endpoints[b][p].m_error; + if (err < best_err) + { + best_err = err; + best_p = p; + } + } + + const endpoint_err *pEr = &g_bc7_mode_1_optimal_endpoints[r][best_p]; + const endpoint_err *pEg = &g_bc7_mode_1_optimal_endpoints[g][best_p]; + const endpoint_err *pEb = &g_bc7_mode_1_optimal_endpoints[b][best_p]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = best_p; + pResults->m_pbits[1] = 0; + + memset(pSelectors, BC7ENC_MODE_1_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = ((pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; + low |= (low >> 7); + + uint32_t high = ((pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; + high |= (high >> 7); + + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + const endpoint_err *pEr = &g_astc_4bit_3bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_4bit_3bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_4bit_3bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; + + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint32_t a, uint8_t *pSelectors) +{ + const endpoint_err *pEr = &g_astc_4bit_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_4bit_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_4bit_2bit_optimal_endpoints[b]; + const endpoint_err *pEa = &g_astc_4bit_2bit_optimal_endpoints[a]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, pEa->m_lo); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, pEa->m_hi); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; + + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgba(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 7 && pParams->m_num_selector_weights == 4); + + const endpoint_err *pEr = &g_astc_range7_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_range7_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_range7_2bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 0); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 0); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 3; i++) + { + uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); + } + p.m_c[3] = 255; + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t *pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 13 && pParams->m_num_selector_weights == 4 && !pParams->m_has_alpha); + + const endpoint_err *pEr = &g_astc_range13_2bit_optimal_endpoints[r]; + const endpoint_err *pEg = &g_astc_range13_2bit_optimal_endpoints[g]; + const endpoint_err *pEb = &g_astc_range13_2bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 47); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 47); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t pack_astc_range11_5bit_to_one_color(const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, uint32_t r, uint32_t g, uint32_t b, uint8_t* pSelectors) +{ + assert(pParams->m_astc_endpoint_range == 11 && pParams->m_num_selector_weights == 32 && !pParams->m_has_alpha); + + const endpoint_err* pEr = &g_astc_range11_5bit_optimal_endpoints[r]; + const endpoint_err* pEg = &g_astc_range11_5bit_optimal_endpoints[g]; + const endpoint_err* pEb = &g_astc_range11_5bit_optimal_endpoints[b]; + + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 31); + color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 31); + pResults->m_pbits[0] = 0; + pResults->m_pbits[1] = 0; + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + + memset(pSelectors, BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX, pParams->m_num_pixels); + + color_quad_u8 p; + for (uint32_t i = 0; i < 4; i++) + { + uint32_t low = g_astc_sorted_order_unquant[11][pResults->m_low_endpoint.m_c[i]].m_unquant; + uint32_t high = g_astc_sorted_order_unquant[11][pResults->m_high_endpoint.m_c[i]].m_unquant; + + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_astc_weights5[BC7ENC_ASTC_RANGE11_5BIT_OPTIMAL_INDEX]); + } + + uint64_t total_err = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + pResults->m_best_overall_err = total_err; + + return total_err; +} + +static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + color_quad_u8 quantMinColor = *pLow; + color_quad_u8 quantMaxColor = *pHigh; + + if (pParams->m_has_pbits) + { + uint32_t minPBit, maxPBit; + + if (pParams->m_endpoints_share_pbit) + maxPBit = minPBit = pbits[0]; + else + { + minPBit = pbits[0]; + maxPBit = pbits[1]; + } + + quantMinColor.m_c[0] = (uint8_t)((pLow->m_c[0] << 1) | minPBit); + quantMinColor.m_c[1] = (uint8_t)((pLow->m_c[1] << 1) | minPBit); + quantMinColor.m_c[2] = (uint8_t)((pLow->m_c[2] << 1) | minPBit); + quantMinColor.m_c[3] = (uint8_t)((pLow->m_c[3] << 1) | minPBit); + + quantMaxColor.m_c[0] = (uint8_t)((pHigh->m_c[0] << 1) | maxPBit); + quantMaxColor.m_c[1] = (uint8_t)((pHigh->m_c[1] << 1) | maxPBit); + quantMaxColor.m_c[2] = (uint8_t)((pHigh->m_c[2] << 1) | maxPBit); + quantMaxColor.m_c[3] = (uint8_t)((pHigh->m_c[3] << 1) | maxPBit); + } + + color_quad_u8 actualMinColor = scale_color(&quantMinColor, pParams); + color_quad_u8 actualMaxColor = scale_color(&quantMaxColor, pParams); + + const uint32_t N = pParams->m_num_selector_weights; + assert(N >= 1 && N <= 32); + + color_quad_u8 weightedColors[32]; + weightedColors[0] = actualMinColor; + weightedColors[N - 1] = actualMaxColor; + + const uint32_t nc = pParams->m_has_alpha ? 4 : 3; + if (pParams->m_astc_endpoint_range) + { + for (uint32_t i = 1; i < (N - 1); i++) + { + for (uint32_t j = 0; j < nc; j++) + weightedColors[i].m_c[j] = (uint8_t)(astc_interpolate(actualMinColor.m_c[j], actualMaxColor.m_c[j], pParams->m_pSelector_weights[i])); + } + } + else + { + for (uint32_t i = 1; i < (N - 1); i++) + for (uint32_t j = 0; j < nc; j++) + weightedColors[i].m_c[j] = (uint8_t)((actualMinColor.m_c[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor.m_c[j] * pParams->m_pSelector_weights[i] + 32) >> 6); + } + + const int lr = actualMinColor.m_c[0]; + const int lg = actualMinColor.m_c[1]; + const int lb = actualMinColor.m_c[2]; + const int dr = actualMaxColor.m_c[0] - lr; + const int dg = actualMaxColor.m_c[1] - lg; + const int db = actualMaxColor.m_c[2] - lb; + + uint64_t total_err = 0; + + if (pParams->m_pForce_selectors) + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8* pC = &pParams->m_pPixels[i]; + + const uint8_t sel = pParams->m_pForce_selectors[i]; + assert(sel < N); + + total_err += (pParams->m_has_alpha ? compute_color_distance_rgba : compute_color_distance_rgb)(&weightedColors[sel], pC, pParams->m_perceptual, pParams->m_weights); + + pResults->m_pSelectors_temp[i] = sel; + } + } + else if (!pParams->m_perceptual) + { + if (pParams->m_has_alpha) + { + const int la = actualMinColor.m_c[3]; + const int da = actualMaxColor.m_c[3] - la; + + const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + squarei(da) + .00000125f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + int a = pC->m_c[3]; + + int best_sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db + (a - la) * da) * f + .5f); + best_sel = clampi(best_sel, 1, N - 1); + + uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, BC7ENC_FALSE, pParams->m_weights); + + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 1) == 0) + best_sel = 0; + } + else if (err1 > err0) + { + err1 = err0; + --best_sel; + } + total_err += err1; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + else + { + const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + + int sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db) * f + .5f); + sel = clampi(sel, 1, N - 1); + + uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, BC7ENC_FALSE, pParams->m_weights); + + int best_sel = sel; + uint64_t best_err = err1; + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 1) == 0) + best_sel = 0; + } + else if (err0 < best_err) + { + best_err = err0; + best_sel = sel - 1; + } + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + } + else + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_sel = 0; + + if (pParams->m_has_alpha) + { + for (uint32_t j = 0; j < N; j++) + { + uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + if (err < best_err) + { + best_err = err; + best_sel = j; + } + // Prefer non-interpolation + else if ((err == best_err) && (j == (N - 1))) + best_sel = j; + } + } + else + { + for (uint32_t j = 0; j < N; j++) + { + uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + if (err < best_err) + { + best_err = err; + best_sel = j; + } + // Prefer non-interpolation + else if ((err == best_err) && (j == (N - 1))) + best_sel = j; + } + } + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + + if (total_err < pResults->m_best_overall_err) + { + pResults->m_best_overall_err = total_err; + + pResults->m_low_endpoint = *pLow; + pResults->m_high_endpoint = *pHigh; + + pResults->m_pbits[0] = pbits[0]; + pResults->m_pbits[1] = pbits[1]; + + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + } + + return total_err; +} + +static bool areDegenerateEndpoints(color_quad_u8* pTrialMinColor, color_quad_u8* pTrialMaxColor, const bc7enc_vec4F* pXl, const bc7enc_vec4F* pXh) +{ + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.0f) + return true; + } + } + + return false; +} + +static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, color_quad_u8 *pTrialMaxColor, const bc7enc_vec4F*pXl, const bc7enc_vec4F*pXh, uint32_t iscale, int flags) +{ + if (mode == 255) + { + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.000125f) + { + if (flags & 1) + { + if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; + } + if (flags & 2) + { + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + } + } + } + } + } + else if (mode == 1) + { + // fix degenerate case where the input collapses to a single colorspace voxel, and we loose all freedom (test with grayscale ramps) + for (uint32_t i = 0; i < 3; i++) + { + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) + { + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.000125f) + { + if (pTrialMinColor->m_c[i] > (iscale >> 1)) + { + if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; + else + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + } + else + { + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + else if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; + } + } + } + } + } +} + +static uint64_t find_optimal_solution(uint32_t mode, bc7enc_vec4F xl, bc7enc_vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + vec4F_saturate_in_place(&xl); vec4F_saturate_in_place(&xh); + + if (pParams->m_astc_endpoint_range) + { + const uint32_t levels = astc_get_levels(pParams->m_astc_endpoint_range); + + const float scale = 255.0f; + + color_quad_u8 trialMinColor8Bit, trialMaxColor8Bit; + color_quad_u8_set_clamped(&trialMinColor8Bit, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor8Bit, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + + color_quad_u8 trialMinColor, trialMaxColor; + for (uint32_t i = 0; i < 4; i++) + { + trialMinColor.m_c[i] = g_astc_nearest_sorted_index[pParams->m_astc_endpoint_range][trialMinColor8Bit.m_c[i]]; + trialMaxColor.m_c[i] = g_astc_nearest_sorted_index[pParams->m_astc_endpoint_range][trialMaxColor8Bit.m_c[i]]; + } + + if (areDegenerateEndpoints(&trialMinColor, &trialMaxColor, &xl, &xh)) + { + color_quad_u8 trialMinColorOrig(trialMinColor), trialMaxColorOrig(trialMaxColor); + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 1); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 0); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 2); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + + trialMinColor = trialMinColorOrig; + trialMaxColor = trialMaxColorOrig; + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, levels - 1, 3); + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + else + { + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + { + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + } + + for (uint32_t i = 0; i < 4; i++) + { + pResults->m_astc_low_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[i]].m_index; + pResults->m_astc_high_endpoint.m_c[i] = g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[i]].m_index; + } + } + else if (pParams->m_has_pbits) + { + const int iscalep = (1 << (pParams->m_comp_bits + 1)) - 1; + const float scalep = (float)iscalep; + + const int32_t totalComps = pParams->m_has_alpha ? 4 : 3; + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + + if (!pParams->m_endpoints_share_pbit) + { + float best_err0 = 1e+9; + float best_err1 = 1e+9; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + + // Notes: The pbit controls which quantization intervals are selected. + // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. + // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value + // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) + // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); + color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + + float err0 = 0, err1 = 0; + for (int i = 0; i < totalComps; i++) + { + err0 += squaref(scaledLow.m_c[i] - xl.m_c[i] * 255.0f); + err1 += squaref(scaledHigh.m_c[i] - xh.m_c[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } + } + } + else + { + // Endpoints share pbits + float best_err = 1e+9; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); + color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + + float err = 0; + for (int i = 0; i < totalComps; i++) + err += squaref((scaledLow.m_c[i] / 255.0f) - xl.m_c[i]) + squaref((scaledHigh.m_c[i] / 255.0f) - xh.m_c[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + } + } + + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, 0); + + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) + evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults); + } + else + { + const int iscale = (1 << pParams->m_comp_bits) - 1; + const float scale = (float)iscale; + + color_quad_u8 trialMinColor, trialMaxColor; + color_quad_u8_set_clamped(&trialMinColor, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale, 0); + + if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + } + + return pResults->m_best_overall_err; +} + +void check_best_overall_error(const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +{ + const uint32_t n = pParams->m_num_selector_weights; + + assert(n <= 32); + + color_quad_u8 colors[32]; + for (uint32_t c = 0; c < 4; c++) + { + colors[0].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_low_endpoint.m_c[c]].m_unquant; + assert(colors[0].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_low_endpoint.m_c[c]].m_unquant); + + colors[n-1].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_high_endpoint.m_c[c]].m_unquant; + assert(colors[n-1].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[c]].m_unquant); + } + + for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_c[c] = (uint8_t)astc_interpolate(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); + + uint64_t total_err = 0; + for (uint32_t p = 0; p < pParams->m_num_pixels; p++) + { + const color_quad_u8 &orig = pParams->m_pPixels[p]; + const color_quad_u8 &packed = colors[pResults->m_pSelectors[p]]; + + if (pParams->m_has_alpha) + total_err += compute_color_distance_rgba(&orig, &packed, pParams->m_perceptual, pParams->m_weights); + else + total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights); + } + assert(total_err == pResults->m_best_overall_err); + + // HACK HACK + //if (total_err != pResults->m_best_overall_err) + // printf("X"); +} + +static bool is_solid_rgb(const color_cell_compressor_params *pParams, uint32_t &r, uint32_t &g, uint32_t &b) +{ + r = pParams->m_pPixels[0].m_c[0]; + g = pParams->m_pPixels[0].m_c[1]; + b = pParams->m_pPixels[0].m_c[2]; + + bool allSame = true; + for (uint32_t i = 1; i < pParams->m_num_pixels; i++) + { + if ((r != pParams->m_pPixels[i].m_c[0]) || (g != pParams->m_pPixels[i].m_c[1]) || (b != pParams->m_pPixels[i].m_c[2])) + { + allSame = false; + break; + } + } + + return allSame; +} + +static bool is_solid_rgba(const color_cell_compressor_params *pParams, uint32_t &r, uint32_t &g, uint32_t &b, uint32_t &a) +{ + r = pParams->m_pPixels[0].m_c[0]; + g = pParams->m_pPixels[0].m_c[1]; + b = pParams->m_pPixels[0].m_c[2]; + a = pParams->m_pPixels[0].m_c[3]; + + bool allSame = true; + for (uint32_t i = 1; i < pParams->m_num_pixels; i++) + { + if ((r != pParams->m_pPixels[i].m_c[0]) || (g != pParams->m_pPixels[i].m_c[1]) || (b != pParams->m_pPixels[i].m_c[2]) || (a != pParams->m_pPixels[i].m_c[3])) + { + allSame = false; + break; + } + } + + return allSame; +} + +uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, const bc7enc_compress_block_params *pComp_params) +{ + if (!pParams->m_astc_endpoint_range) + { + assert((mode == 6) || (!pParams->m_has_alpha)); + } + assert(pParams->m_num_selector_weights >= 1 && pParams->m_num_selector_weights <= 32); + assert(pParams->m_pSelector_weights[0] == 0); + assert(pParams->m_pSelector_weights[pParams->m_num_selector_weights - 1] == 64); + + pResults->m_best_overall_err = UINT64_MAX; + + uint32_t cr, cg, cb, ca; + + // If the partition's colors are all the same, then just pack them as a single color. + if (!pParams->m_pForce_selectors) + { + if (mode == 1) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_mode1_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 8) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_4bit_3bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 7) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range7_2bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 4) && (pParams->m_has_alpha)) + { + if (is_solid_rgba(pParams, cr, cg, cb, ca)) + return pack_astc_4bit_2bit_to_one_color_rgba(pParams, pResults, cr, cg, cb, ca, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 13) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range13_2bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + else if ((pParams->m_astc_endpoint_range == 11) && (pParams->m_num_selector_weights == 32) && (!pParams->m_has_alpha)) + { + if (is_solid_rgb(pParams, cr, cg, cb)) + return pack_astc_range11_5bit_to_one_color(pParams, pResults, cr, cg, cb, pResults->m_pSelectors); + } + } + + // Compute partition's mean color and principle axis. + bc7enc_vec4F meanColor, axis; + vec4F_set_scalar(&meanColor, 0.0f); + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + meanColor = vec4F_add(&meanColor, &color); + } + + bc7enc_vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels)); + + meanColor = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels * 255.0f)); + vec4F_saturate_in_place(&meanColor); + + if (pParams->m_has_alpha) + { + // Use incremental PCA for RGBA PCA, because it's simple. + vec4F_set_scalar(&axis, 0.0f); + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + color = vec4F_sub(&color, &meanColorScaled); + bc7enc_vec4F a = vec4F_mul(&color, color.m_c[0]); + bc7enc_vec4F b = vec4F_mul(&color, color.m_c[1]); + bc7enc_vec4F c = vec4F_mul(&color, color.m_c[2]); + bc7enc_vec4F d = vec4F_mul(&color, color.m_c[3]); + bc7enc_vec4F n = i ? axis : color; + vec4F_normalize_in_place(&n); + axis.m_c[0] += vec4F_dot(&a, &n); + axis.m_c[1] += vec4F_dot(&b, &n); + axis.m_c[2] += vec4F_dot(&c, &n); + axis.m_c[3] += vec4F_dot(&d, &n); + } + vec4F_normalize_in_place(&axis); + } + else + { + // Use covar technique for RGB PCA, because it doesn't require per-pixel normalization. + float cov[6] = { 0, 0, 0, 0, 0, 0 }; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const color_quad_u8 *pV = &pParams->m_pPixels[i]; + float r = pV->m_c[0] - meanColorScaled.m_c[0]; + float g = pV->m_c[1] - meanColorScaled.m_c[1]; + float b = pV->m_c[2] - meanColorScaled.m_c[2]; + cov[0] += r*r; cov[1] += r*g; cov[2] += r*b; cov[3] += g*g; cov[4] += g*b; cov[5] += b*b; + } + + float xr = .9f, xg = 1.0f, xb = .7f; + for (uint32_t iter = 0; iter < 3; iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + + float m = maximumf(maximumf(fabsf(r), fabsf(g)), fabsf(b)); + if (m > 1e-10f) + { + m = 1.0f / m; + r *= m; g *= m; b *= m; + } + + xr = r; xg = g; xb = b; + } + + float len = xr * xr + xg * xg + xb * xb; + if (len < 1e-10f) + vec4F_set_scalar(&axis, 0.0f); + else + { + len = 1.0f / sqrtf(len); + xr *= len; xg *= len; xb *= len; + vec4F_set(&axis, xr, xg, xb, 0); + } + } + + if (vec4F_dot(&axis, &axis) < .5f) + { + if (pParams->m_perceptual) + vec4F_set(&axis, .213f, .715f, .072f, pParams->m_has_alpha ? .715f : 0); + else + vec4F_set(&axis, 1.0f, 1.0f, 1.0f, pParams->m_has_alpha ? 1.0f : 0); + vec4F_normalize_in_place(&axis); + } + + bc7enc_vec4F minColor, maxColor; + + float l = 1e+9f, h = -1e+9f; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); + + bc7enc_vec4F q = vec4F_sub(&color, &meanColorScaled); + float d = vec4F_dot(&q, &axis); + + l = minimumf(l, d); + h = maximumf(h, d); + } + + l *= (1.0f / 255.0f); + h *= (1.0f / 255.0f); + + bc7enc_vec4F b0 = vec4F_mul(&axis, l); + bc7enc_vec4F b1 = vec4F_mul(&axis, h); + bc7enc_vec4F c0 = vec4F_add(&meanColor, &b0); + bc7enc_vec4F c1 = vec4F_add(&meanColor, &b1); + minColor = vec4F_saturate(&c0); + maxColor = vec4F_saturate(&c1); + + bc7enc_vec4F whiteVec; + vec4F_set_scalar(&whiteVec, 1.0f); + if (vec4F_dot(&minColor, &whiteVec) > vec4F_dot(&maxColor, &whiteVec)) + { +#if 1 + std::swap(minColor.m_c[0], maxColor.m_c[0]); + std::swap(minColor.m_c[1], maxColor.m_c[1]); + std::swap(minColor.m_c[2], maxColor.m_c[2]); + std::swap(minColor.m_c[3], maxColor.m_c[3]); +#elif 0 + // Fails to compile correctly with MSVC 2019 (code generation bug) + std::swap(minColor, maxColor); +#else + // Fails with MSVC 2019 + bc7enc_vec4F temp = minColor; + minColor = maxColor; + maxColor = temp; +#endif + } + + // First find a solution using the block's PCA. + if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pComp_params->m_least_squares_passes; i++) + { + // Now try to refine the solution using least squares by computing the optimal endpoints from the current selectors. + bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, pResults->m_pSelectors, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, pResults->m_pSelectors, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + } + + if ((!pParams->m_pForce_selectors) && (pComp_params->m_uber_level > 0)) + { + // In uber level 1, try varying the selectors a little, somewhat like cluster fit would. First try incrementing the minimum selectors, + // then try decrementing the selectrors, then try both. + uint8_t selectors_temp[16], selectors_temp1[16]; + memcpy(selectors_temp, pResults->m_pSelectors, pParams->m_num_pixels); + + const int max_selector = pParams->m_num_selector_weights - 1; + + uint32_t min_sel = 256; + uint32_t max_sel = 0; + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + min_sel = minimumu(min_sel, sel); + max_sel = maximumu(max_sel, sel); + } + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == min_sel) && (sel < (pParams->m_num_selector_weights - 1))) + sel++; + selectors_temp1[i] = (uint8_t)sel; + } + + bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == max_sel) && (sel > 0)) + sel--; + selectors_temp1[i] = (uint8_t)sel; + } + + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + uint32_t sel = selectors_temp[i]; + if ((sel == min_sel) && (sel < (pParams->m_num_selector_weights - 1))) + sel++; + else if ((sel == max_sel) && (sel > 0)) + sel--; + selectors_temp1[i] = (uint8_t)sel; + } + + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + + // In uber levels 2+, try taking more advantage of endpoint extrapolation by scaling the selectors in one direction or another. + const uint32_t uber_err_thresh = (pParams->m_num_pixels * 56) >> 4; + if ((pComp_params->m_uber_level >= 2) && (pResults->m_best_overall_err > uber_err_thresh)) + { + const int Q = (pComp_params->m_uber_level >= 4) ? (pComp_params->m_uber_level - 2) : 1; + for (int ly = -Q; ly <= 1; ly++) + { + for (int hy = max_selector - 1; hy <= (max_selector + Q); hy++) + { + if ((ly == 0) && (hy == max_selector)) + continue; + + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + selectors_temp1[i] = (uint8_t)clampf(floorf((float)max_selector * ((float)selectors_temp[i] - (float)ly) / ((float)hy - (float)ly) + .5f), 0, (float)max_selector); + + //bc7enc_vec4F xl, xh; + vec4F_set_scalar(&xl, 0.0f); + vec4F_set_scalar(&xh, 0.0f); + if (pParams->m_has_alpha) + compute_least_squares_endpoints_rgba(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + else + compute_least_squares_endpoints_rgb(pParams->m_num_pixels, selectors_temp1, pParams->m_pSelector_weightsx, &xl, &xh, pParams->m_pPixels); + + xl = vec4F_mul(&xl, (1.0f / 255.0f)); + xh = vec4F_mul(&xh, (1.0f / 255.0f)); + + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + return 0; + } + } + } + } + + if (!pParams->m_pForce_selectors) + { + // Try encoding the partition as a single color by using the optimal single colors tables to encode the block to its mean. + if (mode == 1) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_mode1_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 8) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_4bit_3bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 7) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range7_2bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 8) && (pParams->m_num_selector_weights == 4) && (pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f), a = (int)(.5f + meanColor.m_c[3] * 255.0f); + uint64_t avg_err = pack_astc_4bit_2bit_to_one_color_rgba(pParams, &avg_results, r, g, b, a, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 13) && (pParams->m_num_selector_weights == 4) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range13_2bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + else if ((pParams->m_astc_endpoint_range == 11) && (pParams->m_num_selector_weights == 32) && (!pParams->m_has_alpha)) + { + color_cell_compressor_results avg_results = *pResults; + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); + uint64_t avg_err = pack_astc_range11_5bit_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); + if (avg_err < pResults->m_best_overall_err) + { + *pResults = avg_results; + memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); + pResults->m_best_overall_err = avg_err; + } + } + } + +#if BC7ENC_CHECK_OVERALL_ERROR + check_best_overall_error(pParams, pResults); +#endif + + return pResults->m_best_overall_err; +} + +uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t *pWeight_table, + uint32_t num_pixels, const color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]) +{ + assert(num_comps == 3 || num_comps == 4); + assert(num_weights >= 1 && num_weights <= 32); + assert(pWeight_table[0] == 0 && pWeight_table[num_weights - 1] == 64); + + // Find RGB bounds as an approximation of the block's principle axis + uint32_t lr = 255, lg = 255, lb = 255, la = 255; + uint32_t hr = 0, hg = 0, hb = 0, ha = 0; + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[3] < la) la = pC->m_c[3]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + if (pC->m_c[3] > ha) ha = pC->m_c[3]; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + } + la = 255; + ha = 255; + } + + color_quad_u8 lowColor, highColor; + color_quad_u8_set(&lowColor, lr, lg, lb, la); + color_quad_u8_set(&highColor, hr, hg, hb, ha); + + // Place endpoints at bbox diagonals and compute interpolated colors + color_quad_u8 weightedColors[32]; + + weightedColors[0] = lowColor; + weightedColors[num_weights - 1] = highColor; + for (uint32_t i = 1; i < (num_weights - 1); i++) + { + weightedColors[i].m_c[0] = (uint8_t)astc_interpolate(lowColor.m_c[0], highColor.m_c[0], pWeight_table[i]); + weightedColors[i].m_c[1] = (uint8_t)astc_interpolate(lowColor.m_c[1], highColor.m_c[1], pWeight_table[i]); + weightedColors[i].m_c[2] = (uint8_t)astc_interpolate(lowColor.m_c[2], highColor.m_c[2], pWeight_table[i]); + weightedColors[i].m_c[3] = (num_comps == 4) ? (uint8_t)astc_interpolate(lowColor.m_c[3], highColor.m_c[3], pWeight_table[i]) : 255; + } + + // Compute dots and thresholds + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; + const int aa = highColor.m_c[3] - lowColor.m_c[3]; + + int dots[32]; + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_weights; i++) + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab + weightedColors[i].m_c[3] * aa; + } + else + { + assert(aa == 0); + for (uint32_t i = 0; i < num_weights; i++) + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab; + } + + int thresh[32 - 1]; + for (uint32_t i = 0; i < (num_weights - 1); i++) + thresh[i] = (dots[i] + dots[i + 1] + 1) >> 1; + + uint64_t total_err = 0; + if ((weights[0] | weights[1] | weights[2] | weights[3]) == 1) + { + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int da = (int)pE1->m_c[3] - (int)pC->m_c[3]; + + total_err += (dr * dr) + (dg * dg) + (db * db) + (da * da); + if (total_err > best_err_so_far) + break; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + + total_err += (dr * dr) + (dg * dg) + (db * db); + if (total_err > best_err_so_far) + break; + } + } + } + else + { + if (num_comps == 4) + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int da = (int)pE1->m_c[3] - (int)pC->m_c[3]; + + total_err += weights[0] * (dr * dr) + weights[1] * (dg * dg) + weights[2] * (db * db) + weights[3] * (da * da); + if (total_err > best_err_so_far) + break; + } + } + else + { + for (uint32_t i = 0; i < num_pixels; i++) + { + const color_quad_u8* pC = &pPixels[i]; + + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; + + // Find approximate selector + uint32_t s = 0; + for (int j = num_weights - 2; j >= 0; j--) + { + if (d >= thresh[j]) + { + s = j + 1; + break; + } + } + + // Compute error + const color_quad_u8* pE1 = &weightedColors[s]; + + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + + total_err += weights[0] * (dr * dr) + weights[1] * (dg * dg) + weights[2] * (db * db); + if (total_err > best_err_so_far) + break; + } + } + } + + return total_err; +} + +} // namespace basisu diff --git a/thirdparty/basis_universal/encoder/basisu_bc7enc.h b/thirdparty/basis_universal/encoder/basisu_bc7enc.h new file mode 100644 index 0000000000..23469912e2 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_bc7enc.h @@ -0,0 +1,131 @@ +// File: basisu_bc7enc.h +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_enc.h" +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + +#define BC7ENC_MAX_PARTITIONS1 (64) +#define BC7ENC_MAX_UBER_LEVEL (4) + + typedef uint8_t bc7enc_bool; + +#define BC7ENC_TRUE (1) +#define BC7ENC_FALSE (0) + + typedef struct { float m_c[4]; } bc7enc_vec4F; + + extern const float g_bc7_weights1x[2 * 4]; + extern const float g_bc7_weights2x[4 * 4]; + extern const float g_bc7_weights3x[8 * 4]; + extern const float g_bc7_weights4x[16 * 4]; + extern const float g_astc_weights4x[16 * 4]; + extern const float g_astc_weights5x[32 * 4]; + extern const float g_astc_weights_3levelsx[3 * 4]; + + extern basist::astc_quant_bin g_astc_sorted_order_unquant[basist::BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] + + struct color_cell_compressor_params + { + uint32_t m_num_pixels; + const basist::color_quad_u8* m_pPixels; + + uint32_t m_num_selector_weights; + const uint32_t* m_pSelector_weights; + + const bc7enc_vec4F* m_pSelector_weightsx; + uint32_t m_comp_bits; + + const uint8_t *m_pForce_selectors; + + // Non-zero m_astc_endpoint_range enables ASTC mode. m_comp_bits and m_has_pbits are always false. We only support 2, 3, or 4 bit weight encodings. + uint32_t m_astc_endpoint_range; + + uint32_t m_weights[4]; + bc7enc_bool m_has_alpha; + bc7enc_bool m_has_pbits; + bc7enc_bool m_endpoints_share_pbit; + bc7enc_bool m_perceptual; + }; + + struct color_cell_compressor_results + { + uint64_t m_best_overall_err; + basist::color_quad_u8 m_low_endpoint; + basist::color_quad_u8 m_high_endpoint; + uint32_t m_pbits[2]; + uint8_t* m_pSelectors; + uint8_t* m_pSelectors_temp; + + // Encoded ASTC indices, if ASTC mode is enabled + basist::color_quad_u8 m_astc_low_endpoint; + basist::color_quad_u8 m_astc_high_endpoint; + }; + + struct bc7enc_compress_block_params + { + // m_max_partitions_mode1 may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_max_partitions_mode1; + + // Relative RGBA or YCbCrA weights. + uint32_t m_weights[4]; + + // m_uber_level may range from 0 to BC7ENC_MAX_UBER_LEVEL. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_uber_level; + + // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB. + bc7enc_bool m_perceptual; + + uint32_t m_least_squares_passes; + }; + + uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, const bc7enc_compress_block_params* pComp_params); + + uint64_t color_cell_compression_est_astc( + uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeight_table, + uint32_t num_pixels, const basist::color_quad_u8* pPixels, + uint64_t best_err_so_far, const uint32_t weights[4]); + + inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_FALSE; + p->m_weights[0] = 1; + p->m_weights[1] = 1; + p->m_weights[2] = 1; + p->m_weights[3] = 1; + } + + inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params* p) + { + p->m_perceptual = BC7ENC_TRUE; + p->m_weights[0] = 128; + p->m_weights[1] = 64; + p->m_weights[2] = 16; + p->m_weights[3] = 32; + } + + inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params* p) + { + p->m_max_partitions_mode1 = BC7ENC_MAX_PARTITIONS1; + p->m_least_squares_passes = 1; + p->m_uber_level = 0; + bc7enc_compress_block_params_init_perceptual_weights(p); + } + + // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). + void bc7enc_compress_block_init(); + +} // namespace basisu diff --git a/thirdparty/basis_universal/encoder/basisu_comp.cpp b/thirdparty/basis_universal/encoder/basisu_comp.cpp new file mode 100644 index 0000000000..dc4ae11539 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp @@ -0,0 +1,2113 @@ +// basisu_comp.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_comp.h" +#include "basisu_enc.h" +#include <unordered_set> +#include <atomic> + +// basisu_transcoder.cpp is where basisu_miniz lives now, we just need the declarations here. +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES +#include "basisu_miniz.h" + +#if !BASISD_SUPPORT_KTX2 +#error BASISD_SUPPORT_KTX2 must be enabled (set to 1). +#endif + +#if BASISD_SUPPORT_KTX2_ZSTD +#include "../zstd/zstd.h" +#endif + +// Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all) +#define BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND (0) + +// Set to 1 to disable writing all KTX2 key values, triggering the validator bug. +#define BASISU_DISABLE_KTX2_KEY_VALUES (0) + +using namespace buminiz; + +#define BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN 0 +#define DEBUG_CROP_TEXTURE_TO_64x64 (0) +#define DEBUG_RESIZE_TEXTURE (0) +#define DEBUG_EXTRACT_SINGLE_BLOCK (0) + +namespace basisu +{ + basis_compressor::basis_compressor() : + m_basis_file_size(0), + m_basis_bits_per_texel(0.0f), + m_total_blocks(0), + m_auto_global_sel_pal(false), + m_any_source_image_has_alpha(false) + { + debug_printf("basis_compressor::basis_compressor\n"); + } + + bool basis_compressor::init(const basis_compressor_params ¶ms) + { + debug_printf("basis_compressor::init\n"); + + m_params = params; + + if (m_params.m_debug) + { + debug_printf("basis_compressor::init:\n"); + +#define PRINT_BOOL_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<int>(m_params.v), m_params.v.was_changed()); +#define PRINT_INT_VALUE(v) debug_printf("%s: %i %u\n", BASISU_STRINGIZE2(v), static_cast<int>(m_params.v), m_params.v.was_changed()); +#define PRINT_UINT_VALUE(v) debug_printf("%s: %u %u\n", BASISU_STRINGIZE2(v), static_cast<uint32_t>(m_params.v), m_params.v.was_changed()); +#define PRINT_FLOAT_VALUE(v) debug_printf("%s: %f %u\n", BASISU_STRINGIZE2(v), static_cast<float>(m_params.v), m_params.v.was_changed()); + + debug_printf("Has global selector codebook: %i\n", m_params.m_pSel_codebook != nullptr); + + debug_printf("Source images: %u, source filenames: %u, source alpha filenames: %i, Source mipmap images: %u\n", + m_params.m_source_images.size(), m_params.m_source_filenames.size(), m_params.m_source_alpha_filenames.size(), m_params.m_source_mipmap_images.size()); + + if (m_params.m_source_mipmap_images.size()) + { + debug_printf("m_source_mipmap_images array sizes:\n"); + for (uint32_t i = 0; i < m_params.m_source_mipmap_images.size(); i++) + debug_printf("%u ", m_params.m_source_mipmap_images[i].size()); + debug_printf("\n"); + } + + PRINT_BOOL_VALUE(m_uastc); + PRINT_BOOL_VALUE(m_y_flip); + PRINT_BOOL_VALUE(m_debug); + PRINT_BOOL_VALUE(m_validate); + PRINT_BOOL_VALUE(m_debug_images); + PRINT_BOOL_VALUE(m_global_sel_pal); + PRINT_BOOL_VALUE(m_auto_global_sel_pal); + PRINT_INT_VALUE(m_compression_level); + PRINT_BOOL_VALUE(m_no_hybrid_sel_cb); + PRINT_BOOL_VALUE(m_perceptual); + PRINT_BOOL_VALUE(m_no_endpoint_rdo); + PRINT_BOOL_VALUE(m_no_selector_rdo); + PRINT_BOOL_VALUE(m_read_source_images); + PRINT_BOOL_VALUE(m_write_output_basis_files); + PRINT_BOOL_VALUE(m_compute_stats); + PRINT_BOOL_VALUE(m_check_for_alpha); + PRINT_BOOL_VALUE(m_force_alpha); + debug_printf("swizzle: %d,%d,%d,%d\n", + m_params.m_swizzle[0], + m_params.m_swizzle[1], + m_params.m_swizzle[2], + m_params.m_swizzle[3]); + PRINT_BOOL_VALUE(m_renormalize); + PRINT_BOOL_VALUE(m_multithreading); + PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); + + PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh); + + PRINT_INT_VALUE(m_global_pal_bits); + PRINT_INT_VALUE(m_global_mod_bits); + + PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); + PRINT_FLOAT_VALUE(m_selector_rdo_thresh); + + PRINT_BOOL_VALUE(m_mip_gen); + PRINT_BOOL_VALUE(m_mip_renormalize); + PRINT_BOOL_VALUE(m_mip_wrapping); + PRINT_BOOL_VALUE(m_mip_fast); + PRINT_BOOL_VALUE(m_mip_srgb); + PRINT_FLOAT_VALUE(m_mip_premultiplied); + PRINT_FLOAT_VALUE(m_mip_scale); + PRINT_INT_VALUE(m_mip_smallest_dimension); + debug_printf("m_mip_filter: %s\n", m_params.m_mip_filter.c_str()); + + debug_printf("m_max_endpoint_clusters: %u\n", m_params.m_max_endpoint_clusters); + debug_printf("m_max_selector_clusters: %u\n", m_params.m_max_selector_clusters); + debug_printf("m_quality_level: %i\n", m_params.m_quality_level); + + debug_printf("m_tex_type: %u\n", m_params.m_tex_type); + debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); + debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); + debug_printf("m_pack_uastc_flags: 0x%X\n", m_params.m_pack_uastc_flags); + + PRINT_BOOL_VALUE(m_rdo_uastc); + PRINT_FLOAT_VALUE(m_rdo_uastc_quality_scalar); + PRINT_INT_VALUE(m_rdo_uastc_dict_size); + PRINT_FLOAT_VALUE(m_rdo_uastc_max_allowed_rms_increase_ratio); + PRINT_FLOAT_VALUE(m_rdo_uastc_skip_block_rms_thresh); + PRINT_FLOAT_VALUE(m_rdo_uastc_max_smooth_block_error_scale); + PRINT_FLOAT_VALUE(m_rdo_uastc_smooth_block_max_std_dev); + PRINT_BOOL_VALUE(m_rdo_uastc_favor_simpler_modes_in_rdo_mode) + PRINT_BOOL_VALUE(m_rdo_uastc_multithreading); + + PRINT_INT_VALUE(m_resample_width); + PRINT_INT_VALUE(m_resample_height); + PRINT_FLOAT_VALUE(m_resample_factor); + debug_printf("Has global codebooks: %u\n", m_params.m_pGlobal_codebooks ? 1 : 0); + if (m_params.m_pGlobal_codebooks) + { + debug_printf("Global codebook endpoints: %u selectors: %u\n", m_params.m_pGlobal_codebooks->get_endpoints().size(), m_params.m_pGlobal_codebooks->get_selectors().size()); + } + + PRINT_BOOL_VALUE(m_create_ktx2_file); + + debug_printf("KTX2 UASTC supercompression: %u\n", m_params.m_ktx2_uastc_supercompression); + debug_printf("KTX2 Zstd supercompression level: %i\n", (int)m_params.m_ktx2_zstd_supercompression_level); + debug_printf("KTX2 sRGB transfer func: %u\n", (int)m_params.m_ktx2_srgb_transfer_func); + debug_printf("Total KTX2 key values: %u\n", m_params.m_ktx2_key_values.size()); + for (uint32_t i = 0; i < m_params.m_ktx2_key_values.size(); i++) + { + debug_printf("Key: \"%s\"\n", m_params.m_ktx2_key_values[i].m_key.data()); + debug_printf("Value size: %u\n", m_params.m_ktx2_key_values[i].m_value.size()); + } + +#undef PRINT_BOOL_VALUE +#undef PRINT_INT_VALUE +#undef PRINT_UINT_VALUE +#undef PRINT_FLOAT_VALUE + } + + if ((m_params.m_read_source_images) && (!m_params.m_source_filenames.size())) + { + assert(0); + return false; + } + + return true; + } + + basis_compressor::error_code basis_compressor::process() + { + debug_printf("basis_compressor::process\n"); + + if (!read_source_images()) + return cECFailedReadingSourceImages; + + if (!validate_texture_type_constraints()) + return cECFailedValidating; + + if (m_params.m_create_ktx2_file) + { + if (!validate_ktx2_constraints()) + return cECFailedValidating; + } + + if (!extract_source_blocks()) + return cECFailedFrontEnd; + + if (m_params.m_uastc) + { + error_code ec = encode_slices_to_uastc(); + if (ec != cECSuccess) + return ec; + } + else + { + if (!process_frontend()) + return cECFailedFrontEnd; + + if (!extract_frontend_texture_data()) + return cECFailedFontendExtract; + + if (!process_backend()) + return cECFailedBackend; + } + + if (!create_basis_file_and_transcode()) + return cECFailedCreateBasisFile; + + if (m_params.m_create_ktx2_file) + { + if (!create_ktx2_file()) + return cECFailedCreateKTX2File; + } + + if (!write_output_files_and_compute_stats()) + return cECFailedWritingOutput; + + return cECSuccess; + } + + basis_compressor::error_code basis_compressor::encode_slices_to_uastc() + { + debug_printf("basis_compressor::encode_slices_to_uastc\n"); + + m_uastc_slice_textures.resize(m_slice_descs.size()); + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + m_uastc_slice_textures[slice_index].init(texture_format::cUASTC4x4, m_slice_descs[slice_index].m_orig_width, m_slice_descs[slice_index].m_orig_height); + + m_uastc_backend_output.m_tex_format = basist::basis_tex_format::cUASTC4x4; + m_uastc_backend_output.m_etc1s = false; + m_uastc_backend_output.m_slice_desc = m_slice_descs; + m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); + m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + gpu_image& tex = m_uastc_slice_textures[slice_index]; + basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + (void)slice_desc; + + const uint32_t num_blocks_x = tex.get_blocks_x(); + const uint32_t num_blocks_y = tex.get_blocks_y(); + const uint32_t total_blocks = tex.get_total_blocks(); + const image& source_image = m_slice_images[slice_index]; + + std::atomic<uint32_t> total_blocks_processed; + total_blocks_processed = 0; + + const uint32_t N = 256; + for (uint32_t block_index_iter = 0; block_index_iter < total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum<uint32_t>(total_blocks, block_index_iter + N); + + // FIXME: This sucks, but we're having a stack size related problem with std::function with emscripten. +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index, num_blocks_x, num_blocks_y, total_blocks, &source_image, &tex, &total_blocks_processed] + { +#endif + BASISU_NOTE_UNUSED(num_blocks_y); + + uint32_t uastc_flags = m_params.m_pack_uastc_flags; + if ((m_params.m_rdo_uastc) && (m_params.m_rdo_uastc_favor_simpler_modes_in_rdo_mode)) + uastc_flags |= cPackUASTCFavorSimplerModes; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_x = block_index % num_blocks_x; + const uint32_t block_y = block_index / num_blocks_x; + + color_rgba block_pixels[4][4]; + + source_image.extract_block_clamped((color_rgba*)block_pixels, block_x * 4, block_y * 4, 4, 4); + + basist::uastc_block& dest_block = *(basist::uastc_block*)tex.get_block_ptr(block_x, block_y); + + encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags); + + total_blocks_processed++; + + uint32_t val = total_blocks_processed; + if ((val & 16383) == 16383) + { + debug_printf("basis_compressor::encode_slices_to_uastc: %3.1f%% done\n", static_cast<float>(val) * 100.0f / total_blocks); + } + + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } // block_index_iter + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + if (m_params.m_rdo_uastc) + { + uastc_rdo_params rdo_params; + rdo_params.m_lambda = m_params.m_rdo_uastc_quality_scalar; + rdo_params.m_max_allowed_rms_increase_ratio = m_params.m_rdo_uastc_max_allowed_rms_increase_ratio; + rdo_params.m_skip_block_rms_thresh = m_params.m_rdo_uastc_skip_block_rms_thresh; + rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_dict_size; + rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_max_smooth_block_error_scale; + rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_smooth_block_max_std_dev; + + bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(), + (const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_flags, m_params.m_rdo_uastc_multithreading ? m_params.m_pJob_pool : nullptr, + (m_params.m_rdo_uastc_multithreading && m_params.m_pJob_pool) ? basisu::minimum<uint32_t>(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0); + if (!status) + { + return cECFailedUASTCRDOPostProcess; + } + } + + m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); + memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); + + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); + + } // slice_index + + return cECSuccess; + } + + bool basis_compressor::generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha) + { + debug_printf("basis_compressor::generate_mipmaps\n"); + + interval_timer tm; + tm.start(); + + uint32_t total_levels = 1; + uint32_t w = img.get_width(), h = img.get_height(); + while (maximum<uint32_t>(w, h) > (uint32_t)m_params.m_mip_smallest_dimension) + { + w = maximum(w >> 1U, 1U); + h = maximum(h >> 1U, 1U); + total_levels++; + } + +#if BASISU_USE_STB_IMAGE_RESIZE_FOR_MIPMAP_GEN + // Requires stb_image_resize + stbir_filter filter = STBIR_FILTER_DEFAULT; + if (m_params.m_mip_filter == "box") + filter = STBIR_FILTER_BOX; + else if (m_params.m_mip_filter == "triangle") + filter = STBIR_FILTER_TRIANGLE; + else if (m_params.m_mip_filter == "cubic") + filter = STBIR_FILTER_CUBICBSPLINE; + else if (m_params.m_mip_filter == "catmull") + filter = STBIR_FILTER_CATMULLROM; + else if (m_params.m_mip_filter == "mitchell") + filter = STBIR_FILTER_MITCHELL; + + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level); + const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level); + + image &level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + int result = stbir_resize_uint8_generic( + (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), + (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), + has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, + m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + nullptr); + + if (result == 0) + { + error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#else + for (uint32_t level = 1; level < total_levels; level++) + { + const uint32_t level_width = maximum<uint32_t>(1, img.get_width() >> level); + const uint32_t level_height = maximum<uint32_t>(1, img.get_height() >> level); + + image& level_img = *enlarge_vector(mips, 1); + level_img.resize(level_width, level_height); + + const image* pSource_image = &img; + + if (m_params.m_mip_fast) + { + if (level > 1) + pSource_image = &mips[level - 1]; + } + + bool status = image_resample(*pSource_image, level_img, m_params.m_mip_srgb, m_params.m_mip_filter.c_str(), m_params.m_mip_scale, m_params.m_mip_wrapping, 0, has_alpha ? 4 : 3); + if (!status) + { + error_printf("basis_compressor::generate_mipmaps: image_resample() failed!\n"); + return false; + } + + if (m_params.m_mip_renormalize) + level_img.renormalize_normal_map(); + } +#endif + + if (m_params.m_debug) + debug_printf("Total mipmap generation time: %f secs\n", tm.get_elapsed_secs()); + + return true; + } + + bool basis_compressor::read_source_images() + { + debug_printf("basis_compressor::read_source_images\n"); + + const uint32_t total_source_files = m_params.m_read_source_images ? (uint32_t)m_params.m_source_filenames.size() : (uint32_t)m_params.m_source_images.size(); + if (!total_source_files) + return false; + + m_stats.resize(0); + m_slice_descs.resize(0); + m_slice_images.resize(0); + + m_total_blocks = 0; + uint32_t total_macroblocks = 0; + + m_any_source_image_has_alpha = false; + + basisu::vector<image> source_images; + basisu::vector<std::string> source_filenames; + + // First load all source images, and determine if any have an alpha channel. + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + const char *pSource_filename = ""; + + image file_image; + + if (m_params.m_read_source_images) + { + pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); + + // Load the source image + if (!load_image(pSource_filename, file_image)) + { + error_printf("Failed reading source image: %s\n", pSource_filename); + return false; + } + + printf("Read source image \"%s\", %ux%u\n", pSource_filename, file_image.get_width(), file_image.get_height()); + + // Optionally load another image and put a grayscale version of it into the alpha channel. + if ((source_file_index < m_params.m_source_alpha_filenames.size()) && (m_params.m_source_alpha_filenames[source_file_index].size())) + { + const char *pSource_alpha_image = m_params.m_source_alpha_filenames[source_file_index].c_str(); + + image alpha_data; + + if (!load_image(pSource_alpha_image, alpha_data)) + { + error_printf("Failed reading source image: %s\n", pSource_alpha_image); + return false; + } + + printf("Read source alpha image \"%s\", %ux%u\n", pSource_alpha_image, alpha_data.get_width(), alpha_data.get_height()); + + alpha_data.crop(file_image.get_width(), file_image.get_height()); + + for (uint32_t y = 0; y < file_image.get_height(); y++) + for (uint32_t x = 0; x < file_image.get_width(); x++) + file_image(x, y).a = (uint8_t)alpha_data(x, y).get_709_luma(); + } + } + else + { + file_image = m_params.m_source_images[source_file_index]; + } + + if (m_params.m_renormalize) + file_image.renormalize_normal_map(); + + bool alpha_swizzled = false; + if (m_params.m_swizzle[0] != 0 || + m_params.m_swizzle[1] != 1 || + m_params.m_swizzle[2] != 2 || + m_params.m_swizzle[3] != 3) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < file_image.get_height(); y++) + for (uint32_t x = 0; x < file_image.get_width(); x++) + { + const color_rgba &c = file_image(x, y); + file_image(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + alpha_swizzled = m_params.m_swizzle[3] != 3; + } + + bool has_alpha = false; + if (m_params.m_force_alpha || alpha_swizzled) + has_alpha = true; + else if (!m_params.m_check_for_alpha) + file_image.set_alpha(255); + else if (file_image.has_alpha()) + has_alpha = true; + + if (has_alpha) + m_any_source_image_has_alpha = true; + + debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); + + if (m_params.m_y_flip) + file_image.flip_y(); + +#if DEBUG_EXTRACT_SINGLE_BLOCK + image block_image(4, 4); + const uint32_t block_x = 0; + const uint32_t block_y = 0; + block_image.blit(block_x * 4, block_y * 4, 4, 4, 0, 0, file_image, 0); + file_image = block_image; +#endif + +#if DEBUG_CROP_TEXTURE_TO_64x64 + file_image.resize(64, 64); +#endif + + if (m_params.m_resample_width > 0 && m_params.m_resample_height > 0) + { + int new_width = basisu::minimum<int>(m_params.m_resample_width, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum<int>(m_params.m_resample_height, BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } + else if (m_params.m_resample_factor > 0.0f) + { + int new_width = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_width() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + int new_height = basisu::minimum<int>(basisu::maximum(1, (int)ceilf(file_image.get_height() * m_params.m_resample_factor)), BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION); + + debug_printf("Resampling to %ix%i\n", new_width, new_height); + + // TODO: A box filter - kaiser looks too sharp on video. Let the caller control this. + image temp_img(new_width, new_height); + image_resample(file_image, temp_img, m_params.m_perceptual, "box"); // "kaiser"); + temp_img.swap(file_image); + } + + if ((!file_image.get_width()) || (!file_image.get_height())) + { + error_printf("basis_compressor::read_source_images: Source image has a zero width and/or height!\n"); + return false; + } + + if ((file_image.get_width() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION) || (file_image.get_height() > BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION)) + { + error_printf("basis_compressor::read_source_images: Source image is too large!\n"); + return false; + } + + source_images.push_back(file_image); + source_filenames.push_back(pSource_filename); + } + + // Check if the caller has generated their own mipmaps. + if (m_params.m_source_mipmap_images.size()) + { + // Make sure they've passed us enough mipmap chains. + if ((m_params.m_source_images.size() != m_params.m_source_mipmap_images.size()) || (total_source_files != m_params.m_source_images.size())) + { + error_printf("basis_compressor::read_source_images(): m_params.m_source_mipmap_images.size() must equal m_params.m_source_images.size()!\n"); + return false; + } + + // Check if any of the user-supplied mipmap levels has alpha. + // We're assuming the user has already preswizzled their mipmap source images. + if (!m_any_source_image_has_alpha) + { + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) + { + const image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + if (mip_img.has_alpha()) + { + m_any_source_image_has_alpha = true; + break; + } + } + + if (m_any_source_image_has_alpha) + break; + } + } + } + + debug_printf("Any source image has alpha: %u\n", m_any_source_image_has_alpha); + + for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) + { + image &file_image = source_images[source_file_index]; + const std::string &source_filename = source_filenames[source_file_index]; + + // Now, for each source image, create the slices corresponding to that image. + basisu::vector<image> slices; + + slices.reserve(32); + + // The first (largest) mipmap level. + slices.push_back(file_image); + + if (m_params.m_source_mipmap_images.size()) + { + // User-provided mipmaps for each layer or image in the texture array. + for (uint32_t mip_index = 0; mip_index < m_params.m_source_mipmap_images[source_file_index].size(); mip_index++) + { + image& mip_img = m_params.m_source_mipmap_images[source_file_index][mip_index]; + + if (m_params.m_swizzle[0] != 0 || + m_params.m_swizzle[1] != 1 || + m_params.m_swizzle[2] != 2 || + m_params.m_swizzle[3] != 3) + { + // Used for XY normal maps in RG - puts X in color, Y in alpha + for (uint32_t y = 0; y < mip_img.get_height(); y++) + for (uint32_t x = 0; x < mip_img.get_width(); x++) + { + const color_rgba &c = mip_img(x, y); + mip_img(x, y).set_noclamp_rgba(c[m_params.m_swizzle[0]], c[m_params.m_swizzle[1]], c[m_params.m_swizzle[2]], c[m_params.m_swizzle[3]]); + } + } + + slices.push_back(mip_img); + } + } + else if (m_params.m_mip_gen) + { + // Automatically generate mipmaps. + if (!generate_mipmaps(file_image, slices, m_any_source_image_has_alpha)) + return false; + } + + uint_vec mip_indices(slices.size()); + for (uint32_t i = 0; i < slices.size(); i++) + mip_indices[i] = i; + + if ((m_any_source_image_has_alpha) && (!m_params.m_uastc)) + { + // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. + basisu::vector<image> alpha_slices; + uint_vec new_mip_indices; + + alpha_slices.reserve(slices.size() * 2); + + for (uint32_t i = 0; i < slices.size(); i++) + { + image lvl_rgb(slices[i]); + image lvl_a(lvl_rgb); + + for (uint32_t y = 0; y < lvl_a.get_height(); y++) + { + for (uint32_t x = 0; x < lvl_a.get_width(); x++) + { + uint8_t a = lvl_a(x, y).a; + lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); + } + } + + lvl_rgb.set_alpha(255); + + alpha_slices.push_back(lvl_rgb); + new_mip_indices.push_back(i); + + alpha_slices.push_back(lvl_a); + new_mip_indices.push_back(i); + } + + slices.swap(alpha_slices); + mip_indices.swap(new_mip_indices); + } + + assert(slices.size() == mip_indices.size()); + + for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) + { + image& slice_image = slices[slice_index]; + const uint32_t orig_width = slice_image.get_width(); + const uint32_t orig_height = slice_image.get_height(); + + bool is_alpha_slice = false; + if (m_any_source_image_has_alpha) + { + if (m_params.m_uastc) + { + is_alpha_slice = slice_image.has_alpha(); + } + else + { + is_alpha_slice = (slice_index & 1) != 0; + } + } + + // Enlarge the source image to 4x4 block boundaries, duplicating edge pixels if necessary to avoid introducing extra colors into blocks. + slice_image.crop_dup_borders(slice_image.get_block_width(4) * 4, slice_image.get_block_height(4) * 4); + + if (m_params.m_debug_images) + { + save_png(string_format("basis_debug_source_image_%u_slice_%u.png", source_file_index, slice_index).c_str(), slice_image); + } + + enlarge_vector(m_stats, 1); + enlarge_vector(m_slice_images, 1); + enlarge_vector(m_slice_descs, 1); + + const uint32_t dest_image_index = (uint32_t)m_stats.size() - 1; + + m_stats[dest_image_index].m_filename = source_filename.c_str(); + m_stats[dest_image_index].m_width = orig_width; + m_stats[dest_image_index].m_height = orig_height; + + m_slice_images[dest_image_index] = slice_image; + + debug_printf("****** Slice %u: mip %u, alpha_slice: %u, filename: \"%s\", original: %ux%u actual: %ux%u\n", m_slice_descs.size() - 1, mip_indices[slice_index], is_alpha_slice, source_filename.c_str(), orig_width, orig_height, slice_image.get_width(), slice_image.get_height()); + + basisu_backend_slice_desc &slice_desc = m_slice_descs[dest_image_index]; + + slice_desc.m_first_block_index = m_total_blocks; + + slice_desc.m_orig_width = orig_width; + slice_desc.m_orig_height = orig_height; + + slice_desc.m_width = slice_image.get_width(); + slice_desc.m_height = slice_image.get_height(); + + slice_desc.m_num_blocks_x = slice_image.get_block_width(4); + slice_desc.m_num_blocks_y = slice_image.get_block_height(4); + + slice_desc.m_num_macroblocks_x = (slice_desc.m_num_blocks_x + 1) >> 1; + slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; + + slice_desc.m_source_file_index = source_file_index; + + slice_desc.m_mip_index = mip_indices[slice_index]; + + slice_desc.m_alpha = is_alpha_slice; + slice_desc.m_iframe = false; + if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) + { + slice_desc.m_iframe = (source_file_index == 0); + } + + m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y; + + } // slice_index + + } // source_file_index + + debug_printf("Total blocks: %u, Total macroblocks: %u\n", m_total_blocks, total_macroblocks); + + // Make sure we don't have too many slices + if (m_slice_descs.size() > BASISU_MAX_SLICES) + { + error_printf("Too many slices!\n"); + return false; + } + + // Basic sanity check on the slices + for (uint32_t i = 1; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &prev_slice_desc = m_slice_descs[i - 1]; + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + // Make sure images are in order + int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; + if (image_delta > 1) + return false; + + // Make sure mipmap levels are in order + if (!image_delta) + { + int level_delta = (int)slice_desc.m_mip_index - (int)prev_slice_desc.m_mip_index; + if (level_delta > 1) + return false; + } + } + + if (m_params.m_status_output) + { + printf("Total basis file slices: %u\n", (uint32_t)m_slice_descs.size()); + } + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + if (m_params.m_status_output) + { + printf("Slice: %u, alpha: %u, orig width/height: %ux%u, width/height: %ux%u, first_block: %u, image_index: %u, mip_level: %u, iframe: %u\n", + i, slice_desc.m_alpha, slice_desc.m_orig_width, slice_desc.m_orig_height, slice_desc.m_width, slice_desc.m_height, slice_desc.m_first_block_index, slice_desc.m_source_file_index, slice_desc.m_mip_index, slice_desc.m_iframe); + } + + if (m_any_source_image_has_alpha) + { + if (!m_params.m_uastc) + { + // For ETC1S, alpha slices must be at odd slice indices. + if (slice_desc.m_alpha) + { + if ((i & 1) == 0) + return false; + + const basisu_backend_slice_desc& prev_slice_desc = m_slice_descs[i - 1]; + + // Make sure previous slice has this image's color data + if (prev_slice_desc.m_source_file_index != slice_desc.m_source_file_index) + return false; + if (prev_slice_desc.m_alpha) + return false; + if (prev_slice_desc.m_mip_index != slice_desc.m_mip_index) + return false; + if (prev_slice_desc.m_num_blocks_x != slice_desc.m_num_blocks_x) + return false; + if (prev_slice_desc.m_num_blocks_y != slice_desc.m_num_blocks_y) + return false; + } + else if (i & 1) + return false; + } + } + else if (slice_desc.m_alpha) + { + return false; + } + + if ((slice_desc.m_orig_width > slice_desc.m_width) || (slice_desc.m_orig_height > slice_desc.m_height)) + return false; + if ((slice_desc.m_source_file_index == 0) && (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) + { + if (!slice_desc.m_iframe) + return false; + } + } + + return true; + } + + // Do some basic validation for 2D arrays, cubemaps, video, and volumes. + bool basis_compressor::validate_texture_type_constraints() + { + debug_printf("basis_compressor::validate_texture_type_constraints\n"); + + // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). + if (m_params.m_tex_type == basist::cBASISTexType2D) + return true; + + uint32_t total_basis_images = 0; + + for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_basis_images = maximum<uint32_t>(total_basis_images, slice_desc.m_source_file_index + 1); + } + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + // For cubemaps, validate that the total # of Basis images is a multiple of 6. + if ((total_basis_images % 6) != 0) + { + error_printf("basis_compressor::validate_texture_type_constraints: For cubemaps the total number of input images is not a multiple of 6!\n"); + return false; + } + } + + // Now validate that all the mip0's have the same dimensions, and that each image has the same # of mipmap levels. + uint_vec image_mipmap_levels(total_basis_images); + + int width = -1, height = -1; + for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); + + if (slice_desc.m_mip_index != 0) + continue; + + if (width < 0) + { + width = slice_desc.m_orig_width; + height = slice_desc.m_orig_height; + } + else if ((width != (int)slice_desc.m_orig_width) || (height != (int)slice_desc.m_orig_height)) + { + error_printf("basis_compressor::validate_texture_type_constraints: The source image resolutions are not all equal!\n"); + return false; + } + } + + for (size_t i = 1; i < image_mipmap_levels.size(); i++) + { + if (image_mipmap_levels[0] != image_mipmap_levels[i]) + { + error_printf("basis_compressor::validate_texture_type_constraints: Each image must have the same number of mipmap levels!\n"); + return false; + } + } + + return true; + } + + bool basis_compressor::extract_source_blocks() + { + debug_printf("basis_compressor::extract_source_blocks\n"); + + m_source_blocks.resize(m_total_blocks); + + for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const image& source_image = m_slice_images[slice_index]; + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + source_image.extract_block_clamped(m_source_blocks[slice_desc.m_first_block_index + block_x + block_y * num_blocks_x].get_ptr(), block_x * 4, block_y * 4, 4, 4); + } + + return true; + } + + bool basis_compressor::process_frontend() + { + debug_printf("basis_compressor::process_frontend\n"); + +#if 0 + // TODO + basis_etc1_pack_params pack_params; + pack_params.m_quality = cETCQualityMedium; + pack_params.m_perceptual = m_params.m_perceptual; + pack_params.m_use_color4 = false; + + pack_etc1_block_context pack_context; + + std::unordered_set<uint64_t> endpoint_hash; + std::unordered_set<uint32_t> selector_hash; + + for (uint32_t i = 0; i < m_source_blocks.size(); i++) + { + etc_block blk; + pack_etc1_block(blk, m_source_blocks[i].get_ptr(), pack_params, pack_context); + + const color_rgba c0(blk.get_block_color(0, false)); + endpoint_hash.insert((c0.r | (c0.g << 5) | (c0.b << 10)) | (blk.get_inten_table(0) << 16)); + + const color_rgba c1(blk.get_block_color(1, false)); + endpoint_hash.insert((c1.r | (c1.g << 5) | (c1.b << 10)) | (blk.get_inten_table(1) << 16)); + + selector_hash.insert(blk.get_raw_selector_bits()); + } + + const uint32_t total_unique_endpoints = (uint32_t)endpoint_hash.size(); + const uint32_t total_unique_selectors = (uint32_t)selector_hash.size(); + + if (m_params.m_debug) + { + debug_printf("Unique endpoints: %u, unique selectors: %u\n", total_unique_endpoints, total_unique_selectors); + } +#endif + + const double total_texels = m_total_blocks * 16.0f; + + int endpoint_clusters = m_params.m_max_endpoint_clusters; + int selector_clusters = m_params.m_max_selector_clusters; + + if (endpoint_clusters > basisu_frontend::cMaxEndpointClusters) + { + error_printf("Too many endpoint clusters! (%u but max is %u)\n", endpoint_clusters, basisu_frontend::cMaxEndpointClusters); + return false; + } + if (selector_clusters > basisu_frontend::cMaxSelectorClusters) + { + error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); + return false; + } + + if (m_params.m_quality_level != -1) + { + const float quality = saturate(m_params.m_quality_level / 255.0f); + + const float bits_per_endpoint_cluster = 14.0f; + const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f + int max_endpoints = static_cast<int>((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); + + const float mid = 128.0f / 255.0f; + + float color_endpoint_quality = quality; + + const float endpoint_split_point = 0.5f; + + // In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough. + const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800; + const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192; + + if (color_endpoint_quality <= mid) + { + color_endpoint_quality = lerp(0.0f, endpoint_split_point, powf(color_endpoint_quality / mid, .65f)); + + max_endpoints = clamp<int>(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE); + max_endpoints = minimum<uint32_t>(max_endpoints, m_total_blocks); + + if (max_endpoints < 64) + max_endpoints = 64; + endpoint_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(32, static_cast<float>(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + } + else + { + color_endpoint_quality = powf((color_endpoint_quality - mid) / (1.0f - mid), 1.6f); + + max_endpoints = clamp<int>(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE); + max_endpoints = minimum<uint32_t>(max_endpoints, m_total_blocks); + + if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE) + max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE; + endpoint_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast<float>(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); + } + + float bits_per_selector_cluster = m_params.m_global_sel_pal ? 21.0f : 14.0f; + + const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f + int max_selectors = static_cast<int>((max_desired_selector_cluster_bits_per_texel * total_texels) / bits_per_selector_cluster); + max_selectors = clamp<int>(max_selectors, 256, basisu_frontend::cMaxSelectorClusters); + max_selectors = minimum<uint32_t>(max_selectors, m_total_blocks); + + float color_selector_quality = quality; + //color_selector_quality = powf(color_selector_quality, 1.65f); + color_selector_quality = powf(color_selector_quality, 2.62f); + + if (max_selectors < 96) + max_selectors = 96; + selector_clusters = clamp<uint32_t>((uint32_t)(.5f + lerp<float>(96, static_cast<float>(max_selectors), color_selector_quality)), 8, basisu_frontend::cMaxSelectorClusters); + + debug_printf("Max endpoints: %u, max selectors: %u\n", endpoint_clusters, selector_clusters); + + if (m_params.m_quality_level >= 223) + { + if (!m_params.m_selector_rdo_thresh.was_changed()) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .25f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .25f; + } + } + else if (m_params.m_quality_level >= 192) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .5f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .5f; + } + else if (m_params.m_quality_level >= 160) + { + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= .75f; + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= .75f; + } + else if (m_params.m_quality_level >= 129) + { + float l = (quality - 129 / 255.0f) / ((160 - 129) / 255.0f); + + if (!m_params.m_endpoint_rdo_thresh.was_changed()) + m_params.m_endpoint_rdo_thresh *= lerp<float>(1.0f, .75f, l); + + if (!m_params.m_selector_rdo_thresh.was_changed()) + m_params.m_selector_rdo_thresh *= lerp<float>(1.0f, .75f, l); + } + } + + m_auto_global_sel_pal = false; + if (!m_params.m_global_sel_pal && m_params.m_auto_global_sel_pal) + { + const float bits_per_selector_cluster = 31.0f; + double selector_codebook_bpp_est = (bits_per_selector_cluster * selector_clusters) / total_texels; + debug_printf("selector_codebook_bpp_est: %f\n", selector_codebook_bpp_est); + const float force_global_sel_pal_bpp_threshold = .15f; + if ((total_texels <= 128.0f*128.0f) && (selector_codebook_bpp_est > force_global_sel_pal_bpp_threshold)) + { + m_auto_global_sel_pal = true; + debug_printf("Auto global selector palette enabled\n"); + } + } + + basisu_frontend::params p; + p.m_num_source_blocks = m_total_blocks; + p.m_pSource_blocks = &m_source_blocks[0]; + p.m_max_endpoint_clusters = endpoint_clusters; + p.m_max_selector_clusters = selector_clusters; + p.m_perceptual = m_params.m_perceptual; + p.m_debug_stats = m_params.m_debug; + p.m_debug_images = m_params.m_debug_images; + p.m_compression_level = m_params.m_compression_level; + p.m_tex_type = m_params.m_tex_type; + p.m_multithreaded = m_params.m_multithreading; + p.m_disable_hierarchical_endpoint_codebooks = m_params.m_disable_hierarchical_endpoint_codebooks; + p.m_validate = m_params.m_validate; + p.m_pJob_pool = m_params.m_pJob_pool; + p.m_pGlobal_codebooks = m_params.m_pGlobal_codebooks; + + if ((m_params.m_global_sel_pal) || (m_auto_global_sel_pal)) + { + p.m_pGlobal_sel_codebook = m_params.m_pSel_codebook; + p.m_num_global_sel_codebook_pal_bits = m_params.m_global_pal_bits; + p.m_num_global_sel_codebook_mod_bits = m_params.m_global_mod_bits; + p.m_use_hybrid_selector_codebooks = !m_params.m_no_hybrid_sel_cb; + p.m_hybrid_codebook_quality_thresh = m_params.m_hybrid_sel_cb_quality_thresh; + } + + if (!m_frontend.init(p)) + { + error_printf("basisu_frontend::init() failed!\n"); + return false; + } + + m_frontend.compress(); + + if (m_params.m_debug_images) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + char filename[1024]; +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); + +#ifdef _WIN32 + sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#else + snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); +#endif + m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); + } + } + + return true; + } + + bool basis_compressor::extract_frontend_texture_data() + { + debug_printf("basis_compressor::extract_frontend_texture_data\n"); + + m_frontend_output_textures.resize(m_slice_descs.size()); + m_best_etc1s_images.resize(m_slice_descs.size()); + m_best_etc1s_images_unpacked.resize(m_slice_descs.size()); + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[i]; + + const uint32_t num_blocks_x = slice_desc.m_num_blocks_x; + const uint32_t num_blocks_y = slice_desc.m_num_blocks_y; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + + m_frontend_output_textures[i].init(texture_format::cETC1, width, height); + + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_frontend_output_textures[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_output_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + +#if 0 + if (m_params.m_debug_images) + { + char filename[1024]; + sprintf_s(filename, sizeof(filename), "rdo_etc_frontend_%u_", i); + write_etc1_vis_images(m_frontend_output_textures[i], filename); + } +#endif + + m_best_etc1s_images[i].init(texture_format::cETC1, width, height); + for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) + for (uint32_t block_x = 0; block_x < num_blocks_x; block_x++) + memcpy(m_best_etc1s_images[i].get_block_ptr(block_x, block_y, 0), &m_frontend.get_etc1s_block(slice_desc.m_first_block_index + block_x + block_y * num_blocks_x), sizeof(etc_block)); + + m_best_etc1s_images[i].unpack(m_best_etc1s_images_unpacked[i]); + } + + return true; + } + + bool basis_compressor::process_backend() + { + debug_printf("basis_compressor::process_backend\n"); + + basisu_backend_params backend_params; + backend_params.m_debug = m_params.m_debug; + backend_params.m_debug_images = m_params.m_debug_images; + backend_params.m_etc1s = true; + backend_params.m_compression_level = m_params.m_compression_level; + + if (!m_params.m_no_endpoint_rdo) + backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; + + if (!m_params.m_no_selector_rdo) + backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; + + backend_params.m_use_global_sel_codebook = (m_frontend.get_params().m_pGlobal_sel_codebook != NULL); + backend_params.m_global_sel_codebook_pal_bits = m_frontend.get_params().m_num_global_sel_codebook_pal_bits; + backend_params.m_global_sel_codebook_mod_bits = m_frontend.get_params().m_num_global_sel_codebook_mod_bits; + backend_params.m_use_hybrid_sel_codebooks = m_frontend.get_params().m_use_hybrid_selector_codebooks; + backend_params.m_used_global_codebooks = m_frontend.get_params().m_pGlobal_codebooks != nullptr; + + m_backend.init(&m_frontend, backend_params, m_slice_descs, m_params.m_pSel_codebook); + uint32_t total_packed_bytes = m_backend.encode(); + + if (!total_packed_bytes) + { + error_printf("basis_compressor::encode() failed!\n"); + return false; + } + + debug_printf("Total packed bytes (estimated): %u\n", total_packed_bytes); + + return true; + } + + bool basis_compressor::create_basis_file_and_transcode() + { + debug_printf("basis_compressor::create_basis_file_and_transcode\n"); + + const basisu_backend_output& encoded_output = m_params.m_uastc ? m_uastc_backend_output : m_backend.get_output(); + + if (!m_basis_file.init(encoded_output, m_params.m_tex_type, m_params.m_userdata0, m_params.m_userdata1, m_params.m_y_flip, m_params.m_us_per_frame)) + { + error_printf("basis_compressor::create_basis_file_and_transcode: basisu_backend:init() failed!\n"); + return false; + } + + const uint8_vec &comp_data = m_basis_file.get_compressed_data(); + + m_output_basis_file = comp_data; + + interval_timer tm; + tm.start(); + + basist::basisu_transcoder_init(); + + debug_printf("basist::basisu_transcoder_init: Took %f ms\n", tm.get_elapsed_ms()); + + // Verify the compressed data by transcoding it to ASTC (or ETC1)/BC7 and validating the CRC's. + basist::basisu_transcoder decoder(m_params.m_pSel_codebook); + if (!decoder.validate_file_checksums(&comp_data[0], (uint32_t)comp_data.size(), true)) + { + error_printf("decoder.validate_file_checksums() failed!\n"); + return false; + } + + m_decoded_output_textures.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked.resize(m_slice_descs.size()); + + m_decoded_output_textures_bc7.resize(m_slice_descs.size()); + m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size()); + + tm.start(); + if (m_params.m_pGlobal_codebooks) + { + decoder.set_global_codebooks(m_params.m_pGlobal_codebooks); + } + + if (!decoder.start_transcoding(&comp_data[0], (uint32_t)comp_data.size())) + { + error_printf("decoder.start_transcoding() failed!\n"); + return false; + } + + double start_transcoding_time = tm.get_elapsed_secs(); + + debug_printf("basisu_compressor::start_transcoding() took %3.3fms\n", start_transcoding_time * 1000.0f); + + uint32_t total_orig_pixels = 0; + uint32_t total_texels = 0; + + double total_time_etc1s_or_astc = 0; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(m_params.m_uastc ? texture_format::cASTC4x4 : texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + tm.start(); + + basist::block_format format = m_params.m_uastc ? basist::block_format::cASTC_4x4 : basist::block_format::cETC1; + uint32_t bytes_per_block = m_params.m_uastc ? 16 : 8; + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast<etc_block *>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, format, bytes_per_block)) + { + error_printf("Transcoding failed on slice %u!\n", i); + return false; + } + + total_time_etc1s_or_astc += tm.get_elapsed_secs(); + + if (encoded_output.m_tex_format == basist::basis_tex_format::cETC1S) + { + uint32_t image_crc16 = basist::crc16(decoded_texture.get_ptr(), decoded_texture.get_size_in_bytes(), 0); + if (image_crc16 != encoded_output.m_slice_image_crcs[i]) + { + error_printf("Decoded image data CRC check failed on slice %u!\n", i); + return false; + } + debug_printf("Decoded image data CRC check succeeded on slice %i\n", i); + } + + m_decoded_output_textures[i] = decoded_texture; + + total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height; + total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height; + } + + double total_time_bc7 = 0; + + if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && + basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cETC1S)) + { + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + gpu_image decoded_texture; + decoded_texture.init(texture_format::cBC7, m_slice_descs[i].m_width, m_slice_descs[i].m_height); + + tm.start(); + + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, + reinterpret_cast<etc_block*>(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, basist::block_format::cBC7, 16)) + { + error_printf("Transcoding failed to BC7 on slice %u!\n", i); + return false; + } + + total_time_bc7 += tm.get_elapsed_secs(); + + m_decoded_output_textures_bc7[i] = decoded_texture; + } + } + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + m_decoded_output_textures[i].unpack(m_decoded_output_textures_unpacked[i]); + + if (m_decoded_output_textures_bc7[i].get_pixel_width()) + m_decoded_output_textures_bc7[i].unpack(m_decoded_output_textures_unpacked_bc7[i]); + } + + debug_printf("Transcoded to %s in %3.3fms, %f texels/sec\n", m_params.m_uastc ? "ASTC" : "ETC1", total_time_etc1s_or_astc * 1000.0f, total_orig_pixels / total_time_etc1s_or_astc); + + if (total_time_bc7 != 0) + debug_printf("Transcoded to BC7 in %3.3fms, %f texels/sec\n", total_time_bc7 * 1000.0f, total_orig_pixels / total_time_bc7); + + debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); + + uint32_t total_orig_texels = 0; + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; + + const uint32_t total_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; + BASISU_NOTE_UNUSED(total_blocks); + + assert(m_decoded_output_textures[slice_index].get_total_blocks() == total_blocks); + } + + m_basis_file_size = (uint32_t)comp_data.size(); + m_basis_bits_per_texel = (comp_data.size() * 8.0f) / total_orig_texels; + + return true; + } + + bool basis_compressor::write_output_files_and_compute_stats() + { + debug_printf("basis_compressor::write_output_files_and_compute_stats\n"); + + const uint8_vec& comp_data = m_params.m_create_ktx2_file ? m_output_ktx2_file : m_basis_file.get_compressed_data(); + if (m_params.m_write_output_basis_files) + { + const std::string& output_filename = m_params.m_out_filename; + + if (!write_vec_to_file(output_filename.c_str(), comp_data)) + { + error_printf("Failed writing output data to file \"%s\"\n", output_filename.c_str()); + return false; + } + + printf("Wrote output .basis/.ktx2 file \"%s\"\n", output_filename.c_str()); + } + + size_t comp_size = 0; + if ((m_params.m_compute_stats) && (m_params.m_uastc) && (comp_data.size())) + { + void* pComp_data = tdefl_compress_mem_to_heap(&comp_data[0], comp_data.size(), &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); + size_t decomp_size = 0; + void* pDecomp_data = tinfl_decompress_mem_to_heap(pComp_data, comp_size, &decomp_size, 0); + if ((decomp_size != comp_data.size()) || (memcmp(pDecomp_data, &comp_data[0], decomp_size) != 0)) + { + printf("basis_compressor::create_basis_file_and_transcode:: miniz compression or decompression failed!\n"); + return false; + } + + mz_free(pComp_data); + mz_free(pDecomp_data); + + uint32_t total_texels = 0; + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + total_texels += (m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y) * 16; + + m_basis_bits_per_texel = comp_size * 8.0f / total_texels; + + debug_printf(".basis file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n", + (uint32_t)comp_data.size(), + (uint32_t)comp_size, + m_basis_bits_per_texel); + } + + m_stats.resize(m_slice_descs.size()); + + uint32_t total_orig_texels = 0; + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; + + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; + + if (m_params.m_compute_stats) + { + printf("Slice: %u\n", slice_index); + + image_stats &s = m_stats[slice_index]; + + // TODO: We used to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back. + + image_metrics em; + + // ---- .basis stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); + em.print(".basis RGB Avg: "); + s.m_basis_rgb_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 4); + em.print(".basis RGBA Avg: "); + s.m_basis_rgba_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); + em.print(".basis R Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1); + em.print(".basis G Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1); + em.print(".basis B Avg: "); + + if (m_params.m_uastc) + { + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 3, 1); + em.print(".basis A Avg: "); + + s.m_basis_a_avg_psnr = em.m_psnr; + } + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0); + em.print(".basis 709 Luma: "); + s.m_basis_luma_709_psnr = static_cast<float>(em.m_psnr); + s.m_basis_luma_709_ssim = static_cast<float>(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); + em.print(".basis 601 Luma: "); + s.m_basis_luma_601_psnr = static_cast<float>(em.m_psnr); + + if (m_slice_descs.size() == 1) + { + const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size(); + debug_printf(".basis RGB PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_rgb_avg_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + debug_printf(".basis Luma 709 PSNR per bit/texel*10000: %3.3f\n", 10000.0f * s.m_basis_luma_709_psnr / ((output_size * 8.0f) / (slice_desc.m_orig_width * slice_desc.m_orig_height))); + } + + if (m_decoded_output_textures_unpacked_bc7[slice_index].get_width()) + { + // ---- BC7 stats + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 3); + em.print("BC7 RGB Avg: "); + s.m_bc7_rgb_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 4); + em.print("BC7 RGBA Avg: "); + s.m_bc7_rgba_avg_psnr = em.m_psnr; + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 1); + em.print("BC7 R Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 1, 1); + em.print("BC7 G Avg: "); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 2, 1); + em.print("BC7 B Avg: "); + + if (m_params.m_uastc) + { + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 3, 1); + em.print("BC7 A Avg: "); + + s.m_bc7_a_avg_psnr = em.m_psnr; + } + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0); + em.print("BC7 709 Luma: "); + s.m_bc7_luma_709_psnr = static_cast<float>(em.m_psnr); + s.m_bc7_luma_709_ssim = static_cast<float>(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked_bc7[slice_index], 0, 0, true, true); + em.print("BC7 601 Luma: "); + s.m_bc7_luma_601_psnr = static_cast<float>(em.m_psnr); + } + + if (!m_params.m_uastc) + { + // ---- Nearly best possible ETC1S stats + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0); + em.print("Unquantized ETC1S 709 Luma: "); + + s.m_best_etc1s_luma_709_psnr = static_cast<float>(em.m_psnr); + s.m_best_etc1s_luma_709_ssim = static_cast<float>(em.m_ssim); + + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 0, true, true); + em.print("Unquantized ETC1S 601 Luma: "); + + s.m_best_etc1s_luma_601_psnr = static_cast<float>(em.m_psnr); + + em.calc(m_slice_images[slice_index], m_best_etc1s_images_unpacked[slice_index], 0, 3); + em.print("Unquantized ETC1S RGB Avg: "); + + s.m_best_etc1s_rgb_avg_psnr = static_cast<float>(em.m_psnr); + } + } + + std::string out_basename; + if (m_params.m_out_filename.size()) + string_get_filename(m_params.m_out_filename.c_str(), out_basename); + else if (m_params.m_source_filenames.size()) + string_get_filename(m_params.m_source_filenames[slice_desc.m_source_file_index].c_str(), out_basename); + + string_remove_extension(out_basename); + out_basename = "basis_debug_" + out_basename + string_format("_slice_%u", slice_index); + + if ((!m_params.m_uastc) && (m_frontend.get_params().m_debug_images)) + { + // Write "best" ETC1S debug images + if (!m_params.m_uastc) + { + gpu_image best_etc1s_gpu_image(m_best_etc1s_images[slice_index]); + best_etc1s_gpu_image.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_best_etc1s.ktx").c_str(), best_etc1s_gpu_image); + + image best_etc1s_unpacked; + best_etc1s_gpu_image.unpack(best_etc1s_unpacked); + save_png(out_basename + "_best_etc1s.png", best_etc1s_unpacked); + } + } + + if (m_params.m_debug_images) + { + // Write decoded ETC1S/ASTC debug images + { + gpu_image decoded_etc1s_or_astc(m_decoded_output_textures[slice_index]); + decoded_etc1s_or_astc.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_etc1s_or_astc.ktx").c_str(), decoded_etc1s_or_astc); + + image temp(m_decoded_output_textures_unpacked[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_etc1s_or_astc.png", temp); + } + + // Write decoded BC7 debug images + if (m_decoded_output_textures_bc7[slice_index].get_pixel_width()) + { + gpu_image decoded_bc7(m_decoded_output_textures_bc7[slice_index]); + decoded_bc7.override_dimensions(slice_desc.m_orig_width, slice_desc.m_orig_height); + write_compressed_texture_file((out_basename + "_transcoded_bc7.ktx").c_str(), decoded_bc7); + + image temp(m_decoded_output_textures_unpacked_bc7[slice_index]); + temp.crop(slice_desc.m_orig_width, slice_desc.m_orig_height); + save_png(out_basename + "_transcoded_bc7.png", temp); + } + } + } + + return true; + } + + // Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file. + bool basis_compressor::validate_ktx2_constraints() + { + uint32_t base_width = 0, base_height = 0; + uint32_t total_layers = 0; + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + if (m_slice_descs[i].m_mip_index == 0) + { + if (!base_width) + { + base_width = m_slice_descs[i].m_orig_width; + base_height = m_slice_descs[i].m_orig_height; + } + else + { + if ((m_slice_descs[i].m_orig_width != base_width) || (m_slice_descs[i].m_orig_height != base_height)) + { + return false; + } + } + + total_layers = maximum<uint32_t>(total_layers, m_slice_descs[i].m_source_file_index + 1); + } + } + + basisu::vector<uint32_t> total_mips(total_layers); + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + total_mips[m_slice_descs[i].m_source_file_index] = maximum<uint32_t>(total_mips[m_slice_descs[i].m_source_file_index], m_slice_descs[i].m_mip_index + 1); + + for (uint32_t i = 1; i < total_layers; i++) + { + if (total_mips[0] != total_mips[i]) + { + return false; + } + } + + return true; + } + + static uint8_t g_ktx2_etc1s_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_uastc_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; + + void basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) + { + const uint8_t* pDFD; + uint32_t dfd_len; + + if (m_params.m_uastc) + { + if (m_any_source_image_has_alpha) + { + pDFD = g_ktx2_uastc_alpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_alpha_dfd); + } + else + { + pDFD = g_ktx2_uastc_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_uastc_nonalpha_dfd); + } + } + else + { + if (m_any_source_image_has_alpha) + { + pDFD = g_ktx2_etc1s_alpha_dfd; + dfd_len = sizeof(g_ktx2_etc1s_alpha_dfd); + } + else + { + pDFD = g_ktx2_etc1s_nonalpha_dfd; + dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd); + } + } + + assert(dfd_len >= 44); + + dfd.resize(dfd_len); + memcpy(dfd.data(), pDFD, dfd_len); + + uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t)); + + dfd_bits &= ~(0xFF << 16); + + if (m_params.m_ktx2_srgb_transfer_func) + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_SRGB << 16); + else + dfd_bits |= (basist::KTX2_KHR_DF_TRANSFER_LINEAR << 16); + + basisu::write_le_dword(dfd.data() + 3 * sizeof(uint32_t), dfd_bits); + + if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) + { + uint32_t plane_bits = basisu::read_le_dword(dfd.data() + 5 * sizeof(uint32_t)); + + plane_bits &= ~0xFF; + + basisu::write_le_dword(dfd.data() + 5 * sizeof(uint32_t), plane_bits); + } + + // Fix up the DFD channel(s) + uint32_t dfd_chan0 = basisu::read_le_dword(dfd.data() + 7 * sizeof(uint32_t)); + + if (m_params.m_uastc) + { + dfd_chan0 &= ~(0xF << 24); + + // TODO: Allow the caller to override this + if (m_any_source_image_has_alpha) + dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24); + else + dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGB << 24); + } + + basisu::write_le_dword(dfd.data() + 7 * sizeof(uint32_t), dfd_chan0); + } + + bool basis_compressor::create_ktx2_file() + { + if (m_params.m_uastc) + { + if ((m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_NONE) && (m_params.m_ktx2_uastc_supercompression != basist::KTX2_SS_ZSTANDARD)) + return false; + } + + const basisu_backend_output& backend_output = m_backend.get_output(); + + // Determine the width/height, number of array layers, mipmap levels, and the number of faces (1 for 2D, 6 for cubemap). + // This does not support 1D or 3D. + uint32_t base_width = 0, base_height = 0, total_layers = 0, total_levels = 0, total_faces = 1; + + for (uint32_t i = 0; i < m_slice_descs.size(); i++) + { + if ((m_slice_descs[i].m_mip_index == 0) && (!base_width)) + { + base_width = m_slice_descs[i].m_orig_width; + base_height = m_slice_descs[i].m_orig_height; + } + + total_layers = maximum<uint32_t>(total_layers, m_slice_descs[i].m_source_file_index + 1); + + if (!m_slice_descs[i].m_source_file_index) + total_levels = maximum<uint32_t>(total_levels, m_slice_descs[i].m_mip_index + 1); + } + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + assert((total_layers % 6) == 0); + + total_layers /= 6; + assert(total_layers >= 1); + + total_faces = 6; + } + + basist::ktx2_header header; + memset(&header, 0, sizeof(header)); + + memcpy(header.m_identifier, basist::g_ktx2_file_identifier, sizeof(basist::g_ktx2_file_identifier)); + header.m_pixel_width = base_width; + header.m_pixel_height = base_height; + header.m_face_count = total_faces; + header.m_vk_format = basist::KTX2_VK_FORMAT_UNDEFINED; + header.m_type_size = 1; + header.m_level_count = total_levels; + header.m_layer_count = (total_layers > 1) ? total_layers : 0; + + if (m_params.m_uastc) + { + switch (m_params.m_ktx2_uastc_supercompression) + { + case basist::KTX2_SS_NONE: + { + header.m_supercompression_scheme = basist::KTX2_SS_NONE; + break; + } + case basist::KTX2_SS_ZSTANDARD: + { +#if BASISD_SUPPORT_KTX2_ZSTD + header.m_supercompression_scheme = basist::KTX2_SS_ZSTANDARD; +#else + header.m_supercompression_scheme = basist::KTX2_SS_NONE; +#endif + break; + } + default: assert(0); return false; + } + } + + basisu::vector<uint8_vec> level_data_bytes(total_levels); + basisu::vector<uint8_vec> compressed_level_data_bytes(total_levels); + uint_vec slice_level_offsets(m_slice_descs.size()); + + // This will append the texture data in the correct order (for each level: layer, then face). + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + slice_level_offsets[slice_index] = level_data_bytes[slice_desc.m_mip_index].size(); + + if (m_params.m_uastc) + append_vector(level_data_bytes[slice_desc.m_mip_index], m_uastc_backend_output.m_slice_image_data[slice_index]); + else + append_vector(level_data_bytes[slice_desc.m_mip_index], backend_output.m_slice_image_data[slice_index]); + } + + // UASTC supercompression + if ((m_params.m_uastc) && (header.m_supercompression_scheme == basist::KTX2_SS_ZSTANDARD)) + { +#if BASISD_SUPPORT_KTX2_ZSTD + for (uint32_t level_index = 0; level_index < total_levels; level_index++) + { + compressed_level_data_bytes[level_index].resize(ZSTD_compressBound(level_data_bytes[level_index].size())); + + size_t result = ZSTD_compress(compressed_level_data_bytes[level_index].data(), compressed_level_data_bytes[level_index].size(), + level_data_bytes[level_index].data(), level_data_bytes[level_index].size(), + m_params.m_ktx2_zstd_supercompression_level); + + if (ZSTD_isError(result)) + return false; + + compressed_level_data_bytes[level_index].resize(result); + } +#else + // Can't get here + assert(0); + return false; +#endif + } + else + { + // No supercompression + compressed_level_data_bytes = level_data_bytes; + } + + uint8_vec etc1s_global_data; + + // Create ETC1S global supercompressed data + if (!m_params.m_uastc) + { + basist::ktx2_etc1s_global_data_header etc1s_global_data_header; + clear_obj(etc1s_global_data_header); + + etc1s_global_data_header.m_endpoint_count = backend_output.m_num_endpoints; + etc1s_global_data_header.m_selector_count = backend_output.m_num_selectors; + etc1s_global_data_header.m_endpoints_byte_length = backend_output.m_endpoint_palette.size(); + etc1s_global_data_header.m_selectors_byte_length = backend_output.m_selector_palette.size(); + etc1s_global_data_header.m_tables_byte_length = backend_output.m_slice_image_tables.size(); + + basisu::vector<basist::ktx2_etc1s_image_desc> etc1s_image_descs(total_levels * total_layers * total_faces); + memset(etc1s_image_descs.data(), 0, etc1s_image_descs.size_in_bytes()); + + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) + { + const basisu_backend_slice_desc& slice_desc = m_slice_descs[slice_index]; + + const uint32_t level_index = slice_desc.m_mip_index; + uint32_t layer_index = slice_desc.m_source_file_index; + uint32_t face_index = 0; + + if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) + { + face_index = layer_index % 6; + layer_index /= 6; + } + + const uint32_t etc1s_image_index = level_index * (total_layers * total_faces) + layer_index * total_faces + face_index; + + if (slice_desc.m_alpha) + { + etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_length = backend_output.m_slice_image_data[slice_index].size(); + etc1s_image_descs[etc1s_image_index].m_alpha_slice_byte_offset = slice_level_offsets[slice_index]; + } + else + { + if (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) + etc1s_image_descs[etc1s_image_index].m_image_flags = !slice_desc.m_iframe ? basist::KTX2_IMAGE_IS_P_FRAME : 0; + + etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_length = backend_output.m_slice_image_data[slice_index].size(); + etc1s_image_descs[etc1s_image_index].m_rgb_slice_byte_offset = slice_level_offsets[slice_index]; + } + } // slice_index + + append_vector(etc1s_global_data, (const uint8_t*)&etc1s_global_data_header, sizeof(etc1s_global_data_header)); + append_vector(etc1s_global_data, (const uint8_t*)etc1s_image_descs.data(), etc1s_image_descs.size_in_bytes()); + append_vector(etc1s_global_data, backend_output.m_endpoint_palette); + append_vector(etc1s_global_data, backend_output.m_selector_palette); + append_vector(etc1s_global_data, backend_output.m_slice_image_tables); + + header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; + } + + // Key values + basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values); + key_values.enlarge(1); + + const char* pKTXwriter = "KTXwriter"; + key_values.back().m_key.resize(strlen(pKTXwriter) + 1); + memcpy(key_values.back().m_key.data(), pKTXwriter, strlen(pKTXwriter) + 1); + + char writer_id[128]; +#ifdef _MSC_VER + sprintf_s(writer_id, sizeof(writer_id), "Basis Universal %s", BASISU_LIB_VERSION_STRING); +#else + snprintf(writer_id, sizeof(writer_id), "Basis Universal %s", BASISU_LIB_VERSION_STRING); +#endif + key_values.back().m_value.resize(strlen(writer_id) + 1); + memcpy(key_values.back().m_value.data(), writer_id, strlen(writer_id) + 1); + + key_values.sort(); + +#if BASISU_DISABLE_KTX2_KEY_VALUES + // HACK HACK - Clear the key values array, which causes no key values to be written (triggering the ktx2check validator bug). + key_values.clear(); +#endif + + uint8_vec key_value_data; + + // DFD + uint8_vec dfd; + get_dfd(dfd, header); + + const uint32_t kvd_file_offset = sizeof(header) + sizeof(basist::ktx2_level_index) * total_levels + dfd.size(); + + for (uint32_t pass = 0; pass < 2; pass++) + { + for (uint32_t i = 0; i < key_values.size(); i++) + { + if (key_values[i].m_key.size() < 2) + return false; + + if (key_values[i].m_key.back() != 0) + return false; + + const uint64_t total_len = (uint64_t)key_values[i].m_key.size() + (uint64_t)key_values[i].m_value.size(); + if (total_len >= UINT32_MAX) + return false; + + packed_uint<4> le_len((uint32_t)total_len); + append_vector(key_value_data, (const uint8_t*)&le_len, sizeof(le_len)); + + append_vector(key_value_data, key_values[i].m_key); + append_vector(key_value_data, key_values[i].m_value); + + const uint32_t ofs = key_value_data.size() & 3; + const uint32_t padding = (4 - ofs) & 3; + for (uint32_t p = 0; p < padding; p++) + key_value_data.push_back(0); + } + + if (header.m_supercompression_scheme != basist::KTX2_SS_NONE) + break; + +#if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND + break; +#endif + + // Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data. + uint32_t kvd_end_file_offset = kvd_file_offset + key_value_data.size(); + uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15; + if (!bytes_needed_to_pad) + { + // We're good. No need to add a dummy key. + break; + } + + assert(!pass); + if (pass) + return false; + + if (bytes_needed_to_pad < 6) + bytes_needed_to_pad += 16; + + printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); + + // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. + // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). + key_values.enlarge(1); + for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++) + key_values.back().m_key.push_back(127); + + key_values.back().m_key.push_back(0); + + key_values.back().m_value.push_back(0); + + key_values.sort(); + + key_value_data.resize(0); + + // Try again + } + + basisu::vector<basist::ktx2_level_index> level_index_array(total_levels); + memset(level_index_array.data(), 0, level_index_array.size_in_bytes()); + + m_output_ktx2_file.clear(); + m_output_ktx2_file.reserve(m_output_basis_file.size()); + + // Dummy header + m_output_ktx2_file.resize(sizeof(header)); + + // Level index array + append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes()); + + // DFD + const uint8_t* pDFD = dfd.data(); + uint32_t dfd_len = dfd.size(); + + header.m_dfd_byte_offset = m_output_ktx2_file.size(); + header.m_dfd_byte_length = dfd_len; + append_vector(m_output_ktx2_file, pDFD, dfd_len); + + // Key value data + if (key_value_data.size()) + { + assert(kvd_file_offset == m_output_ktx2_file.size()); + + header.m_kvd_byte_offset = m_output_ktx2_file.size(); + header.m_kvd_byte_length = key_value_data.size(); + append_vector(m_output_ktx2_file, key_value_data); + } + + // Global Supercompressed Data + if (etc1s_global_data.size()) + { + uint32_t ofs = m_output_ktx2_file.size() & 7; + uint32_t padding = (8 - ofs) & 7; + for (uint32_t i = 0; i < padding; i++) + m_output_ktx2_file.push_back(0); + + header.m_sgd_byte_length = etc1s_global_data.size(); + header.m_sgd_byte_offset = m_output_ktx2_file.size(); + + append_vector(m_output_ktx2_file, etc1s_global_data); + } + + // mipPadding + if (header.m_supercompression_scheme == basist::KTX2_SS_NONE) + { + // We currently can't do this or the validator will incorrectly give an error. + uint32_t ofs = m_output_ktx2_file.size() & 15; + uint32_t padding = (16 - ofs) & 15; + + // Make sure we're always aligned here (due to a validator bug). + if (padding) + { + printf("Warning: KTX2 mip level data is not 16-byte aligned. This may trigger a ktx2check validation bug. Writing %u bytes of mipPadding.\n", padding); + } + + for (uint32_t i = 0; i < padding; i++) + m_output_ktx2_file.push_back(0); + } + + // Level data - write the smallest mipmap first. + for (int level = total_levels - 1; level >= 0; level--) + { + level_index_array[level].m_byte_length = compressed_level_data_bytes[level].size(); + if (m_params.m_uastc) + level_index_array[level].m_uncompressed_byte_length = level_data_bytes[level].size(); + + level_index_array[level].m_byte_offset = m_output_ktx2_file.size(); + append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]); + } + + // Write final header + memcpy(m_output_ktx2_file.data(), &header, sizeof(header)); + + // Write final level index array + memcpy(m_output_ktx2_file.data() + sizeof(header), level_index_array.data(), level_index_array.size_in_bytes()); + + debug_printf("Total .ktx2 output file size: %u\n", m_output_ktx2_file.size()); + + return true; + } + +} // namespace basisu diff --git a/thirdparty/basis_universal/basisu_comp.h b/thirdparty/basis_universal/encoder/basisu_comp.h index 1c201ddbed..2c3af968f7 100644 --- a/thirdparty/basis_universal/basisu_comp.h +++ b/thirdparty/basis_universal/encoder/basisu_comp.h @@ -1,5 +1,5 @@ // basisu_comp.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,8 +16,23 @@ #include "basisu_frontend.h" #include "basisu_backend.h" #include "basisu_basis_file.h" -#include "transcoder/basisu_global_selector_palette.h" -#include "transcoder/basisu_transcoder.h" +#include "../transcoder/basisu_global_selector_palette.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_uastc_enc.h" + +#define BASISU_LIB_VERSION 115 +#define BASISU_LIB_VERSION_STRING "1.15" + +#ifndef BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 is undefined +#endif +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #error BASISD_SUPPORT_KTX2_ZSTD is undefined +#endif + +#if !BASISD_SUPPORT_KTX2 + #error BASISD_SUPPORT_KTX2 must be enabled when building the encoder. To reduce code size if KTX2 support is not needed, set BASISD_SUPPORT_KTX2_ZSTD to 0 +#endif namespace basisu { @@ -40,6 +55,10 @@ namespace basisu const uint32_t BASISU_MAX_SLICES = 0xFFFFFF; + const int BASISU_RDO_UASTC_DICT_SIZE_DEFAULT = 4096; // 32768; + const int BASISU_RDO_UASTC_DICT_SIZE_MIN = 64; + const int BASISU_RDO_UASTC_DICT_SIZE_MAX = 65536; + struct image_stats { image_stats() @@ -52,43 +71,52 @@ namespace basisu m_filename.clear(); m_width = 0; m_height = 0; - - m_basis_etc1s_rgb_avg_psnr = 0.0f; - m_basis_etc1s_luma_709_psnr = 0.0f; - m_basis_etc1s_luma_601_psnr = 0.0f; - m_basis_etc1s_luma_709_ssim = 0.0f; - - m_basis_bc1_rgb_avg_psnr = 0.0f; - m_basis_bc1_luma_709_psnr = 0.0f; - m_basis_bc1_luma_601_psnr = 0.0f; - m_basis_bc1_luma_709_ssim = 0.0f; - - m_best_rgb_avg_psnr = 0.0f; - m_best_luma_709_psnr = 0.0f; - m_best_luma_601_psnr = 0.0f; - m_best_luma_709_ssim = 0.0f; + + m_basis_rgb_avg_psnr = 0.0f; + m_basis_rgba_avg_psnr = 0.0f; + m_basis_a_avg_psnr = 0.0f; + m_basis_luma_709_psnr = 0.0f; + m_basis_luma_601_psnr = 0.0f; + m_basis_luma_709_ssim = 0.0f; + + m_bc7_rgb_avg_psnr = 0.0f; + m_bc7_rgba_avg_psnr = 0.0f; + m_bc7_a_avg_psnr = 0.0f; + m_bc7_luma_709_psnr = 0.0f; + m_bc7_luma_601_psnr = 0.0f; + m_bc7_luma_709_ssim = 0.0f; + + m_best_etc1s_rgb_avg_psnr = 0.0f; + m_best_etc1s_luma_709_psnr = 0.0f; + m_best_etc1s_luma_601_psnr = 0.0f; + m_best_etc1s_luma_709_ssim = 0.0f; } std::string m_filename; uint32_t m_width; uint32_t m_height; - // .basis compressed - float m_basis_etc1s_rgb_avg_psnr; - float m_basis_etc1s_luma_709_psnr; - float m_basis_etc1s_luma_601_psnr; - float m_basis_etc1s_luma_709_ssim; + // .basis compressed (ETC1S or UASTC statistics) + float m_basis_rgb_avg_psnr; + float m_basis_rgba_avg_psnr; + float m_basis_a_avg_psnr; + float m_basis_luma_709_psnr; + float m_basis_luma_601_psnr; + float m_basis_luma_709_ssim; + + // BC7 statistics + float m_bc7_rgb_avg_psnr; + float m_bc7_rgba_avg_psnr; + float m_bc7_a_avg_psnr; + float m_bc7_luma_709_psnr; + float m_bc7_luma_601_psnr; + float m_bc7_luma_709_ssim; - float m_basis_bc1_rgb_avg_psnr; - float m_basis_bc1_luma_709_psnr; - float m_basis_bc1_luma_601_psnr; - float m_basis_bc1_luma_709_ssim; - - // Normal (highest quality) compressed ETC1S - float m_best_rgb_avg_psnr; - float m_best_luma_709_psnr; - float m_best_luma_601_psnr; - float m_best_luma_709_ssim; + // Highest achievable quality ETC1S statistics + float m_best_etc1s_rgb_avg_psnr; + float m_best_etc1s_luma_709_psnr; + float m_best_etc1s_luma_601_psnr; + float m_best_etc1s_luma_709_ssim; }; template<bool def> @@ -175,18 +203,30 @@ namespace basisu struct basis_compressor_params { basis_compressor_params() : + m_pSel_codebook(NULL), + m_compression_level((int)BASISU_DEFAULT_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_COMPRESSION_LEVEL), + m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), + m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), m_hybrid_sel_cb_quality_thresh(BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH, 0.0f, 1e+10f), m_global_pal_bits(8, 0, ETC1_GLOBAL_SELECTOR_CODEBOOK_MAX_PAL_BITS), m_global_mod_bits(8, 0, basist::etc1_global_palette_entry_modifier::cTotalBits), - m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f), - m_selector_rdo_thresh(BASISU_DEFAULT_SELECTOR_RDO_THRESH, 0.0f, 1e+10f), - m_pSel_codebook(NULL), + m_mip_scale(1.0f, .000125f, 4.0f), + m_mip_smallest_dimension(1, 1, 16384), m_max_endpoint_clusters(512), m_max_selector_clusters(512), m_quality_level(-1), - m_mip_scale(1.0f, .000125f, 4.0f), - m_mip_smallest_dimension(1, 1, 16384), - m_compression_level((int)BASISU_DEFAULT_COMPRESSION_LEVEL, 0, (int)BASISU_MAX_COMPRESSION_LEVEL), + m_pack_uastc_flags(cPackUASTCLevelDefault), + m_rdo_uastc_quality_scalar(1.0f, 0.001f, 50.0f), + m_rdo_uastc_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX), + m_rdo_uastc_max_smooth_block_error_scale(UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE, 1.0f, 300.0f), + m_rdo_uastc_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f), + m_rdo_uastc_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f), + m_rdo_uastc_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f), + m_resample_width(0, 1, 16384), + m_resample_height(0, 1, 16384), + m_resample_factor(0.0f, .00125f, 100.0f), + m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE), + m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX), m_pJob_pool(nullptr) { clear(); @@ -196,15 +236,20 @@ namespace basisu { m_pSel_codebook = NULL; + m_uastc.clear(); + m_status_output.clear(); + m_source_filenames.clear(); m_source_alpha_filenames.clear(); m_source_images.clear(); + m_source_mipmap_images.clear(); m_out_filename.clear(); m_y_flip.clear(); m_debug.clear(); + m_validate.clear(); m_debug_images.clear(); m_global_sel_pal.clear(); m_auto_global_sel_pal.clear(); @@ -219,7 +264,11 @@ namespace basisu m_check_for_alpha.clear(); m_force_alpha.clear(); m_multithreading.clear(); - m_seperate_rg_to_color_alpha.clear(); + m_swizzle[0] = 0; + m_swizzle[1] = 1; + m_swizzle[2] = 2; + m_swizzle[3] = 3; + m_renormalize.clear(); m_hybrid_sel_cb_quality_thresh.clear(); m_global_pal_bits.clear(); m_global_mod_bits.clear(); @@ -236,6 +285,7 @@ namespace basisu m_mip_premultiplied.clear(); m_mip_renormalize.clear(); m_mip_wrapping.clear(); + m_mip_fast.clear(); m_mip_smallest_dimension.clear(); m_max_endpoint_clusters = 0; @@ -247,30 +297,63 @@ namespace basisu m_userdata1 = 0; m_us_per_frame = 0; + m_pack_uastc_flags = cPackUASTCLevelDefault; + m_rdo_uastc.clear(); + m_rdo_uastc_quality_scalar.clear(); + m_rdo_uastc_max_smooth_block_error_scale.clear(); + m_rdo_uastc_smooth_block_max_std_dev.clear(); + m_rdo_uastc_max_allowed_rms_increase_ratio.clear(); + m_rdo_uastc_skip_block_rms_thresh.clear(); + m_rdo_uastc_favor_simpler_modes_in_rdo_mode.clear(); + m_rdo_uastc_multithreading.clear(); + + m_resample_width.clear(); + m_resample_height.clear(); + m_resample_factor.clear(); + + m_pGlobal_codebooks = nullptr; + + m_create_ktx2_file.clear(); + m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; + m_ktx2_key_values.clear(); + m_ktx2_zstd_supercompression_level.clear(); + m_ktx2_srgb_transfer_func.clear(); + m_pJob_pool = nullptr; } - + // Pointer to the global selector codebook, or nullptr to not use a global selector codebook const basist::etc1_global_selector_codebook *m_pSel_codebook; + // True to generate UASTC .basis file data, otherwise ETC1S. + bool_param<false> m_uastc; + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. // Otherwise, the compressor processes the images in m_source_images. - std::vector<std::string> m_source_filenames; - std::vector<std::string> m_source_alpha_filenames; + basisu::vector<std::string> m_source_filenames; + basisu::vector<std::string> m_source_alpha_filenames; - std::vector<image> m_source_images; - // TODO: Allow caller to supply their own mipmaps + basisu::vector<image> m_source_images; + + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. + // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. + // The compressor applies the user-provided swizzling (in m_swizzle) to these images. + basisu::vector< basisu::vector<image> > m_source_mipmap_images; // Filename of the output basis file - std::string m_out_filename; + std::string m_out_filename; // The params are done this way so we can detect when the user has explictly changed them. // Flip images across Y axis bool_param<false> m_y_flip; + + // If true, the compressor will print basis status to stdout during compression. + bool_param<true> m_status_output; // Output debug information during compression bool_param<false> m_debug; + bool_param<false> m_validate; // m_debug_images is pretty slow bool_param<false> m_debug_images; @@ -284,7 +367,7 @@ namespace basisu // Frontend/backend codec parameters bool_param<false> m_no_hybrid_sel_cb; - // Use perceptual sRGB colorspace metrics (for normal maps, etc.) + // Use perceptual sRGB colorspace metrics instead of linear bool_param<true> m_perceptual; // Disable selector RDO, for faster compression but larger files @@ -299,7 +382,7 @@ namespace basisu // Write the output basis file to disk using m_out_filename bool_param<false> m_write_output_basis_files; - + // Compute and display image metrics bool_param<false> m_compute_stats; @@ -311,7 +394,9 @@ namespace basisu bool_param<true> m_multithreading; // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels - bool_param<false> m_seperate_rg_to_color_alpha; + char m_swizzle[4]; + + bool_param<false> m_renormalize; bool_param<false> m_disable_hierarchical_endpoint_codebooks; @@ -328,10 +413,11 @@ namespace basisu bool_param<true> m_mip_premultiplied; // not currently supported bool_param<false> m_mip_renormalize; bool_param<true> m_mip_wrapping; + bool_param<true> m_mip_fast; param<int> m_mip_smallest_dimension; // Codebook size (quality) control. - // If m_quality_level != -1, it controls the quality level. It ranges from [0,255]. + // If m_quality_level != -1, it controls the quality level. It ranges from [0,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. uint32_t m_max_endpoint_clusters; uint32_t m_max_selector_clusters; @@ -343,6 +429,31 @@ namespace basisu uint32_t m_userdata1; uint32_t m_us_per_frame; + // cPackUASTCLevelDefault, etc. + uint32_t m_pack_uastc_flags; + bool_param<false> m_rdo_uastc; + param<float> m_rdo_uastc_quality_scalar; + param<int> m_rdo_uastc_dict_size; + param<float> m_rdo_uastc_max_smooth_block_error_scale; + param<float> m_rdo_uastc_smooth_block_max_std_dev; + param<float> m_rdo_uastc_max_allowed_rms_increase_ratio; + param<float> m_rdo_uastc_skip_block_rms_thresh; + bool_param<true> m_rdo_uastc_favor_simpler_modes_in_rdo_mode; + bool_param<true> m_rdo_uastc_multithreading; + + param<int> m_resample_width; + param<int> m_resample_height; + param<float> m_resample_factor; + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; + + // KTX2 specific parameters. + // Internally, the compressor always creates a .basis file then it converts that lossless to KTX2. + bool_param<false> m_create_ktx2_file; + basist::ktx2_supercompression m_ktx2_uastc_supercompression; + basist::ktx2_transcoder::key_value_vec m_ktx2_key_values; + param<int> m_ktx2_zstd_supercompression_level; + bool_param<false> m_ktx2_srgb_transfer_func; + job_pool *m_pJob_pool; }; @@ -360,35 +471,41 @@ namespace basisu cECSuccess = 0, cECFailedReadingSourceImages, cECFailedValidating, + cECFailedEncodeUASTC, cECFailedFrontEnd, cECFailedFontendExtract, cECFailedBackend, cECFailedCreateBasisFile, - cECFailedWritingOutput + cECFailedWritingOutput, + cECFailedUASTCRDOPostProcess, + cECFailedCreateKTX2File }; error_code process(); + // The output .basis file will always be valid of process() succeeded. const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } - const etc_block_vec &get_output_blocks() const { return m_output_blocks; } + + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. + const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } - const std::vector<image_stats> &get_stats() const { return m_stats; } + const basisu::vector<image_stats> &get_stats() const { return m_stats; } uint32_t get_basis_file_size() const { return m_basis_file_size; } double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } - + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } - + private: basis_compressor_params m_params; - std::vector<image> m_slice_images; + basisu::vector<image> m_slice_images; - std::vector<image_stats> m_stats; + basisu::vector<image_stats> m_stats; uint32_t m_basis_file_size; double m_basis_bits_per_texel; - + basisu_backend_slice_desc_vec m_slice_descs; uint32_t m_total_blocks; @@ -397,33 +514,41 @@ namespace basisu basisu_frontend m_frontend; pixel_block_vec m_source_blocks; - std::vector<gpu_image> m_frontend_output_textures; + basisu::vector<gpu_image> m_frontend_output_textures; - std::vector<gpu_image> m_best_etc1s_images; - std::vector<image> m_best_etc1s_images_unpacked; + basisu::vector<gpu_image> m_best_etc1s_images; + basisu::vector<image> m_best_etc1s_images_unpacked; basisu_backend m_backend; basisu_file m_basis_file; - std::vector<gpu_image> m_decoded_output_textures; - std::vector<image> m_decoded_output_textures_unpacked; - std::vector<gpu_image> m_decoded_output_textures_bc1; - std::vector<image> m_decoded_output_textures_unpacked_bc1; + basisu::vector<gpu_image> m_decoded_output_textures; + basisu::vector<image> m_decoded_output_textures_unpacked; + basisu::vector<gpu_image> m_decoded_output_textures_bc7; + basisu::vector<image> m_decoded_output_textures_unpacked_bc7; uint8_vec m_output_basis_file; - etc_block_vec m_output_blocks; + uint8_vec m_output_ktx2_file; + + basisu::vector<gpu_image> m_uastc_slice_textures; + basisu_backend_output m_uastc_backend_output; bool m_any_source_image_has_alpha; bool read_source_images(); + bool extract_source_blocks(); bool process_frontend(); bool extract_frontend_texture_data(); bool process_backend(); bool create_basis_file_and_transcode(); bool write_output_files_and_compute_stats(); - bool generate_mipmaps(const image &img, std::vector<image> &mips, bool has_alpha); + error_code encode_slices_to_uastc(); + bool generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha); bool validate_texture_type_constraints(); + bool validate_ktx2_constraints(); + void get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr); + bool create_ktx2_file(); }; } // namespace basisu diff --git a/thirdparty/basis_universal/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp index 7057c65cf8..f02fb62c11 100644 --- a/thirdparty/basis_universal/basisu_enc.cpp +++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp @@ -1,5 +1,5 @@ // basisu_enc.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,11 @@ #include "basisu_resampler.h" #include "basisu_resampler_filters.h" #include "basisu_etc.h" -#include "transcoder/basisu_transcoder.h" +#include "../transcoder/basisu_transcoder.h" +#include "basisu_bc7enc.h" +#include "apg_bmp.h" +#include "jpgd.h" +#include <vector> #if defined(_WIN32) // For QueryPerformanceCounter/QueryPerformanceFrequency @@ -29,6 +33,9 @@ namespace basisu { uint64_t interval_timer::g_init_ticks, interval_timer::g_freq; double interval_timer::g_timer_freq; +#if BASISU_SUPPORT_SSE + bool g_cpu_supports_sse41; +#endif uint8_t g_hamming_dist[256] = { @@ -50,10 +57,117 @@ namespace basisu 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; + // This is a Public Domain 8x8 font from here: + // https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h + const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = + { + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( ) + { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) + { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0022 (") + { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00}, // U+0023 (#) + { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00}, // U+0024 ($) + { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00}, // U+0025 (%) + { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00}, // U+0026 (&) + { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0027 (') + { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00}, // U+0028 (() + { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00}, // U+0029 ()) + { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00}, // U+002A (*) + { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00}, // U+002B (+) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+002C (,) + { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00}, // U+002D (-) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+002E (.) + { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00}, // U+002F (/) + { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00}, // U+0030 (0) + { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00}, // U+0031 (1) + { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00}, // U+0032 (2) + { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00}, // U+0033 (3) + { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00}, // U+0034 (4) + { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00}, // U+0035 (5) + { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00}, // U+0036 (6) + { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00}, // U+0037 (7) + { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00}, // U+0038 (8) + { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00}, // U+0039 (9) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00}, // U+003A (:) + { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06}, // U+003B (;) + { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00}, // U+003C (<) + { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00}, // U+003D (=) + { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00}, // U+003E (>) + { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00}, // U+003F (?) + { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00}, // U+0040 (@) + { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00}, // U+0041 (A) + { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00}, // U+0042 (B) + { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00}, // U+0043 (C) + { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00}, // U+0044 (D) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00}, // U+0045 (E) + { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00}, // U+0046 (F) + { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00}, // U+0047 (G) + { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00}, // U+0048 (H) + { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0049 (I) + { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00}, // U+004A (J) + { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00}, // U+004B (K) + { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00}, // U+004C (L) + { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00}, // U+004D (M) + { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00}, // U+004E (N) + { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00}, // U+004F (O) + { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00}, // U+0050 (P) + { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00}, // U+0051 (Q) + { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00}, // U+0052 (R) + { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00}, // U+0053 (S) + { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0054 (T) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00}, // U+0055 (U) + { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0056 (V) + { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00}, // U+0057 (W) + { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00}, // U+0058 (X) + { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00}, // U+0059 (Y) + { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00}, // U+005A (Z) + { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00}, // U+005B ([) + { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00}, // U+005C (\) + { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00}, // U+005D (]) + { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00}, // U+005E (^) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF}, // U+005F (_) + { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0060 (`) + { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00}, // U+0061 (a) + { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00}, // U+0062 (b) + { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00}, // U+0063 (c) + { 0x38, 0x30, 0x30, 0x3e, 0x33, 0x33, 0x6E, 0x00}, // U+0064 (d) + { 0x00, 0x00, 0x1E, 0x33, 0x3f, 0x03, 0x1E, 0x00}, // U+0065 (e) + { 0x1C, 0x36, 0x06, 0x0f, 0x06, 0x06, 0x0F, 0x00}, // U+0066 (f) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0067 (g) + { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00}, // U+0068 (h) + { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+0069 (i) + { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E}, // U+006A (j) + { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00}, // U+006B (k) + { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00}, // U+006C (l) + { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00}, // U+006D (m) + { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00}, // U+006E (n) + { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00}, // U+006F (o) + { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F}, // U+0070 (p) + { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78}, // U+0071 (q) + { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00}, // U+0072 (r) + { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00}, // U+0073 (s) + { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00}, // U+0074 (t) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00}, // U+0075 (u) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00}, // U+0076 (v) + { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00}, // U+0077 (w) + { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00}, // U+0078 (x) + { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F}, // U+0079 (y) + { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00}, // U+007A (z) + { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00}, // U+007B ({) + { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00}, // U+007C (|) + { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00}, // U+007D (}) + { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~) + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F + }; + // Encoder library initialization (just call once at startup) void basisu_encoder_init() { + detect_sse41(); + basist::basisu_transcoder_init(); + pack_etc1_solid_color_init(); + //uastc_init(); + bc7enc_compress_block_init(); // must be after uastc_init() } void error_printf(const char *pFmt, ...) @@ -108,7 +222,7 @@ namespace basisu #else #error TODO #endif - + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_timer_freq) @@ -169,38 +283,142 @@ namespace basisu return ticks * g_timer_freq; } - bool load_png(const char* pFilename, image& img) + const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; + + bool load_bmp(const char* pFilename, image& img) { - std::vector<uint8_t> buffer; - unsigned err = lodepng::load_file(buffer, std::string(pFilename)); - if (err) + int w = 0, h = 0; + unsigned int n_chans = 0; + unsigned char* pImage_data = apg_bmp_read(pFilename, &w, &h, &n_chans); + + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) + { + error_printf("Failed loading .BMP image \"%s\"!\n", pFilename); + + if (pImage_data) + apg_bmp_free(pImage_data); + return false; + } - unsigned w = 0, h = 0; + if (sizeof(void *) == sizeof(uint32_t)) + { + if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) + { + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + + if (pImage_data) + apg_bmp_free(pImage_data); + + return false; + } + } + + img.resize(w, h); + + const uint8_t *pSrc = pImage_data; + for (int y = 0; y < h; y++) + { + color_rgba *pDst = &img(0, y); + + for (int x = 0; x < w; x++) + { + pDst->r = pSrc[0]; + pDst->g = pSrc[1]; + pDst->b = pSrc[2]; + pDst->a = (n_chans == 3) ? 255 : pSrc[3]; + + pSrc += n_chans; + ++pDst; + } + } + + apg_bmp_free(pImage_data); + + return true; + } + + bool load_tga(const char* pFilename, image& img) + { + int w = 0, h = 0, n_chans = 0; + uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) + { + error_printf("Failed loading .TGA image \"%s\"!\n", pFilename); + + if (pImage_data) + free(pImage_data); + + return false; + } + if (sizeof(void *) == sizeof(uint32_t)) { + if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) + { + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + + if (pImage_data) + free(pImage_data); + + return false; + } + } + + img.resize(w, h); + + const uint8_t *pSrc = pImage_data; + for (int y = 0; y < h; y++) + { + color_rgba *pDst = &img(0, y); + + for (int x = 0; x < w; x++) + { + pDst->r = pSrc[0]; + pDst->g = pSrc[1]; + pDst->b = pSrc[2]; + pDst->a = (n_chans == 3) ? 255 : pSrc[3]; + + pSrc += n_chans; + ++pDst; + } + } + + free(pImage_data); + + return true; + } + + bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) + { + if (!buf_size) + return false; + + unsigned err = 0, w = 0, h = 0; + + if (sizeof(void*) == sizeof(uint32_t)) + { // Inspect the image first on 32-bit builds, to see if the image would require too much memory. lodepng::State state; - err = lodepng_inspect(&w, &h, &state, &buffer[0], buffer.size()); + err = lodepng_inspect(&w, &h, &state, pBuf, buf_size); if ((err != 0) || (!w) || (!h)) return false; const uint32_t exepected_alloc_size = w * h * sizeof(uint32_t); - + // If the file is too large on 32-bit builds then just bail now, to prevent causing a memory exception. - const uint32_t MAX_ALLOC_SIZE = 250000000; - if (exepected_alloc_size >= MAX_ALLOC_SIZE) + if (exepected_alloc_size >= MAX_32BIT_ALLOC_SIZE) { - error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); + error_printf("Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", (pFilename != nullptr) ? pFilename : "<memory>", w, h); return false; } - + w = h = 0; } - + std::vector<uint8_t> out; - err = lodepng::decode(out, w, h, &buffer[0], buffer.size()); + err = lodepng::decode(out, w, h, pBuf, buf_size); if ((err != 0) || (!w) || (!h)) return false; @@ -213,12 +431,62 @@ namespace basisu return true; } + + bool load_png(const char* pFilename, image& img) + { + std::vector<uint8_t> buffer; + unsigned err = lodepng::load_file(buffer, std::string(pFilename)); + if (err) + return false; + + + return load_png(buffer.data(), buffer.size(), img, pFilename); + } + + bool load_jpg(const char *pFilename, image& img) + { + int width = 0, height = 0, actual_comps = 0; + uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); + if (!pImage_data) + return false; + + img.init(pImage_data, width, height, 4); + + free(pImage_data); + + return true; + } + + bool load_image(const char* pFilename, image& img) + { + std::string ext(string_get_extension(std::string(pFilename))); + + if (ext.length() == 0) + return false; + + const char *pExt = ext.c_str(); + + if (strcasecmp(pExt, "png") == 0) + return load_png(pFilename, img); + if (strcasecmp(pExt, "bmp") == 0) + return load_bmp(pFilename, img); + if (strcasecmp(pExt, "tga") == 0) + return load_tga(pFilename, img); + if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) ) + return load_jpg(pFilename, img); + + return false; + } - bool save_png(const char* pFilename, const image & img, uint32_t image_save_flags, uint32_t grayscale_comp) + bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) { if (!img.get_total_pixels()) return false; + const uint32_t MAX_PNG_IMAGE_DIM = 32768; + if ((img.get_width() > MAX_PNG_IMAGE_DIM) || (img.get_height() > MAX_PNG_IMAGE_DIM)) + return false; + std::vector<uint8_t> out; unsigned err = 0; @@ -231,16 +499,19 @@ namespace basisu for (uint32_t x = 0; x < img.get_width(); x++) *pDst++ = img(x, y)[grayscale_comp]; - err = lodepng::encode(out, (const uint8_t*)& g_pixels[0], img.get_width(), img.get_height(), LCT_GREY, 8); + err = lodepng::encode(out, (const uint8_t*)&g_pixels[0], img.get_width(), img.get_height(), LCT_GREY, 8); } else { bool has_alpha = img.has_alpha(); if ((!has_alpha) || ((image_save_flags & cImageSaveIgnoreAlpha) != 0)) { - uint8_vec rgb_pixels(img.get_width() * 3 * img.get_height()); + const uint64_t total_bytes = (uint64_t)img.get_width() * 3U * (uint64_t)img.get_height(); + if (total_bytes > INT_MAX) + return false; + uint8_vec rgb_pixels(static_cast<size_t>(total_bytes)); uint8_t *pDst = &rgb_pixels[0]; - + for (uint32_t y = 0; y < img.get_height(); y++) { for (uint32_t x = 0; x < img.get_width(); x++) @@ -302,7 +573,11 @@ namespace basisu } } - data.resize((size_t)filesize); + if (!data.try_resize((size_t)filesize)) + { + fclose(pFile); + return false; + } if (filesize) { @@ -525,7 +800,7 @@ namespace basisu if ((s >= num_syms) || (A[r].m_key < A[s].m_key)) { A[next].m_key = A[r].m_key; - A[r].m_key = static_cast<uint16_t>(next); + A[r].m_key = next; ++r; } else @@ -536,13 +811,13 @@ namespace basisu if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key)) { - A[next].m_key = static_cast<uint16_t>(A[next].m_key + A[r].m_key); - A[r].m_key = static_cast<uint16_t>(next); + A[next].m_key = A[next].m_key + A[r].m_key; + A[r].m_key = next; ++r; } else { - A[next].m_key = static_cast<uint16_t>(A[next].m_key + A[s].m_key); + A[next].m_key = A[next].m_key + A[s].m_key; ++s; } } @@ -562,7 +837,7 @@ namespace basisu ; for ( ; num_avail > num_used; --next, --num_avail) - A[next].m_key = static_cast<uint16_t>(depth); + A[next].m_key = depth; num_avail = 2 * num_used; num_used = 0; @@ -610,6 +885,10 @@ namespace basisu for (i = 0; i < num_syms; i++) { uint32_t freq = pSyms0[i].m_key; + + // We scale all input frequencies to 16-bits. + assert(freq <= UINT16_MAX); + hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } @@ -731,8 +1010,13 @@ namespace basisu else { for (uint32_t i = 0; i < num_syms; i++) + { if (pSym_freq[i]) - sym_freq[i] = static_cast<uint16_t>(maximum<uint32_t>((pSym_freq[i] * 65534U + (max_freq >> 1)) / max_freq, 1)); + { + uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq); + sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534)); + } + } } return init(num_syms, &sym_freq[0], max_code_size); @@ -1125,10 +1409,10 @@ namespace basisu void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma) { assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); - - const uint32_t width = std::min(a.get_width(), b.get_width()); - const uint32_t height = std::min(a.get_height(), b.get_height()); - + + const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); + const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); + double hist[256]; clear_obj(hist); @@ -1159,7 +1443,7 @@ namespace basisu { if (hist[i]) { - m_max = std::max<float>(m_max, (float)i); + m_max = basisu::maximum<float>(m_max, (float)i); double v = i * hist[i]; sum += v; sum2 += i * v; @@ -1171,9 +1455,9 @@ namespace basisu total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0); - m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0 * 255.0); + m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f); m_rms = (float)sqrt(m_mean_squared); - m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0, 0.0f, 300.0f) : 1e+10f; + m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f; } void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed) @@ -1253,8 +1537,8 @@ namespace basisu } job_pool::job_pool(uint32_t num_threads) : - m_kill_flag(false), - m_num_active_jobs(0) + m_num_active_jobs(0), + m_kill_flag(false) { assert(num_threads >= 1U); @@ -1302,13 +1586,15 @@ namespace basisu std::unique_lock<std::mutex> lock(m_mutex); m_queue.emplace_back(std::move(job)); - + const size_t queue_size = m_queue.size(); lock.unlock(); if (queue_size > 1) + { m_has_work.notify_one(); + } } void job_pool::wait_for_all() @@ -1373,4 +1659,481 @@ namespace basisu debug_printf("job_pool::job_thread: exiting\n"); } + // .TGA image loading + #pragma pack(push) + #pragma pack(1) + struct tga_header + { + uint8_t m_id_len; + uint8_t m_cmap; + uint8_t m_type; + packed_uint<2> m_cmap_first; + packed_uint<2> m_cmap_len; + uint8_t m_cmap_bpp; + packed_uint<2> m_x_org; + packed_uint<2> m_y_org; + packed_uint<2> m_width; + packed_uint<2> m_height; + uint8_t m_depth; + uint8_t m_desc; + }; + #pragma pack(pop) + + const uint32_t MAX_TGA_IMAGE_SIZE = 16384; + + enum tga_image_type + { + cITPalettized = 1, + cITRGB = 2, + cITGrayscale = 3 + }; + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans) + { + width = 0; + height = 0; + n_chans = 0; + + if (buf_size <= sizeof(tga_header)) + return nullptr; + + const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf); + + if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE)) + return nullptr; + + if (hdr.m_desc >> 6) + return nullptr; + + // Simple validation + if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) + return nullptr; + + if (hdr.m_cmap) + { + if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) + return nullptr; + + // Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either. + if (hdr.m_cmap_first != 0) + return nullptr; + } + + const bool x_flipped = (hdr.m_desc & 0x10) != 0; + const bool y_flipped = (hdr.m_desc & 0x20) == 0; + + bool rle_flag = false; + int file_image_type = hdr.m_type; + if (file_image_type > 8) + { + file_image_type -= 8; + rle_flag = true; + } + + const tga_image_type image_type = static_cast<tga_image_type>(file_image_type); + + switch (file_image_type) + { + case cITRGB: + if (hdr.m_depth == 8) + return nullptr; + break; + case cITPalettized: + if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0)) + return nullptr; + break; + case cITGrayscale: + if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0)) + return nullptr; + if ((hdr.m_depth != 8) && (hdr.m_depth != 16)) + return nullptr; + break; + default: + return nullptr; + } + + uint32_t tga_bytes_per_pixel = 0; + + switch (hdr.m_depth) + { + case 32: + tga_bytes_per_pixel = 4; + n_chans = 4; + break; + case 24: + tga_bytes_per_pixel = 3; + n_chans = 3; + break; + case 16: + case 15: + tga_bytes_per_pixel = 2; + // For compatibility with stb_image_write.h + n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3; + break; + case 8: + tga_bytes_per_pixel = 1; + // For palettized RGBA support, which both FreeImage and stb_image support. + n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 4 : 3; + break; + default: + return nullptr; + } + + const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel; + + const uint8_t *pSrc = pBuf + sizeof(tga_header); + uint32_t bytes_remaining = buf_size - sizeof(tga_header); + + if (hdr.m_id_len) + { + if (bytes_remaining < hdr.m_id_len) + return nullptr; + pSrc += hdr.m_id_len; + bytes_remaining += hdr.m_id_len; + } + + color_rgba pal[256]; + for (uint32_t i = 0; i < 256; i++) + pal[i].set(0, 0, 0, 255); + + if ((hdr.m_cmap) && (hdr.m_cmap_len)) + { + if (image_type == cITPalettized) + { + // Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years). + if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) ) + return nullptr; + + if (hdr.m_cmap_bpp == 32) + { + const uint32_t pal_size = hdr.m_cmap_len * 4; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + pal[i].r = pSrc[i * 4 + 2]; + pal[i].g = pSrc[i * 4 + 1]; + pal[i].b = pSrc[i * 4 + 0]; + pal[i].a = pSrc[i * 4 + 3]; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + else if (hdr.m_cmap_bpp == 24) + { + const uint32_t pal_size = hdr.m_cmap_len * 3; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + pal[i].r = pSrc[i * 3 + 2]; + pal[i].g = pSrc[i * 3 + 1]; + pal[i].b = pSrc[i * 3 + 0]; + pal[i].a = 255; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + else + { + const uint32_t pal_size = hdr.m_cmap_len * 2; + if (bytes_remaining < pal_size) + return nullptr; + + for (uint32_t i = 0; i < hdr.m_cmap_len; i++) + { + const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8); + + pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31; + pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31; + pal[i].b = ((v & 31) * 255 + 15) / 31; + pal[i].a = 255; + } + + bytes_remaining -= pal_size; + pSrc += pal_size; + } + } + else + { + const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len; + if (bytes_remaining < bytes_to_skip) + return nullptr; + pSrc += bytes_to_skip; + bytes_remaining += bytes_to_skip; + } + } + + width = hdr.m_width; + height = hdr.m_height; + + const uint32_t source_pitch = width * tga_bytes_per_pixel; + const uint32_t dest_pitch = width * n_chans; + + uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); + if (!pImage) + return nullptr; + + std::vector<uint8_t> input_line_buf; + if (rle_flag) + input_line_buf.resize(source_pitch); + + int run_type = 0, run_remaining = 0; + uint8_t run_pixel[4]; + memset(run_pixel, 0, sizeof(run_pixel)); + + for (int y = 0; y < height; y++) + { + const uint8_t *pLine_data; + + if (rle_flag) + { + int pixels_remaining = width; + uint8_t *pDst = &input_line_buf[0]; + + do + { + if (!run_remaining) + { + if (bytes_remaining < 1) + { + free(pImage); + return nullptr; + } + + int v = *pSrc++; + bytes_remaining--; + + run_type = v & 0x80; + run_remaining = (v & 0x7F) + 1; + + if (run_type) + { + if (bytes_remaining < tga_bytes_per_pixel) + { + free(pImage); + return nullptr; + } + + memcpy(run_pixel, pSrc, tga_bytes_per_pixel); + pSrc += tga_bytes_per_pixel; + bytes_remaining -= tga_bytes_per_pixel; + } + } + + const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining); + pixels_remaining -= n; + run_remaining -= n; + + if (run_type) + { + for (uint32_t i = 0; i < n; i++) + for (uint32_t j = 0; j < tga_bytes_per_pixel; j++) + *pDst++ = run_pixel[j]; + } + else + { + const uint32_t bytes_wanted = n * tga_bytes_per_pixel; + + if (bytes_remaining < bytes_wanted) + { + free(pImage); + return nullptr; + } + + memcpy(pDst, pSrc, bytes_wanted); + pDst += bytes_wanted; + + pSrc += bytes_wanted; + bytes_remaining -= bytes_wanted; + } + + } while (pixels_remaining); + + assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel); + + pLine_data = &input_line_buf[0]; + } + else + { + if (bytes_remaining < source_pitch) + { + free(pImage); + return nullptr; + } + + pLine_data = pSrc; + bytes_remaining -= source_pitch; + pSrc += source_pitch; + } + + // Convert to 24bpp RGB or 32bpp RGBA. + uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0); + const int dst_stride = x_flipped ? -((int)n_chans) : n_chans; + + switch (hdr.m_depth) + { + case 32: + assert(tga_bytes_per_pixel == 4 && n_chans == 4); + for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride) + { + pDst[0] = pLine_data[2]; + pDst[1] = pLine_data[1]; + pDst[2] = pLine_data[0]; + pDst[3] = pLine_data[3]; + } + break; + case 24: + assert(tga_bytes_per_pixel == 3 && n_chans == 3); + for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride) + { + pDst[0] = pLine_data[2]; + pDst[1] = pLine_data[1]; + pDst[2] = pLine_data[0]; + } + break; + case 16: + case 15: + if (image_type == cITRGB) + { + assert(tga_bytes_per_pixel == 2 && n_chans == 3); + for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) + { + const uint32_t v = pLine_data[0] | (pLine_data[1] << 8); + pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31; + pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31; + pDst[2] = ((v & 31) * 255 + 15) / 31; + } + } + else + { + assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4); + for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) + { + pDst[0] = pLine_data[0]; + pDst[1] = pLine_data[0]; + pDst[2] = pLine_data[0]; + pDst[3] = pLine_data[1]; + } + } + break; + case 8: + assert(tga_bytes_per_pixel == 1); + if (image_type == cITPalettized) + { + if (hdr.m_cmap_bpp == 32) + { + assert(n_chans == 4); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint32_t c = *pLine_data; + pDst[0] = pal[c].r; + pDst[1] = pal[c].g; + pDst[2] = pal[c].b; + pDst[3] = pal[c].a; + } + } + else + { + assert(n_chans == 3); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint32_t c = *pLine_data; + pDst[0] = pal[c].r; + pDst[1] = pal[c].g; + pDst[2] = pal[c].b; + } + } + } + else + { + assert(n_chans == 3); + for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) + { + const uint8_t c = *pLine_data; + pDst[0] = c; + pDst[1] = c; + pDst[2] = c; + } + } + break; + default: + assert(0); + break; + } + } // y + + return pImage; + } + + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans) + { + width = height = n_chans = 0; + + uint8_vec filedata; + if (!read_file_to_vec(pFilename, filedata)) + return nullptr; + + if (!filedata.size() || (filedata.size() > UINT32_MAX)) + return nullptr; + + return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); + } + + void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...) + { + char buf[2048]; + + va_list args; + va_start(args, pFmt); +#ifdef _WIN32 + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + va_end(args); + + const char* p = buf; + + const uint32_t orig_x_ofs = x_ofs; + + while (*p) + { + uint8_t c = *p++; + if ((c < 32) || (c > 127)) + c = '.'; + + const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0]; + + for (uint32_t y = 0; y < 8; y++) + { + uint32_t row_bits = pGlpyh[y]; + for (uint32_t x = 0; x < 8; x++) + { + const uint32_t q = row_bits & (1 << x); + + const color_rgba* pColor = q ? &fg : pBG; + if (!pColor) + continue; + + if (alpha_only) + fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); + else + fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); + } + } + + x_ofs += 8 * scale_x; + if ((x_ofs + 8 * scale_x) > m_width) + { + x_ofs = orig_x_ofs; + y_ofs += 8 * scale_y; + } + } + } + } // namespace basisu diff --git a/thirdparty/basis_universal/basisu_enc.h b/thirdparty/basis_universal/encoder/basisu_enc.h index 0a0c3c6fc0..05c95cbc3b 100644 --- a/thirdparty/basis_universal/basisu_enc.h +++ b/thirdparty/basis_universal/encoder/basisu_enc.h @@ -1,5 +1,5 @@ // basisu_enc.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" -#include "transcoder/basisu_transcoder_internal.h" +#include "../transcoder/basisu.h" +#include "../transcoder/basisu_transcoder_internal.h" #include <mutex> #include <atomic> @@ -28,13 +28,29 @@ #include <libgen.h> #endif +// This module is really just a huge grab bag of classes and helper functions needed by the encoder. + +// If BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE is 1, quality in perceptual mode will be slightly greater, but at a large increase in encoding CPU time. +#define BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE (0) + namespace basisu { extern uint8_t g_hamming_dist[256]; + extern const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8]; - // Encoder library initialization + // Encoder library initialization. + // This function MUST be called before encoding anything! void basisu_encoder_init(); + // basisu_kernels_sse.cpp - will be a no-op and g_cpu_supports_sse41 will always be false unless compiled with BASISU_SUPPORT_SSE=1 + extern void detect_sse41(); + +#if BASISU_SUPPORT_SSE + extern bool g_cpu_supports_sse41; +#else + const bool g_cpu_supports_sse41 = false; +#endif + void error_printf(const char *pFmt, ...); // Helpers @@ -43,7 +59,68 @@ namespace basisu { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - + + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; + } + + inline uint8_t mul_8(uint32_t v, uint32_t a) + { + v = v * a + 128; + return (uint8_t)((v + (v >> 8)) >> 8); + } + + inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64); + uint64_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = minimum<int>(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + // Hashing inline uint32_t bitmix32c(uint32_t v) @@ -69,6 +146,16 @@ namespace basisu return v; } + inline uint32_t wang_hash(uint32_t seed) + { + seed = (seed ^ 61) ^ (seed >> 16); + seed *= 9; + seed = seed ^ (seed >> 4); + seed *= 0x27d4eb2d; + seed = seed ^ (seed >> 15); + return seed; + } + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); template <typename Key> @@ -80,6 +167,72 @@ namespace basisu } }; + class running_stat + { + public: + running_stat() : + m_n(0), + m_old_m(0), m_new_m(0), m_old_s(0), m_new_s(0) + { + } + void clear() + { + m_n = 0; + } + void push(double x) + { + m_n++; + if (m_n == 1) + { + m_old_m = m_new_m = x; + m_old_s = 0.0; + m_min = x; + m_max = x; + } + else + { + m_new_m = m_old_m + (x - m_old_m) / m_n; + m_new_s = m_old_s + (x - m_old_m) * (x - m_new_m); + m_old_m = m_new_m; + m_old_s = m_new_s; + m_min = basisu::minimum(x, m_min); + m_max = basisu::maximum(x, m_max); + } + } + uint32_t get_num() const + { + return m_n; + } + double get_mean() const + { + return (m_n > 0) ? m_new_m : 0.0; + } + + double get_variance() const + { + return ((m_n > 1) ? m_new_s / (m_n - 1) : 0.0); + } + + double get_std_dev() const + { + return sqrt(get_variance()); + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + private: + uint32_t m_n; + double m_old_m, m_new_m, m_old_s, m_new_s, m_min, m_max; + }; + // Linear algebra template <uint32_t N, typename T> @@ -118,7 +271,7 @@ namespace basisu inline vec &set(const vec<OtherN, OtherT> &other) { uint32_t i; - if (static_cast<void *>(&other) == static_cast<void *>(this)) + if ((const void *)(&other) == (const void *)(this)) return *this; const uint32_t m = minimum(OtherN, N); for (i = 0; i < m; i++) @@ -358,6 +511,7 @@ namespace basisu BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(job_pool); public: + // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. job_pool(uint32_t num_threads); ~job_pool(); @@ -370,7 +524,7 @@ namespace basisu private: std::vector<std::thread> m_threads; - std::vector<std::function<void()> > m_queue; + std::vector<std::function<void()> > m_queue; std::mutex m_mutex; std::condition_variable m_has_work; @@ -420,7 +574,7 @@ namespace basisu return *this; } }; - + class color_rgba { public: @@ -440,6 +594,25 @@ namespace basisu inline color_rgba() { static_assert(sizeof(*this) == 4, "sizeof(*this) != 4"); + static_assert(sizeof(*this) == sizeof(basist::color32), "sizeof(*this) != sizeof(basist::color32)"); + } + + // Not too hot about this idea. + inline color_rgba(const basist::color32& other) : + r(other.r), + g(other.g), + b(other.b), + a(other.a) + { + } + + color_rgba& operator= (const basist::color32& rhs) + { + r = rhs.r; + g = rhs.g; + b = rhs.b; + a = rhs.a; + return *this; } inline color_rgba(int y) @@ -563,11 +736,20 @@ namespace basisu inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); } + + inline basist::color32 get_color32() const + { + return basist::color32(r, g, b, a); + } + + static color_rgba comp_min(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color_rgba comp_max(const color_rgba& a, const color_rgba& b) { return color_rgba(basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } }; - typedef std::vector<color_rgba> color_rgba_vec; + typedef basisu::vector<color_rgba> color_rgba_vec; const color_rgba g_black_color(0, 0, 0, 255); + const color_rgba g_black_trans_color(0, 0, 0, 0); const color_rgba g_white_color(255, 255, 255, 255); inline int color_distance(int r0, int g0, int b0, int r1, int g1, int b1) @@ -595,6 +777,7 @@ namespace basisu { if (perceptual) { +#if BASISU_USE_HIGH_PRECISION_COLOR_DISTANCE const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; @@ -617,11 +800,61 @@ namespace basisu } return d; +#elif 1 + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + uint32_t id = ((uint32_t)(delta_l * delta_l) >> 7U) + + ((((uint32_t)(delta_cr * delta_cr) >> 7U) * 26U) >> 7U) + + ((((uint32_t)(delta_cb * delta_cb) >> 7U) * 3U) >> 7U); + + if (alpha) + { + int da = (e1.a - e2.a) << 7; + id += ((uint32_t)(da * da) >> 7U); + } + + return id; +#else + int dr = e1.r - e2.r; + int dg = e1.g - e2.g; + int db = e1.b - e2.b; + + int64_t delta_l = dr * 27 + dg * 92 + db * 9; + int64_t delta_cr = dr * 128 - delta_l; + int64_t delta_cb = db * 128 - delta_l; + + int64_t id = ((delta_l * delta_l) * 128) + + ((delta_cr * delta_cr) * 26) + + ((delta_cb * delta_cb) * 3); + + if (alpha) + { + int64_t da = (e1.a - e2.a); + id += (da * da) * 128; + } + + int d = (id + 8192) >> 14; + + return d; +#endif } else return color_distance(e1, e2, alpha); } + static inline uint32_t color_distance_la(const color_rgba& a, const color_rgba& b) + { + const int dl = a.r - b.r; + const int da = a.a - b.a; + return dl * dl + da * da; + } + // String helpers inline int string_find_right(const std::string& filename, char c) @@ -929,7 +1162,7 @@ namespace basisu float m_priority; }; - std::vector<entry> m_heap; + basisu::vector<entry> m_heap; uint32_t m_size; // Push down entry at index @@ -961,7 +1194,7 @@ namespace basisu public: typedef TrainingVectorType training_vec_type; typedef std::pair<TrainingVectorType, uint64_t> training_vec_with_weight; - typedef std::vector< training_vec_with_weight > array_of_weighted_training_vecs; + typedef basisu::vector< training_vec_with_weight > array_of_weighted_training_vecs; tree_vector_quant() : m_next_codebook_index(0) @@ -981,7 +1214,7 @@ namespace basisu const array_of_weighted_training_vecs &get_training_vecs() const { return m_training_vecs; } array_of_weighted_training_vecs &get_training_vecs() { return m_training_vecs; } - void retrieve(std::vector< std::vector<uint32_t> > &codebook) const + void retrieve(basisu::vector< basisu::vector<uint32_t> > &codebook) const { for (uint32_t i = 0; i < m_nodes.size(); i++) { @@ -994,7 +1227,7 @@ namespace basisu } } - void retrieve(std::vector<TrainingVectorType> &codebook) const + void retrieve(basisu::vector<TrainingVectorType> &codebook) const { for (uint32_t i = 0; i < m_nodes.size(); i++) { @@ -1007,7 +1240,7 @@ namespace basisu } } - void retrieve(uint32_t max_clusters, std::vector<uint_vec> &codebook) const + void retrieve(uint32_t max_clusters, basisu::vector<uint_vec> &codebook) const { uint_vec node_stack; node_stack.reserve(512); @@ -1054,7 +1287,7 @@ namespace basisu priority_queue var_heap; var_heap.init(max_size, 0, m_nodes[0].m_var); - std::vector<uint32_t> l_children, r_children; + basisu::vector<uint32_t> l_children, r_children; // Now split the worst nodes l_children.reserve(m_training_vecs.size() + 1); @@ -1092,7 +1325,7 @@ namespace basisu inline tsvq_node() : m_weight(0), m_origin(cZero), m_left_index(-1), m_right_index(-1), m_codebook_index(-1) { } // vecs is erased - inline void set(const TrainingVectorType &org, uint64_t weight, float var, std::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } + inline void set(const TrainingVectorType &org, uint64_t weight, float var, basisu::vector<uint32_t> &vecs) { m_origin = org; m_weight = weight; m_var = var; m_training_vecs.swap(vecs); } inline bool is_leaf() const { return m_left_index < 0; } @@ -1100,11 +1333,11 @@ namespace basisu uint64_t m_weight; TrainingVectorType m_origin; int32_t m_left_index, m_right_index; - std::vector<uint32_t> m_training_vecs; + basisu::vector<uint32_t> m_training_vecs; int m_codebook_index; }; - typedef std::vector<tsvq_node> tsvq_node_vec; + typedef basisu::vector<tsvq_node> tsvq_node_vec; tsvq_node_vec m_nodes; array_of_weighted_training_vecs m_training_vecs; @@ -1139,7 +1372,7 @@ namespace basisu return root; } - bool split_node(uint32_t node_index, priority_queue &var_heap, std::vector<uint32_t> &l_children, std::vector<uint32_t> &r_children) + bool split_node(uint32_t node_index, priority_queue &var_heap, basisu::vector<uint32_t> &l_children, basisu::vector<uint32_t> &r_children) { TrainingVectorType l_child_org, r_child_org; uint64_t l_weight = 0, r_weight = 0; @@ -1239,7 +1472,7 @@ namespace basisu bool prep_split(const tsvq_node &node, TrainingVectorType &l_child_result, TrainingVectorType &r_child_result) const { - const uint32_t N = TrainingVectorType::num_elements; + //const uint32_t N = TrainingVectorType::num_elements; if (2 == node.m_training_vecs.size()) { @@ -1304,7 +1537,7 @@ namespace basisu if (largest_axis_index < 0) return false; - std::vector<float> keys(node.m_training_vecs.size()); + basisu::vector<float> keys(node.m_training_vecs.size()); for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) keys[i] = m_training_vecs[node.m_training_vecs[i]].first[largest_axis_index]; @@ -1352,8 +1585,8 @@ namespace basisu } bool refine_split(const tsvq_node &node, - TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, std::vector<uint32_t> &l_children, - TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, std::vector<uint32_t> &r_children) const + TrainingVectorType &l_child, uint64_t &l_weight, float &l_var, basisu::vector<uint32_t> &l_children, + TrainingVectorType &r_child, uint64_t &r_weight, float &r_var, basisu::vector<uint32_t> &r_children) const { l_children.reserve(node.m_training_vecs.size()); r_children.reserve(node.m_training_vecs.size()); @@ -1466,8 +1699,8 @@ namespace basisu template<typename Quantizer> bool generate_hierarchical_codebook_threaded_internal(Quantizer& q, uint32_t max_codebook_size, uint32_t max_parent_codebook_size, - std::vector<uint_vec>& codebook, - std::vector<uint_vec>& parent_codebook, + basisu::vector<uint_vec>& codebook, + basisu::vector<uint_vec>& parent_codebook, uint32_t max_threads, bool limit_clusterizers, job_pool *pJob_pool) { codebook.resize(0); @@ -1493,7 +1726,7 @@ namespace basisu if (!q.generate(max_threads)) return false; - std::vector<uint_vec> initial_codebook; + basisu::vector<uint_vec> initial_codebook; q.retrieve(initial_codebook); @@ -1512,12 +1745,14 @@ namespace basisu bool success_flags[cMaxThreads]; clear_obj(success_flags); - std::vector<uint_vec> local_clusters[cMaxThreads]; - std::vector<uint_vec> local_parent_clusters[cMaxThreads]; + basisu::vector<uint_vec> local_clusters[cMaxThreads]; + basisu::vector<uint_vec> local_parent_clusters[cMaxThreads]; for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++) { +#ifndef __EMSCRIPTEN__ pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] { +#endif Quantizer& lq = quantizers[thread_iter]; uint_vec& cluster_indices = initial_codebook[thread_iter]; @@ -1558,11 +1793,15 @@ namespace basisu } } +#ifndef __EMSCRIPTEN__ } ); +#endif } // thread_iter +#ifndef __EMSCRIPTEN__ pJob_pool->wait_for_all(); +#endif uint32_t total_clusters = 0, total_parent_clusters = 0; @@ -1598,8 +1837,8 @@ namespace basisu template<typename Quantizer> bool generate_hierarchical_codebook_threaded(Quantizer& q, uint32_t max_codebook_size, uint32_t max_parent_codebook_size, - std::vector<uint_vec>& codebook, - std::vector<uint_vec>& parent_codebook, + basisu::vector<uint_vec>& codebook, + basisu::vector<uint_vec>& parent_codebook, uint32_t max_threads, job_pool *pJob_pool) { typedef bit_hasher<typename Quantizer::training_vec_type> training_vec_bit_hasher; @@ -1629,7 +1868,7 @@ namespace basisu Quantizer group_quant; typedef typename group_hash::const_iterator group_hash_const_iter; - std::vector<group_hash_const_iter> unique_vec_iters; + basisu::vector<group_hash_const_iter> unique_vec_iters; unique_vec_iters.reserve(unique_vecs.size()); for (auto iter = unique_vecs.begin(); iter != unique_vecs.end(); ++iter) @@ -1644,7 +1883,7 @@ namespace basisu debug_printf("Limit clusterizers: %u\n", limit_clusterizers); - std::vector<uint_vec> group_codebook, group_parent_codebook; + basisu::vector<uint_vec> group_codebook, group_parent_codebook; bool status = generate_hierarchical_codebook_threaded_internal(group_quant, max_codebook_size, max_parent_codebook_size, group_codebook, @@ -1693,7 +1932,7 @@ namespace basisu class histogram { - std::vector<uint32_t> m_hist; + basisu::vector<uint32_t> m_hist; public: histogram(uint32_t size = 0) { init(size); } @@ -1754,7 +1993,8 @@ namespace basisu struct sym_freq { - uint16_t m_key, m_sym_index; + uint32_t m_key; + uint16_t m_sym_index; }; sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); @@ -1835,7 +2075,7 @@ namespace basisu { if (m_bit_buffer_size) { - m_total_bits += 8; + m_total_bits += 8 - (m_bit_buffer_size & 7); append_byte(static_cast<uint8_t>(m_bit_buffer)); m_bit_buffer = 0; @@ -2107,6 +2347,12 @@ namespace basisu resize(w, h, p); } + image(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) : + m_width(0), m_height(0), m_pitch(0) + { + init(pImage, width, height, comps); + } + image(const image &other) : m_width(0), m_height(0), m_pitch(0) { @@ -2155,6 +2401,47 @@ namespace basisu return *this; } + void init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) + { + assert(comps >= 1 && comps <= 4); + + resize(width, height); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const uint8_t *pSrc = &pImage[(x + y * width) * comps]; + color_rgba &dst = (*this)(x, y); + + if (comps == 1) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = 255; + } + else if (comps == 2) + { + dst.r = pSrc[0]; + dst.g = pSrc[0]; + dst.b = pSrc[0]; + dst.a = pSrc[1]; + } + else + { + dst.r = pSrc[0]; + dst.g = pSrc[1]; + dst.b = pSrc[2]; + if (comps == 4) + dst.a = pSrc[3]; + else + dst.a = 255; + } + } + } + } + image &fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba &c) { for (uint32_t iy = 0; iy < h; iy++) @@ -2163,6 +2450,14 @@ namespace basisu return *this; } + image& fill_box_alpha(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const color_rgba& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_clipped_alpha(x + ix, y + iy, c); + return *this; + } + image &crop_dup_borders(uint32_t w, uint32_t h) { const uint32_t orig_w = m_width, orig_h = m_height; @@ -2252,6 +2547,13 @@ namespace basisu return *this; } + inline image& set_clipped_alpha(int x, int y, const color_rgba& c) + { + if ((static_cast<uint32_t>(x) < m_width) && (static_cast<uint32_t>(y) < m_height)) + (*this)(x, y).m_comps[3] = c.m_comps[3]; + return *this; + } + // Very straightforward blit with full clipping. Not fast, but it works. image &blit(const image &src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) { @@ -2376,6 +2678,8 @@ namespace basisu } return *this; } + + void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); private: uint32_t m_width, m_height, m_pitch; // all in pixels @@ -2384,7 +2688,7 @@ namespace basisu // Float images - typedef std::vector<vec4F> vec4F_vec; + typedef basisu::vector<vec4F> vec4F_vec; class imagef { @@ -2635,10 +2939,27 @@ namespace basisu }; // Image saving/loading/resampling - + + bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); bool load_png(const char* pFilename, image& img); inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } + bool load_bmp(const char* pFilename, image& img); + inline bool load_bmp(const std::string &filename, image &img) { return load_bmp(filename.c_str(), img); } + + bool load_tga(const char* pFilename, image& img); + inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } + + bool load_jpg(const char *pFilename, image& img); + inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } + + // Currently loads .BMP, .PNG, or .TGA. + bool load_image(const char* pFilename, image& img); + inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } + + uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); + uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); + enum { cImageSaveGrayscale = 1, @@ -2697,7 +3018,7 @@ namespace basisu template<typename T> class vector2D { - typedef std::vector<T> TVec; + typedef basisu::vector<T> TVec; uint32_t m_width, m_height; TVec m_values; @@ -2800,7 +3121,7 @@ namespace basisu } void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); - + } // namespace basisu diff --git a/thirdparty/basis_universal/encoder/basisu_etc.cpp b/thirdparty/basis_universal/encoder/basisu_etc.cpp new file mode 100644 index 0000000000..232e8965b0 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_etc.cpp @@ -0,0 +1,1593 @@ +// basis_etc.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_etc.h" + +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + +#define BASISU_DEBUG_ETC_ENCODER 0 +#define BASISU_DEBUG_ETC_ENCODER_DEEPER 0 + +namespace basisu +{ + const int8_t g_etc2_eac_tables[16][8] = + { + { -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 }, + { -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 }, + { -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 }, + { -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 } + }; + + const int8_t g_etc2_eac_tables8[16][8] = + { + { -24, -48, -72, -120, 16, 40, 64, 112 }, { -24,-56,-80,-104,16,48,72,96 }, { -16,-40,-64,-104,8,32,56,96 }, { -16,-32,-48,-104,8,24,40,96 }, + { -24,-48,-64,-96,16,40,56,88 }, { -24,-56,-72,-88,16,48,64,80 }, { -32,-56,-64,-88,24,48,56,80 }, { -24,-40,-64,-88,16,32,56,80 }, + { -16,-48,-64,-80,8,40,56,72 }, { -16,-40,-64,-80,8,32,56,72 }, { -16,-32,-64,-80,8,24,56,72 }, { -16,-40,-56,-80,8,32,48,72 }, + { -24,-32,-56,-80,16,24,48,72 }, { -8,-16,-24,-80,0,8,16,72 }, { -32,-48,-64,-72,24,40,56,64 }, { -24,-40,-56,-72,16,32,48,64 } + }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [ diff/inten_table/selector][desired_color ] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. + // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16_t g_etc1_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. + static const uint16_t g_etc1_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725, 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, { 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929, 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23, + 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, + 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E, 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18, + 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01, 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16, + 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B, 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, + 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34, 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11, 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF }, + }; + + static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) + { + const uint32_t limit = diff ? 32 : 16; + BASISU_NOTE_UNUSED(limit); + assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = clamp<int>(c, 0, 255); + return c; + } + + void pack_etc1_solid_color_init() + { + for (uint32_t diff = 0; diff < 2; diff++) + { + const uint32_t limit = diff ? 32 : 16; + + for (uint32_t inten = 0; inten < 8; inten++) + { + for (uint32_t selector = 0; selector < 4; selector++) + { + const uint32_t inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint32_t color = 0; color < 256; color++) + { + uint32_t best_error = UINT32_MAX, best_packed_c = 0; + for (uint32_t packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint32_t err = (uint32_t)labs(v - static_cast<int>(color)); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + assert(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16_t>(best_packed_c | (best_error << 8)); + } + } + } + } + } + + // Packs solid color blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. + uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor) + { + assert(g_etc1_inverse_lookup[0][255]); + + static uint32_t s_next_comp[4] = { 1, 2, 0, 1 }; + + uint32_t best_error = UINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint32_t i = 0; i < 3; i++) + { + const uint32_t c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = clamp<int>(pColor[i] + delta, 0, 255); + + const uint16_t* pTable; + if (!c_plus_delta) + pTable = g_etc1_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_etc1_color8_to_etc_block_config_0_255[1]; + else + pTable = g_etc1_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint32_t x = *pTable++; + +#ifdef _DEBUG + const uint32_t diff = x & 1; + const uint32_t inten = (x >> 1) & 7; + const uint32_t selector = (x >> 4) & 3; + const uint32_t p0 = (x >> 8) & 255; + assert(etc1_decode_value(diff, inten, selector, p0) == (uint32_t)c_plus_delta); +#endif + + const uint16_t* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16_t p1 = pInverse_table[c1]; + uint16_t p2 = pInverse_table[c2]; + const uint32_t trial_error = square(c_plus_delta - pColor[i]) + square(p1 >> 8) + square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } + found_perfect_match: + + const uint32_t diff = best_x & 1; + const uint32_t inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast<uint8_t>(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint32_t etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast<uint16_t*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast<uint16_t*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; + + const uint32_t best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast<uint8_t>(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast<uint8_t>(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast<uint8_t>(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast<uint8_t>(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast<uint8_t>(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i + 1]] = static_cast<uint8_t>(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; + + static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = + { + { { 0, 0, 0, 8 } },{ { 0, 5, 2, 1 } },{ { 0, 6, 1, 1 } },{ { 0, 7, 0, 1 } },{ { 0, 7, 1, 0 } }, + { { 0, 0, 8, 0 } },{ { 0, 0, 3, 5 } },{ { 0, 1, 7, 0 } },{ { 0, 0, 4, 4 } },{ { 0, 0, 2, 6 } }, + { { 0, 0, 7, 1 } },{ { 0, 0, 1, 7 } },{ { 0, 0, 5, 3 } },{ { 1, 6, 0, 1 } },{ { 0, 0, 6, 2 } }, + { { 0, 2, 6, 0 } },{ { 2, 4, 2, 0 } },{ { 0, 3, 5, 0 } },{ { 3, 3, 1, 1 } },{ { 4, 2, 0, 2 } }, + { { 1, 5, 2, 0 } },{ { 0, 5, 3, 0 } },{ { 0, 6, 2, 0 } },{ { 2, 4, 1, 1 } },{ { 5, 1, 0, 2 } }, + { { 6, 1, 1, 0 } },{ { 3, 3, 0, 2 } },{ { 6, 0, 0, 2 } },{ { 0, 8, 0, 0 } },{ { 6, 1, 0, 1 } }, + { { 0, 1, 6, 1 } },{ { 1, 6, 1, 0 } },{ { 4, 1, 3, 0 } },{ { 0, 2, 5, 1 } },{ { 5, 0, 3, 0 } }, + { { 5, 3, 0, 0 } },{ { 0, 1, 5, 2 } },{ { 0, 3, 4, 1 } },{ { 2, 5, 1, 0 } },{ { 1, 7, 0, 0 } }, + { { 0, 1, 4, 3 } },{ { 6, 0, 2, 0 } },{ { 0, 4, 4, 0 } },{ { 2, 6, 0, 0 } },{ { 0, 2, 4, 2 } }, + { { 0, 5, 1, 2 } },{ { 0, 6, 0, 2 } },{ { 3, 5, 0, 0 } },{ { 0, 4, 3, 1 } },{ { 3, 4, 1, 0 } }, + { { 4, 3, 1, 0 } },{ { 1, 5, 0, 2 } },{ { 0, 3, 3, 2 } },{ { 1, 4, 1, 2 } },{ { 0, 4, 2, 2 } }, + { { 2, 3, 3, 0 } },{ { 4, 4, 0, 0 } },{ { 1, 2, 4, 1 } },{ { 0, 5, 0, 3 } },{ { 0, 1, 3, 4 } }, + { { 1, 5, 1, 1 } },{ { 1, 4, 2, 1 } },{ { 1, 3, 2, 2 } },{ { 5, 2, 1, 0 } },{ { 1, 3, 3, 1 } }, + { { 0, 1, 2, 5 } },{ { 1, 1, 5, 1 } },{ { 0, 3, 2, 3 } },{ { 2, 5, 0, 1 } },{ { 3, 2, 2, 1 } }, + { { 2, 3, 0, 3 } },{ { 1, 4, 3, 0 } },{ { 2, 2, 1, 3 } },{ { 6, 2, 0, 0 } },{ { 1, 0, 6, 1 } }, + { { 3, 3, 2, 0 } },{ { 7, 1, 0, 0 } },{ { 3, 1, 4, 0 } },{ { 0, 2, 3, 3 } },{ { 0, 4, 1, 3 } }, + { { 0, 4, 0, 4 } },{ { 0, 1, 0, 7 } },{ { 2, 0, 5, 1 } },{ { 2, 0, 4, 2 } },{ { 3, 0, 2, 3 } }, + { { 2, 2, 4, 0 } },{ { 2, 2, 3, 1 } },{ { 4, 0, 3, 1 } },{ { 3, 2, 3, 0 } },{ { 2, 3, 2, 1 } }, + { { 1, 3, 4, 0 } },{ { 7, 0, 1, 0 } },{ { 3, 0, 4, 1 } },{ { 1, 0, 5, 2 } },{ { 8, 0, 0, 0 } }, + { { 3, 0, 1, 4 } },{ { 4, 1, 1, 2 } },{ { 4, 0, 2, 2 } },{ { 1, 2, 5, 0 } },{ { 4, 2, 1, 1 } }, + { { 3, 4, 0, 1 } },{ { 2, 0, 3, 3 } },{ { 5, 0, 1, 2 } },{ { 5, 0, 0, 3 } },{ { 2, 4, 0, 2 } }, + { { 2, 1, 4, 1 } },{ { 4, 0, 1, 3 } },{ { 2, 1, 5, 0 } },{ { 4, 2, 2, 0 } },{ { 4, 0, 4, 0 } }, + { { 1, 0, 4, 3 } },{ { 1, 4, 0, 3 } },{ { 3, 0, 3, 2 } },{ { 4, 3, 0, 1 } },{ { 0, 1, 1, 6 } }, + { { 1, 3, 1, 3 } },{ { 0, 2, 2, 4 } },{ { 2, 0, 2, 4 } },{ { 5, 1, 1, 1 } },{ { 3, 0, 5, 0 } }, + { { 2, 3, 1, 2 } },{ { 3, 0, 0, 5 } },{ { 0, 3, 1, 4 } },{ { 5, 0, 2, 1 } },{ { 2, 1, 3, 2 } }, + { { 2, 0, 6, 0 } },{ { 3, 1, 3, 1 } },{ { 5, 1, 2, 0 } },{ { 1, 0, 3, 4 } },{ { 1, 1, 6, 0 } }, + { { 4, 0, 0, 4 } },{ { 2, 0, 1, 5 } },{ { 0, 3, 0, 5 } },{ { 1, 3, 0, 4 } },{ { 4, 1, 2, 1 } }, + { { 1, 2, 3, 2 } },{ { 3, 1, 0, 4 } },{ { 5, 2, 0, 1 } },{ { 1, 2, 2, 3 } },{ { 3, 2, 1, 2 } }, + { { 2, 2, 2, 2 } },{ { 6, 0, 1, 1 } },{ { 1, 2, 1, 4 } },{ { 1, 1, 4, 2 } },{ { 3, 2, 0, 3 } }, + { { 1, 2, 0, 5 } },{ { 1, 0, 7, 0 } },{ { 3, 1, 2, 2 } },{ { 1, 0, 2, 5 } },{ { 2, 0, 0, 6 } }, + { { 2, 1, 1, 4 } },{ { 2, 2, 0, 4 } },{ { 1, 1, 3, 3 } },{ { 7, 0, 0, 1 } },{ { 1, 0, 0, 7 } }, + { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, + { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } + }; + + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, + } + }; + + // [flip][subblock][pixel_index] + const uint32_t g_etc1_pixel_indices[2][2][8] = + { + { + { + 0 + 4 * 0, 0 + 4 * 1, 0 + 4 * 2, 0 + 4 * 3, + 1 + 4 * 0, 1 + 4 * 1, 1 + 4 * 2, 1 + 4 * 3 + }, + { + 2 + 4 * 0, 2 + 4 * 1, 2 + 4 * 2, 2 + 4 * 3, + 3 + 4 * 0, 3 + 4 * 1, 3 + 4 * 2, 3 + 4 * 3 + } + }, + { + { + 0 + 4 * 0, 1 + 4 * 0, 2 + 4 * 0, 3 + 4 * 0, + 0 + 4 * 1, 1 + 4 * 1, 2 + 4 * 1, 3 + 4 * 1 + }, + { + 0 + 4 * 2, 1 + 4 * 2, 2 + 4 * 2, 3 + 4 * 2, + 0 + 4 * 3, 1 + 4 * 3, 2 + 4 * 3, 3 + 4 * 3 + }, + } + }; + + uint16_t etc_block::pack_color5(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color5(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = minimum(r, 31U); + g = minimum(g, 31U); + b = minimum(b, 31U); + + return static_cast<uint16_t>(b | (g << 5U) | (r << 10U)); + } + + color_rgba etc_block::unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color5 & 31U; + uint32_t g = (packed_color5 >> 5U) & 31U; + uint32_t r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + void etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) + { + color_rgba c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc_block::unpack_color5(color_rgba& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba_i16 dc(unpack_delta3(packed_delta3)); + + int b = (packed_color5 & 31U) + dc.b; + int g = ((packed_color5 >> 5U) & 31U) + dc.g; + int r = ((packed_color5 >> 10U) & 31U) + dc.r; + + bool success = true; + if (static_cast<uint32_t>(r | g | b) > 31U) + { + success = false; + r = clamp<int>(r, 0, 31); + g = clamp<int>(g, 0, 31); + b = clamp<int>(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, minimum(alpha, 255U)); + return success; + } + + bool etc_block::unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + color_rgba result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16_t etc_block::pack_delta3(const color_rgba_i16& color) + { + return pack_delta3(color.r, color.g, color.b); + } + + uint16_t etc_block::pack_delta3(int r, int g, int b) + { + assert((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + assert((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + assert((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast<uint16_t>(b | (g << 3) | (r << 6)); + } + + color_rgba_i16 etc_block::unpack_delta3(uint16_t packed_delta3) + { + int r = (packed_delta3 >> 6) & 7; + int g = (packed_delta3 >> 3) & 7; + int b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + return color_rgba_i16(r, g, b, 255); + } + + void etc_block::unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16_t etc_block::pack_color4(const color_rgba& color, bool scaled, uint32_t bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16_t etc_block::pack_color4(uint32_t r, uint32_t g, uint32_t b, bool scaled, uint32_t bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = minimum(r, 15U); + g = minimum(g, 15U); + b = minimum(b, 15U); + + return static_cast<uint16_t>(b | (g << 4U) | (r << 8U)); + } + + color_rgba etc_block::unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_rgba(cNoClamp, r, g, b, minimum(alpha, 255U)); + } + + void etc_block::unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color_rgba c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool etc_block::get_diff_subblock_colors(color_rgba* pDst, uint16_t packed_color5, uint16_t packed_delta3, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + + return success; + } + + void etc_block::get_abs_subblock_colors(color_rgba* pDst, uint16_t packed_color4, uint32_t table_idx) + { + assert(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint32_t r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0, 255); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1, 255); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2, 255); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3, 255); + } + + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) + { + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint32_t table_index0 = block.get_inten_table(0); + const uint32_t table_index1 = block.get_inten_table(1); + + color_rgba subblock_colors0[4]; + color_rgba subblock_colors1[4]; + + if (diff_flag) + { + const uint16_t base_color5 = block.get_base5_color(); + const uint16_t delta_color3 = block.get_delta3_color(); + etc_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + return false; + } + else + { + const uint16_t base_color4_0 = block.get_base4_color(0); + etc_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16_t base_color4_1 = block.get_base4_color(1); + etc_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + if (preserve_alpha) + { + if (flip_flag) + { + for (uint32_t y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint32_t y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint32_t y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint32_t y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return true; + } + + inline int extend_6_to_8(uint32_t n) + { + return (n << 2) | (n >> 4); + } + + inline int extend_7_to_8(uint32_t n) + { + return (n << 1) | (n >> 6); + } + + inline int extend_4_to_8(uint32_t n) + { + return (n << 4) | n; + } + + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + uint64_t total_error = 0; + + if (subblock_index < 0) + { + for (uint32_t i = 0; i < 16; i++) + total_error += color_distance(perceptual, pBlock_pixels[i], unpacked_block[i], false); + } + else + { + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + total_error += color_distance(perceptual, pBlock_pixels[idx], unpacked_block[idx], false); + } + } + + return total_error; + } + + void etc_block::get_subblock_pixels(color_rgba* pPixels, int subblock_index) const + { + if (subblock_index < 0) + unpack_etc1(*this, pPixels); + else + { + color_rgba unpacked_block[16]; + + unpack_etc1(*this, unpacked_block); + + const bool flip_bit = get_flip_bit(); + + for (uint32_t i = 0; i < 8; i++) + { + const uint32_t idx = g_etc1_pixel_indices[flip_bit][subblock_index][i]; + + pPixels[i] = unpacked_block[idx]; + } + } + } + + bool etc1_optimizer::compute() + { + assert(m_pResult->m_pSelectors); + + if (m_pParams->m_pForce_selectors) + { + assert(m_pParams->m_quality >= cETCQualitySlow); + if (m_pParams->m_quality < cETCQualitySlow) + return false; + } + + const uint32_t n = m_pParams->m_num_src_pixels; + + if (m_pParams->m_cluster_fit) + { + if (m_pParams->m_quality == cETCQualityFast) + compute_internal_cluster_fit(4); + else if (m_pParams->m_quality == cETCQualityMedium) + compute_internal_cluster_fit(16); + else if (m_pParams->m_quality == cETCQualitySlow) + compute_internal_cluster_fit(64); + else + compute_internal_cluster_fit(BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE); + } + else + compute_internal_neighborhood(m_br, m_bg, m_bb); + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = UINT32_MAX; + return false; + } + + //const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const uint8_t* pSelectors = m_pParams->m_pForce_selectors ? m_pParams->m_pForce_selectors : &m_best_solution.m_selectors[0]; + +#if defined(DEBUG) || defined(_DEBUG) + { + // sanity check the returned error + color_rgba block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64_t actual_error = 0; + + bool perceptual; + if (m_pParams->m_quality >= cETCQualityMedium) + perceptual = m_pParams->m_perceptual; + else + perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; + + for (uint32_t i = 0; i < n; i++) + actual_error += color_distance(perceptual, pSrc_pixels[i], block_colors[pSelectors[i]], false); + + assert(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::refine_solution(uint32_t max_refinement_trials) + { + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. + + const uint32_t n = m_pParams->m_num_src_pixels; + + for (uint32_t refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8_t* pSelectors = &m_best_solution.m_selectors[0]; + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint32_t r = 0; r < n; r++) + { + const uint32_t s = *pSelectors++; + const int yd_temp = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += clamp<int>(base_color.r + yd_temp, 0, 255) - base_color.r; + delta_sum_g += clamp<int>(base_color.g + yd_temp, 0, 255) - base_color.g; + delta_sum_b += clamp<int>(base_color.b + yd_temp, 0, 255) - base_color.b; + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + + const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n; + const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n; + const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n; + const int br1 = clamp<int>(static_cast<int32_t>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp<int>(static_cast<int32_t>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp<int>(static_cast<int32_t>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Refinement trial %u, avg_delta %f %f %f\n", refinement_trial, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); +#endif + + if (!evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution)) + break; + + } // refinement_trial + } + + void etc1_optimizer::compute_internal_neighborhood(int scan_r, int scan_g, int scan_b) + { + if (m_best_solution.m_error == 0) + return; + + //const uint32_t n = m_pParams->m_num_src_pixels; + const int scan_delta_size = m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = scan_b + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = scan_g + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = scan_r + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + + if (m_pParams->m_refinement) + { + refine_solution((m_pParams->m_quality == cETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2)); + } + + } // xdi + } // ydi + } // zdi + } + + void etc1_optimizer::compute_internal_cluster_fit(uint32_t total_perms_to_try) + { + if ((!m_best_solution.m_valid) || ((m_br != m_best_solution.m_coords.m_unscaled_color.r) || (m_bg != m_best_solution.m_coords.m_unscaled_color.g) || (m_bb != m_best_solution.m_coords.m_unscaled_color.b))) + { + evaluate_solution(etc1_solution_coordinates(m_br, m_bg, m_bb, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + } + + if ((m_best_solution.m_error == 0) || (!m_best_solution.m_valid)) + return; + + for (uint32_t i = 0; i < total_perms_to_try; i++) + { + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + + const int *pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + const color_rgba base_color(m_best_solution.m_coords.get_scaled_color()); + + const uint8_t *pNum_selectors = g_cluster_fit_order_tab[i].m_v; + + for (uint32_t q = 0; q < 4; q++) + { + const int yd_temp = pInten_table[q]; + + delta_sum_r += pNum_selectors[q] * (clamp<int>(base_color.r + yd_temp, 0, 255) - base_color.r); + delta_sum_g += pNum_selectors[q] * (clamp<int>(base_color.g + yd_temp, 0, 255) - base_color.g); + delta_sum_b += pNum_selectors[q] * (clamp<int>(base_color.b + yd_temp, 0, 255) - base_color.b); + } + + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + continue; + + const float avg_delta_r_f = static_cast<float>(delta_sum_r) / 8; + const float avg_delta_g_f = static_cast<float>(delta_sum_g) / 8; + const float avg_delta_b_f = static_cast<float>(delta_sum_b) / 8; + + const int br1 = clamp<int>(static_cast<int32_t>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = clamp<int>(static_cast<int32_t>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = clamp<int>(static_cast<int32_t>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Second refinement trial %u, avg_delta %f %f %f\n", i, avg_delta_r_f, avg_delta_g_f, avg_delta_b_f); +#endif + + evaluate_solution(etc1_solution_coordinates(br1, bg1, bb1, 0, m_pParams->m_use_color4), m_trial_solution, &m_best_solution); + + if (m_best_solution.m_error == 0) + break; + } + } + + void etc1_optimizer::init(const params& params, results& result) + { + m_pParams = ¶ms; + m_pResult = &result; + + const uint32_t n = m_pParams->m_num_src_pixels; + + m_selectors.resize(n); + m_best_selectors.resize(n); + m_temp_selectors.resize(n); + m_trial_solution.m_selectors.resize(n); + m_best_solution.m_selectors.resize(n); + + m_limit = m_pParams->m_use_color4 ? 15 : 31; + + vec3F avg_color(0.0f); + + m_luma.resize(n); + m_sorted_luma_indices.resize(n); + m_sorted_luma.resize(n); + + int min_r = 255, min_g = 255, min_b = 255; + int max_r = 0, max_g = 0, max_b = 0; + + for (uint32_t i = 0; i < n; i++) + { + const color_rgba& c = m_pParams->m_pSrc_pixels[i]; + + min_r = basisu::minimum<int>(min_r, c.r); + min_g = basisu::minimum<int>(min_g, c.g); + min_b = basisu::minimum<int>(min_b, c.b); + + max_r = basisu::maximum<int>(max_r, c.r); + max_g = basisu::maximum<int>(max_g, c.g); + max_b = basisu::maximum<int>(max_b, c.b); + + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast<uint16_t>(c.r + c.g + c.b); + m_sorted_luma_indices[i] = i; + } + avg_color /= static_cast<float>(n); + m_avg_color = avg_color; + m_max_comp_spread = basisu::maximum(basisu::maximum(max_r - min_r, max_g - min_g), max_b - min_b); + + m_br = clamp<int>(static_cast<uint32_t>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = clamp<int>(static_cast<uint32_t>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = clamp<int>(static_cast<uint32_t>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Avg block color: %u %u %u\n", m_br, m_bg, m_bb); +#endif + + if (m_pParams->m_quality == cETCQualityFast) + { + indirect_sort(n, &m_sorted_luma_indices[0], &m_luma[0]); + + m_pSorted_luma = &m_sorted_luma[0]; + m_pSorted_luma_indices = &m_sorted_luma_indices[0]; + + for (uint32_t i = 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = UINT64_MAX; + + clear_obj(m_solutions_tried); + } + + // Return false if we've probably already tried this solution, true if we have definitely not. + bool etc1_optimizer::check_for_redundant_solution(const etc1_solution_coordinates& coords) + { + // Hash first 3 bytes of color (RGB) + uint32_t kh = hash_hsieh((uint8_t*)&coords.m_unscaled_color.r, 3); + + uint32_t h0 = kh & cSolutionsTriedHashMask; + uint32_t h1 = (kh >> cSolutionsTriedHashBits) & cSolutionsTriedHashMask; + + // Simple Bloom filter lookup with k=2 + if ( ((m_solutions_tried[h0 >> 3] & (1 << (h0 & 7))) != 0) && + ((m_solutions_tried[h1 >> 3] & (1 << (h1 & 7))) != 0) ) + return false; + + m_solutions_tried[h0 >> 3] |= (1 << (h0 & 7)); + m_solutions_tried[h1 >> 3] |= (1 << (h1 & 7)); + + return true; + } + + static uint8_t g_eval_dist_tables[8][256] = + { + // 99% threshold + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,}, + { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,}, + { 1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,1,1,1,1,1,0,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, + { 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,} + }; + + bool etc1_optimizer::evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (!check_for_redundant_solution(coords)) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = INT64_MAX; + + const uint8_t *pSelectors_to_use = m_pParams->m_pForce_selectors; + + for (uint32_t inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + if (m_pParams->m_quality <= cETCQualityMedium) + { + if (!g_eval_dist_tables[inten_table][m_max_comp_spread]) + continue; + } +#if 0 + if (m_pParams->m_quality <= cETCQualityMedium) + { + // For tables 5-7, if the max component spread falls within certain ranges, skip the inten table. Statistically they are extremely unlikely to result in lower error. + if (inten_table == 7) + { + if (m_max_comp_spread < 42) + continue; + } + else if (inten_table == 6) + { + if ((m_max_comp_spread >= 12) && (m_max_comp_spread <= 31)) + continue; + } + else if (inten_table == 5) + { + if ((m_max_comp_spread >= 13) && (m_max_comp_spread <= 21)) + continue; + } + } +#endif + + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + } + + uint64_t total_error = 0; + + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + + if (!g_cpu_supports_sse41) + { + for (uint32_t c = 0; c < n; c++) + { + const color_rgba& src_pixel = *pSrc_pixels++; + + uint32_t best_selector_index = 0; + uint32_t best_error = 0; + + if (pSelectors_to_use) + { + best_selector_index = pSelectors_to_use[c]; + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[best_selector_index], false); + } + else + { + best_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[0], false); + + uint32_t trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[1], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[2], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = color_distance(m_pParams->m_perceptual, src_pixel, block_colors[3], false); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 3; + } + } + + m_temp_selectors[c] = static_cast<uint8_t>(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + break; + } + } + else + { +#if BASISU_SUPPORT_SSE + if (pSelectors_to_use) + { + if (m_pParams->m_perceptual) + perceptual_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + else + linear_distance_rgb_4_N_sse41((int64_t*)&total_error, pSelectors_to_use, block_colors, pSrc_pixels, n, trial_solution.m_error); + } + else + { + if (m_pParams->m_perceptual) + find_selectors_perceptual_rgb_4_N_sse41((int64_t*)&total_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, trial_solution.m_error); + else + find_selectors_linear_rgb_4_N_sse41((int64_t*)&total_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, trial_solution.m_error); + } +#endif + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (!check_for_redundant_solution(coords)) + return false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval solution fast: %u %u %u\n", coords.m_unscaled_color.r, coords.m_unscaled_color.g, coords.m_unscaled_color.b); +#endif + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = (int)coords.m_unscaled_color.r - (int)m_pParams->m_base_color5.r; + const int dg = (int)coords.m_unscaled_color.g - (int)m_pParams->m_base_color5.g; + const int db = (int)coords.m_unscaled_color.b - (int)m_pParams->m_base_color5.b; + + if ((minimum(dr, dg, db) < cETC1ColorDeltaMin) || (maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval failed due to constraint from %u %u %u\n", m_pParams->m_base_color5.r, m_pParams->m_base_color5.g, m_pParams->m_base_color5.b); +#endif + return false; + } + } + + const color_rgba base_color(coords.get_scaled_color()); + + const uint32_t n = m_pParams->m_num_src_pixels; + assert(trial_solution.m_selectors.size() == n); + + trial_solution.m_error = UINT64_MAX; + + const bool perceptual = (m_pParams->m_quality == cETCQualityFast) ? false : m_pParams->m_perceptual; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint32_t block_inten[4]; + color_rgba block_colors[4]; + for (uint32_t s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_rgba block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 255); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. + // 0 1 2 3 + // 01 12 23 + const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64_t total_error = 0; + const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; + + if (perceptual) + { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(true, block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(true, block_colors[3], pSrc_pixels[c], false); + } + else + { + if (!g_cpu_supports_sse41) + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast<uint8_t>(cur_selector); + total_error += color_distance(true, block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color_distance(true, block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + else + { +#if BASISU_SUPPORT_SSE + uint32_t cur_selector = 0, c; + + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + { + if (++cur_selector > 2) + goto done3; + } + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast<uint8_t>(cur_selector); + } + done3: + + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + ++c; + } + + int64_t block_error; + perceptual_distance_rgb_4_N_sse41(&block_error, &m_temp_selectors[0], block_colors, pSrc_pixels, n, INT64_MAX); + total_error += block_error; +#endif + } + } + } + else + { + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint32_t min_error = iabs((int)block_inten[0] - (int)m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[0], pSrc_pixels[c], false); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint32_t min_error = iabs((int)m_pSorted_luma[0] - (int)block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint32_t c = 0; c < n; c++) + total_error += color_distance(block_colors[3], pSrc_pixels[c], false); + } + else + { + uint32_t cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint32_t y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done2; + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast<uint8_t>(cur_selector); + total_error += color_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false); + } + done2: + while (c < n) + { + const uint32_t sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += color_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false); + ++c; + } + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + trial_solution.m_selectors.swap(m_temp_selectors); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + +#if BASISU_DEBUG_ETC_ENCODER_DEEPER + printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); +#endif + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + results.m_selectors.resize(num_pixels); + results.m_selectors_temp.resize(num_pixels); + + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t a = pPixels[i]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + results.m_base = min_alpha; + results.m_table = 13; + results.m_multiplier = 1; + for (uint32_t i = 0; i < num_pixels; i++) + results.m_selectors[i] = 4; + return 0; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + uint64_t best_err = UINT64_MAX; + + for (uint32_t table = 0; table < 16; table++) + { + if ((table_mask & (1U << table)) == 0) + continue; + + const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + const int base_min = clamp255(center - base_search_rad); + const int base_max = clamp255(center + base_search_rad); + + const int mul = (int)roundf(alpha_range / range); + const int mul_low = clamp<int>(mul - mul_search_rad, 1, 15); + const int mul_high = clamp<int>(mul + mul_search_rad, 1, 15); + + for (int base = base_min; base <= base_max; base++) + { + for (int multiplier = mul_low; multiplier <= mul_high; multiplier++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base); + + uint32_t err = iabs(a - v); + if (err < best_s_err) + { + best_s_err = err; + best_s = s; + } + } + + results.m_selectors_temp[i] = static_cast<uint8_t>(best_s); + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base; + results.m_multiplier = multiplier; + results.m_table = table; + results.m_selectors.swap(results.m_selectors_temp); + if (!best_err) + return best_err; + } + + } // table + + } // multiplier + + } // base + + return best_err; + } + + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + pack_eac_a8_results results; + pack_eac_a8(results, pPixels, 16, base_search_rad, mul_search_rad, table_mask); + + pBlock->m_base = results.m_base; + pBlock->m_multiplier = results.m_multiplier; + pBlock->m_table = results.m_table; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + pBlock->set_selector(x, y, results.m_selectors[x + y * 4]); + } + +} // namespace basisu diff --git a/thirdparty/basis_universal/basisu_etc.h b/thirdparty/basis_universal/encoder/basisu_etc.h index a202d01f6e..1e3ece43b8 100644 --- a/thirdparty/basis_universal/basisu_etc.h +++ b/thirdparty/basis_universal/encoder/basisu_etc.h @@ -1,5 +1,5 @@ // basis_etc.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,9 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_enc.h" -#include <set> namespace basisu { @@ -116,7 +115,7 @@ namespace basisu { assert((ofs + num) <= 64U); assert(num && (num < 32U)); - return (read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); + return (uint32_t)(read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); } inline void set_general_bits(uint32_t ofs, uint32_t num, uint32_t bits) @@ -266,6 +265,27 @@ namespace basisu p[-2] |= (msb << byte_bit_ofs); } + // Selector "etc1_val" ranges from 0-3 and is a direct (raw) ETC1 selector. + inline void set_raw_selector(uint32_t x, uint32_t y, uint32_t etc1_val) + { + assert((x | y | etc1_val) < 4); + const uint32_t bit_index = x * 4 + y; + + uint8_t* p = &m_bytes[7 - (bit_index >> 3)]; + + const uint32_t byte_bit_ofs = bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + inline uint32_t get_raw_selector_bits() const { return m_bytes[4] | (m_bytes[5] << 8) | (m_bytes[6] << 16) | (m_bytes[7] << 24); @@ -622,6 +642,23 @@ namespace basisu return true; } + bool set_block_color5_clamp(const color_rgba &c0_unscaled, const color_rgba &c1_unscaled) + { + set_diff_bit(true); + set_base5_color(pack_color5(c0_unscaled, false)); + + int dr = c1_unscaled.r - c0_unscaled.r; + int dg = c1_unscaled.g - c0_unscaled.g; + int db = c1_unscaled.b - c0_unscaled.b; + + dr = clamp<int>(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = clamp<int>(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = clamp<int>(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + set_delta3_color(pack_delta3(dr, dg, db)); + + return true; + } color_rgba get_selector_color(uint32_t x, uint32_t y, uint32_t s) const { color_rgba block_colors[4]; @@ -720,7 +757,7 @@ namespace basisu } }; - typedef std::vector<etc_block> etc_block_vec; + typedef basisu::vector<etc_block> etc_block_vec; // Returns false if the unpack fails (could be bogus data or ETC2) bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); @@ -844,10 +881,10 @@ namespace basisu bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } const int* pInten_table = g_etc1_inten_tables[m_inten_table]; - pBlock_colors[0].set((uint8_t)(br + pInten_table[0]), (uint8_t)(bg + pInten_table[0]), (uint8_t)(bb + pInten_table[0]), 255); - pBlock_colors[1].set((uint8_t)(br + pInten_table[1]), (uint8_t)(bg + pInten_table[1]), (uint8_t)(bb + pInten_table[1]), 255); - pBlock_colors[2].set((uint8_t)(br + pInten_table[2]), (uint8_t)(bg + pInten_table[2]), (uint8_t)(bb + pInten_table[2]), 255); - pBlock_colors[3].set((uint8_t)(br + pInten_table[3]), (uint8_t)(bg + pInten_table[3]), (uint8_t)(bb + pInten_table[3]), 255); + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0], 255); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1], 255); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2], 255); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3], 255); } color_rgba m_unscaled_color; @@ -914,9 +951,6 @@ namespace basisu m_refinement = true; m_pForce_selectors = nullptr; - - m_pEval_solution_override = nullptr; - m_pEval_solution_override_data = nullptr; } uint32_t m_num_src_pixels; @@ -932,9 +966,6 @@ namespace basisu bool m_refinement; const uint8_t* m_pForce_selectors; - - evaluate_solution_override_func m_pEval_solution_override; - void *m_pEval_solution_override_data; }; struct results @@ -970,7 +1001,7 @@ namespace basisu } etc1_solution_coordinates m_coords; - std::vector<uint8_t> m_selectors; + basisu::vector<uint8_t> m_selectors; uint64_t m_error; bool m_valid; @@ -1001,33 +1032,36 @@ namespace basisu vec3F m_avg_color; int m_br, m_bg, m_bb; - std::vector<uint16_t> m_luma; - std::vector<uint32_t> m_sorted_luma; - std::vector<uint32_t> m_sorted_luma_indices; + int m_max_comp_spread; + basisu::vector<uint16_t> m_luma; + basisu::vector<uint32_t> m_sorted_luma; + basisu::vector<uint32_t> m_sorted_luma_indices; const uint32_t* m_pSorted_luma_indices; uint32_t* m_pSorted_luma; - std::vector<uint8_t> m_selectors; - std::vector<uint8_t> m_best_selectors; + basisu::vector<uint8_t> m_selectors; + basisu::vector<uint8_t> m_best_selectors; potential_solution m_best_solution; potential_solution m_trial_solution; - std::vector<uint8_t> m_temp_selectors; - - std::set<uint32_t> m_solutions_tried; + basisu::vector<uint8_t> m_temp_selectors; + enum { cSolutionsTriedHashBits = 10, cTotalSolutionsTriedHashSize = 1 << cSolutionsTriedHashBits, cSolutionsTriedHashMask = cTotalSolutionsTriedHashSize - 1 }; + uint8_t m_solutions_tried[cTotalSolutionsTriedHashSize / 8]; + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, int &last_inten_table) { first_inten_table = maximum<int>(idx - 1, 0); last_inten_table = minimum<int>(cETC1IntenModifierValues, idx + 1); } - + + bool check_for_redundant_solution(const etc1_solution_coordinates& coords); bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); inline bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { - if (m_pParams->m_quality >= cETCQualitySlow) + if (m_pParams->m_quality >= cETCQualityMedium) return evaluate_solution_slow(coords, trial_solution, pBest_solution); else return evaluate_solution_fast(coords, trial_solution, pBest_solution); @@ -1042,5 +1076,77 @@ namespace basisu { etc1_optimizer m_optimizer; }; + + void pack_etc1_solid_color_init(); + uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); + + // ETC EAC + extern const int8_t g_etc2_eac_tables[16][8]; + extern const int8_t g_etc2_eac_tables8[16][8]; + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + struct eac_a8_block + { + uint16_t m_base : 8; + uint16_t m_table : 4; + uint16_t m_multiplier : 4; + + uint8_t m_selectors[6]; + + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const + { + assert((x < 4) && (y < 4)); + return static_cast<uint32_t>((selector_bits >> (45 - (y + x * 4) * 3)) & 7); + } + + inline uint64_t get_selector_bits() const + { + uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5]; + return pixels; + } + + inline void set_selector_bits(uint64_t pixels) + { + m_selectors[0] = (uint8_t)(pixels >> 40); + m_selectors[1] = (uint8_t)(pixels >> 32); + m_selectors[2] = (uint8_t)(pixels >> 24); + m_selectors[3] = (uint8_t)(pixels >> 16); + m_selectors[4] = (uint8_t)(pixels >> 8); + m_selectors[5] = (uint8_t)(pixels); + } + + void set_selector(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 8)); + + const uint32_t ofs = 45 - (y + x * 4) * 3; + + uint64_t pixels = get_selector_bits(); + + pixels &= ~(7ULL << ofs); + pixels |= (static_cast<uint64_t>(s) << ofs); + + set_selector_bits(pixels); + } + }; + + struct etc2_rgba_block + { + eac_a8_block m_alpha; + etc_block m_rgb; + }; + struct pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + uint8_vec m_selectors; + uint8_vec m_selectors_temp; + }; + + uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); + } // namespace basisu diff --git a/thirdparty/basis_universal/basisu_frontend.cpp b/thirdparty/basis_universal/encoder/basisu_frontend.cpp index 6f7a9bf889..324fc8e447 100644 --- a/thirdparty/basis_universal/basisu_frontend.cpp +++ b/thirdparty/basis_universal/encoder/basisu_frontend.cpp @@ -1,5 +1,5 @@ // basisu_frontend.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,11 +17,16 @@ // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this. // -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_frontend.h" #include <unordered_set> #include <unordered_map> +#if BASISU_SUPPORT_SSE +#define CPPSPMD_NAME(a) a##_sse41 +#include "basisu_kernels_declares.h" +#endif + #define BASISU_FRONTEND_VERIFY(c) do { if (!(c)) handle_verify_failure(__LINE__); } while(0) namespace basisu @@ -29,10 +34,11 @@ namespace basisu const uint32_t cMaxCodebookCreationThreads = 8; const uint32_t BASISU_MAX_ENDPOINT_REFINEMENT_STEPS = 3; - const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3; + //const uint32_t BASISU_MAX_SELECTOR_REFINEMENT_STEPS = 3; const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16; - const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE = 16; + const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32; + const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16; // TODO - How to handle internal verifies in the basisu lib static inline void handle_verify_failure(int line) @@ -57,14 +63,14 @@ namespace basisu uint32_t tv = size / sizeof(vec6F_quantizer::training_vec_with_weight); - std::vector<vec6F_quantizer::training_vec_with_weight> v(tv); + basisu::vector<vec6F_quantizer::training_vec_with_weight> v(tv); fread(&v[0], 1, sizeof(v[0]) * tv, pFile); for (uint32_t i = 0; i < tv; i++) m_endpoint_clusterizer.add_training_vec(v[i].first, v[i].second); m_endpoint_clusterizer.generate(16128); - std::vector<uint_vec> codebook; + basisu::vector<uint_vec> codebook; m_endpoint_clusterizer.retrieve(codebook); printf("Generated %u entries\n", (uint32_t)codebook.size()); @@ -78,6 +84,7 @@ namespace basisu { if (!p.m_pGlobal_sel_codebook) { + debug_printf("basisu_frontend::init: No global sel codebook!\n"); assert(0); return false; } @@ -128,11 +135,19 @@ namespace basisu case 2: { m_endpoint_refinement = true; + m_use_hierarchical_endpoint_codebooks = true; + m_use_hierarchical_selector_codebooks = true; + + break; + } + case 3: + { + m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = false; m_use_hierarchical_selector_codebooks = false; break; } - case 3: + case 4: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = true; @@ -141,7 +156,7 @@ namespace basisu m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; break; } - case 4: + case 5: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = false; @@ -150,7 +165,8 @@ namespace basisu m_num_selector_codebook_iterations = BASISU_MAX_ENDPOINT_REFINEMENT_STEPS; break; } - case 5: + case 6: + default: { m_endpoint_refinement = true; m_use_hierarchical_endpoint_codebooks = false; @@ -180,106 +196,113 @@ namespace basisu init_etc1_images(); - init_endpoint_training_vectors(); - - generate_endpoint_clusters(); - - for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++) + if (m_params.m_pGlobal_codebooks) { - BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); - - if (refine_endpoint_step) - { - introduce_new_endpoint_clusters(); - } - - generate_endpoint_codebook(refine_endpoint_step); - - if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) - { - char buf[256]; - snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); - dump_endpoint_clusterization_visualization(buf, false); - } - - bool early_out = false; + init_global_codebooks(); + } + else + { + init_endpoint_training_vectors(); - if (m_endpoint_refinement) + generate_endpoint_clusters(); + + for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++) { - //dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png"); - - if (!refine_endpoint_clusterization()) - early_out = true; + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); - if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1)) + if (refine_endpoint_step) { - eliminate_redundant_or_empty_endpoint_clusters(); - generate_endpoint_codebook(refine_endpoint_step); + introduce_new_endpoint_clusters(); } + generate_endpoint_codebook(refine_endpoint_step); + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) { char buf[256]; - snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); - + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_pre_%u.png", refine_endpoint_step); dump_endpoint_clusterization_visualization(buf, false); - snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step); + } + + bool early_out = false; - dump_endpoint_clusterization_visualization(buf, true); + if (m_endpoint_refinement) + { + //dump_endpoint_clusterization_visualization("endpoint_clusters_before_refinement.png"); + + if (!refine_endpoint_clusterization()) + early_out = true; + + if ((m_params.m_tex_type == basist::cBASISTexTypeVideoFrames) && (!refine_endpoint_step) && (m_num_endpoint_codebook_iterations == 1)) + { + eliminate_redundant_or_empty_endpoint_clusters(); + generate_endpoint_codebook(refine_endpoint_step); + } + + if ((m_params.m_debug_images) && (m_params.m_dump_endpoint_clusterization)) + { + char buf[256]; + snprintf(buf, sizeof(buf), "endpoint_cluster_vis_post_%u.png", refine_endpoint_step); + + dump_endpoint_clusterization_visualization(buf, false); + snprintf(buf, sizeof(buf), "endpoint_cluster_colors_vis_post_%u.png", refine_endpoint_step); + + dump_endpoint_clusterization_visualization(buf, true); + } } - } - eliminate_redundant_or_empty_endpoint_clusters(); + eliminate_redundant_or_empty_endpoint_clusters(); - if (m_params.m_debug_stats) - debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); + if (m_params.m_debug_stats) + debug_printf("Total endpoint clusters: %u\n", (uint32_t)m_endpoint_clusters.size()); - if (early_out) - break; - } + if (early_out) + break; + } - BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); + BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); - generate_block_endpoint_clusters(); + generate_block_endpoint_clusters(); - create_initial_packed_texture(); + create_initial_packed_texture(); - generate_selector_clusters(); + generate_selector_clusters(); - if (m_use_hierarchical_selector_codebooks) - compute_selector_clusters_within_each_parent_cluster(); + if (m_use_hierarchical_selector_codebooks) + compute_selector_clusters_within_each_parent_cluster(); - if (m_params.m_compression_level == 0) - { - create_optimized_selector_codebook(0); + if (m_params.m_compression_level == 0) + { + create_optimized_selector_codebook(0); - find_optimal_selector_clusters_for_each_block(); + find_optimal_selector_clusters_for_each_block(); - introduce_special_selector_clusters(); - } - else - { - const uint32_t num_refine_selector_steps = m_params.m_pGlobal_sel_codebook ? 1 : m_num_selector_codebook_iterations; - for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++) + introduce_special_selector_clusters(); + } + else { - create_optimized_selector_codebook(refine_selector_steps); + const uint32_t num_refine_selector_steps = m_params.m_pGlobal_sel_codebook ? 1 : m_num_selector_codebook_iterations; + for (uint32_t refine_selector_steps = 0; refine_selector_steps < num_refine_selector_steps; refine_selector_steps++) + { + create_optimized_selector_codebook(refine_selector_steps); - find_optimal_selector_clusters_for_each_block(); + find_optimal_selector_clusters_for_each_block(); - introduce_special_selector_clusters(); + introduce_special_selector_clusters(); - if ((m_params.m_compression_level >= 3) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) - { - if (!refine_block_endpoints_given_selectors()) - break; + if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) + { + if (!refine_block_endpoints_given_selectors()) + break; + } } } - } - - optimize_selector_codebook(); + + optimize_selector_codebook(); - if (m_params.m_debug_stats) - debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_indices.size()); + if (m_params.m_debug_stats) + debug_printf("Total selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size()); + } finalize(); @@ -294,6 +317,259 @@ namespace basisu return true; } + bool basisu_frontend::init_global_codebooks() + { + const basist::basisu_lowlevel_etc1s_transcoder* pTranscoder = m_params.m_pGlobal_codebooks; + + const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints(); + const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors(); + + m_endpoint_cluster_etc_params.resize(endpoints.size()); + for (uint32_t i = 0; i < endpoints.size(); i++) + { + m_endpoint_cluster_etc_params[i].m_inten_table[0] = endpoints[i].m_inten5; + m_endpoint_cluster_etc_params[i].m_inten_table[1] = endpoints[i].m_inten5; + + m_endpoint_cluster_etc_params[i].m_color_unscaled[0].set(endpoints[i].m_color5.r, endpoints[i].m_color5.g, endpoints[i].m_color5.b, 255); + m_endpoint_cluster_etc_params[i].m_color_used[0] = true; + m_endpoint_cluster_etc_params[i].m_valid = true; + } + + m_optimized_cluster_selectors.resize(selectors.size()); + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + m_optimized_cluster_selectors[i].set_selector(x, y, selectors[i].get_selector(x, y)); + } + + m_block_endpoint_clusters_indices.resize(m_total_blocks); + + m_orig_encoded_blocks.resize(m_total_blocks); + + m_block_selector_cluster_index.resize(m_total_blocks); + +#if 0 + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index] { +#endif + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block& blk = m_etc1_blocks_etc1s[block_index]; + + const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + + etc_block trial_blk; + trial_blk.set_block_color5_etc1s(blk.m_color_unscaled[0]); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits()); + + const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + if (!cur_err) + break; + } + + } // block_index + + m_block_selector_cluster_index[block_index] = best_index; + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + m_encoded_blocks.resize(m_total_blocks); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t selector_index = m_block_selector_cluster_index[block_index]; + + etc_block& blk = m_encoded_blocks[block_index]; + + blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]); + blk.set_flip_bit(true); + blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits()); + } +#endif + + // HACK HACK + const uint32_t NUM_PASSES = 3; + for (uint32_t pass = 0; pass < NUM_PASSES; pass++) + { + debug_printf("init_global_codebooks: pass %u\n", pass); + + const uint32_t N = 128; + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] { +#endif + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const etc_block& blk = pass ? m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index]; + const uint32_t blk_raw_selector_bits = blk.get_raw_selector_bits(); + + etc_block trial_blk(blk); + trial_blk.set_raw_selector_bits(blk_raw_selector_bits); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + etc_block best_block(trial_blk); + + for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++) + { + if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0)) + continue; + + trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[i].m_color_unscaled[0]); + trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[i].m_inten_table[0]); + + const color_rgba* pSource_pixels = get_source_pixel_block(block_index).get_ptr(); + uint64_t cur_err; + if (!pass) + cur_err = trial_blk.determine_selectors(pSource_pixels, m_params.m_perceptual); + else + cur_err = trial_blk.evaluate_etc1_error(pSource_pixels, m_params.m_perceptual); + + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + best_block = trial_blk; + + if (!cur_err) + break; + } + } + + m_block_endpoint_clusters_indices[block_index][0] = best_index; + m_block_endpoint_clusters_indices[block_index][1] = best_index; + + m_orig_encoded_blocks[block_index] = best_block; + + } // block_index + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + m_endpoint_clusters.resize(0); + m_endpoint_clusters.resize(endpoints.size()); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_cluster_index = m_block_endpoint_clusters_indices[block_index][0]; + m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2); + m_endpoint_clusters[endpoint_cluster_index].push_back(block_index * 2 + 1); + } + + m_block_selector_cluster_index.resize(m_total_blocks); + + for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job([this, first_index, last_index] { +#endif + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const uint32_t block_endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + + etc_block trial_blk; + trial_blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_color_unscaled[0]); + trial_blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[block_endpoint_index].m_inten_table[0]); + trial_blk.set_flip_bit(true); + + uint64_t best_err = UINT64_MAX; + uint32_t best_index = 0; + + for (uint32_t i = 0; i < m_optimized_cluster_selectors.size(); i++) + { + trial_blk.set_raw_selector_bits(m_optimized_cluster_selectors[i].get_raw_selector_bits()); + + const uint64_t cur_err = trial_blk.evaluate_etc1_error(get_source_pixel_block(block_index).get_ptr(), m_params.m_perceptual); + if (cur_err < best_err) + { + best_err = cur_err; + best_index = i; + if (!cur_err) + break; + } + + } // block_index + + m_block_selector_cluster_index[block_index] = best_index; + } + +#ifndef __EMSCRIPTEN__ + }); +#endif + + } + +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->wait_for_all(); +#endif + + m_encoded_blocks.resize(m_total_blocks); + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) + { + const uint32_t endpoint_index = m_block_endpoint_clusters_indices[block_index][0]; + const uint32_t selector_index = m_block_selector_cluster_index[block_index]; + + etc_block& blk = m_encoded_blocks[block_index]; + + blk.set_block_color5_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_color_unscaled[0]); + blk.set_inten_tables_etc1s(m_endpoint_cluster_etc_params[endpoint_index].m_inten_table[0]); + blk.set_flip_bit(true); + blk.set_raw_selector_bits(m_optimized_cluster_selectors[selector_index].get_raw_selector_bits()); + } + + } // pass + + m_selector_cluster_block_indices.resize(selectors.size()); + for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++) + m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index); + + return true; + } + void basisu_frontend::introduce_special_selector_clusters() { debug_printf("introduce_special_selector_clusters\n"); @@ -302,7 +578,7 @@ namespace basisu return; uint32_t total_blocks_relocated = 0; - const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_indices.size(); + const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size(); bool_vec block_relocated_flags(m_total_blocks); @@ -328,7 +604,7 @@ namespace basisu m_optimized_cluster_selectors.push_back(blk); - vector_ensure_element_is_valid(m_selector_cluster_indices, new_selector_cluster_index); + vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index); for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { @@ -357,14 +633,14 @@ namespace basisu // Change the block to use the new cluster m_block_selector_cluster_index[block_index] = new_selector_cluster_index; - m_selector_cluster_indices[new_selector_cluster_index].push_back(block_index); + m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index); block_relocated_flags[block_index] = true; #if 0 - int j = vector_find(m_selector_cluster_indices[old_selector_cluster_index], block_index); + int j = vector_find(m_selector_cluster_block_indices[old_selector_cluster_index], block_index); if (j >= 0) - m_selector_cluster_indices[old_selector_cluster_index].erase(m_selector_cluster_indices[old_selector_cluster_index].begin() + j); + m_selector_cluster_block_indices[old_selector_cluster_index].erase(m_selector_cluster_block_indices[old_selector_cluster_index].begin() + j); #endif total_blocks_relocated++; @@ -381,7 +657,7 @@ namespace basisu for (int selector_cluster_index = 0; selector_cluster_index < (int)initial_selector_clusters; selector_cluster_index++) { - uint_vec& block_indices = m_selector_cluster_indices[selector_cluster_index]; + uint_vec& block_indices = m_selector_cluster_block_indices[selector_cluster_index]; uint32_t dst_ofs = 0; @@ -399,6 +675,7 @@ namespace basisu debug_printf("Total blocks relocated to new flat selector clusters: %u\n", total_blocks_relocated); } + // This method will change the number and ordering of the selector codebook clusters. void basisu_frontend::optimize_selector_codebook() { debug_printf("optimize_selector_codebook\n"); @@ -436,15 +713,17 @@ namespace basisu new_to_old.push_back(i); } + debug_printf("Original selector clusters: %u, new cluster selectors: %u\n", orig_total_selector_clusters, total_new_entries); + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) { BASISU_FRONTEND_VERIFY((old_to_new[m_block_selector_cluster_index[i]] >= 0) && (old_to_new[m_block_selector_cluster_index[i]] < (int)total_new_entries)); m_block_selector_cluster_index[i] = old_to_new[m_block_selector_cluster_index[i]]; } - std::vector<etc_block> new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0); + basisu::vector<etc_block> new_optimized_cluster_selectors(m_optimized_cluster_selectors.size() ? total_new_entries : 0); basist::etc1_global_selector_codebook_entry_id_vec new_optimized_cluster_selector_global_cb_ids(m_optimized_cluster_selector_global_cb_ids.size() ? total_new_entries : 0); - std::vector<uint_vec> new_selector_cluster_indices(m_selector_cluster_indices.size() ? total_new_entries : 0); + basisu::vector<uint_vec> new_selector_cluster_indices(m_selector_cluster_block_indices.size() ? total_new_entries : 0); bool_vec new_selector_cluster_uses_global_cb(m_selector_cluster_uses_global_cb.size() ? total_new_entries : 0); for (uint32_t i = 0; i < total_new_entries; i++) @@ -455,24 +734,40 @@ namespace basisu if (m_optimized_cluster_selector_global_cb_ids.size()) new_optimized_cluster_selector_global_cb_ids[i] = m_optimized_cluster_selector_global_cb_ids[new_to_old[i]]; - if (m_selector_cluster_indices.size()) - new_selector_cluster_indices[i] = m_selector_cluster_indices[new_to_old[i]]; + //if (m_selector_cluster_block_indices.size()) + // new_selector_cluster_indices[i] = m_selector_cluster_block_indices[new_to_old[i]]; if (m_selector_cluster_uses_global_cb.size()) new_selector_cluster_uses_global_cb[i] = m_selector_cluster_uses_global_cb[new_to_old[i]]; } + for (uint32_t i = 0; i < m_block_selector_cluster_index.size(); i++) + { + new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i); + } + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); m_optimized_cluster_selector_global_cb_ids.swap(new_optimized_cluster_selector_global_cb_ids); - m_selector_cluster_indices.swap(new_selector_cluster_indices); + m_selector_cluster_block_indices.swap(new_selector_cluster_indices); m_selector_cluster_uses_global_cb.swap(new_selector_cluster_uses_global_cb); - + + // This isn't strictly necessary - doing it for completeness/future sanity. + if (m_selector_clusters_within_each_parent_cluster.size()) + { + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) + m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]]; + } + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); } void basisu_frontend::init_etc1_images() { debug_printf("basisu_frontend::init_etc1_images\n"); + + interval_timer tm; + tm.start(); m_etc1_blocks_etc1s.resize(m_total_blocks); @@ -481,8 +776,10 @@ namespace basisu { const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -494,6 +791,8 @@ namespace basisu if (m_params.m_compression_level == 0) optimizer_params.m_quality = cETCQualityFast; + else if (m_params.m_compression_level == 1) + optimizer_params.m_quality = cETCQualityMedium; else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) optimizer_params.m_quality = cETCQualityUber; @@ -506,8 +805,9 @@ namespace basisu optimizer_results.m_n = 16; optimizer.init(optimizer_params, optimizer_results); - optimizer.compute(); - + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); + etc_block &blk = m_etc1_blocks_etc1s[block_index]; memset(&blk, 0, sizeof(blk)); @@ -520,10 +820,17 @@ namespace basisu blk.set_selector(x, y, selectors[x + y * 4]); } +#ifndef __EMSCRIPTEN__ } ); +#endif + } - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif + + debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs()); } void basisu_frontend::init_endpoint_training_vectors() @@ -540,7 +847,9 @@ namespace basisu const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -562,11 +871,15 @@ namespace basisu } // block_index; +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } void basisu_frontend::generate_endpoint_clusters() @@ -649,7 +962,7 @@ namespace basisu for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++) { - const std::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -707,17 +1020,19 @@ namespace basisu const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; assert(cluster_indices.size()); for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { - std::vector<color_rgba> cluster_pixels(8); + basisu::vector<color_rgba> cluster_pixels(8); const uint32_t block_index = cluster_indices[cluster_indices_iter] >> 1; const uint32_t subblock_index = cluster_indices[cluster_indices_iter] & 1; @@ -775,10 +1090,15 @@ namespace basisu } } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif + } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif vector_sort(m_subblock_endpoint_quant_err_vec); } @@ -837,7 +1157,7 @@ namespace basisu continue; #endif - const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size(); + //const uint32_t new_endpoint_cluster_index = (uint32_t)m_endpoint_clusters.size(); enlarge_vector(m_endpoint_clusters, 1)->push_back(training_vector_index); enlarge_vector(m_endpoint_cluster_etc_params, 1); @@ -893,18 +1213,20 @@ namespace basisu { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, step ] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; BASISU_FRONTEND_VERIFY(cluster_indices.size()); const uint32_t total_pixels = (uint32_t)cluster_indices.size() * 8; - std::vector<color_rgba> cluster_pixels(total_pixels); + basisu::vector<color_rgba> cluster_pixels(total_pixels); for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -935,20 +1257,21 @@ namespace basisu cluster_optimizer_params.m_use_color4 = false; cluster_optimizer_params.m_perceptual = m_params.m_perceptual; - if (m_params.m_compression_level == 0) + if (m_params.m_compression_level <= 1) cluster_optimizer_params.m_quality = cETCQualityMedium; else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) cluster_optimizer_params.m_quality = cETCQualityUber; etc1_optimizer::results cluster_optimizer_results; - std::vector<uint8_t> cluster_selectors(total_pixels); + basisu::vector<uint8_t> cluster_selectors(total_pixels); cluster_optimizer_results.m_n = total_pixels; cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; optimizer.init(cluster_optimizer_params, cluster_optimizer_results); - optimizer.compute(); + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; @@ -1012,20 +1335,24 @@ namespace basisu } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } bool basisu_frontend::check_etc1s_constraints() const { - std::vector<vec2U> block_clusters(m_total_blocks); + basisu::vector<vec2U> block_clusters(m_total_blocks); for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++) { - const std::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1053,11 +1380,11 @@ namespace basisu if (m_use_hierarchical_endpoint_codebooks) compute_endpoint_clusters_within_each_parent_cluster(); - std::vector<vec2U> block_clusters(m_total_blocks); + basisu::vector<vec2U> block_clusters(m_total_blocks); for (int cluster_index = 0; cluster_index < static_cast<int>(m_endpoint_clusters.size()); cluster_index++) { - const std::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; + const basisu::vector<uint32_t>& cluster_indices = m_endpoint_clusters[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1081,19 +1408,19 @@ namespace basisu const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &best_cluster_indices, &block_clusters] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { - const bool is_flipped = true; - const uint32_t cluster_index = block_clusters[block_index][0]; BASISU_FRONTEND_VERIFY(cluster_index == block_clusters[block_index][1]); - const color_rgba *subblock_pixels = get_source_pixel_block(block_index).get_ptr(); + const color_rgba *pSubblock_pixels = get_source_pixel_block(block_index).get_ptr(); const uint32_t num_subblock_pixels = 16; - uint64_t best_cluster_err = UINT64_MAX; + uint64_t best_cluster_err = INT64_MAX; uint32_t best_cluster_index = 0; const uint32_t block_parent_endpoint_cluster_index = m_block_parent_endpoint_cluster.size() ? m_block_parent_endpoint_cluster[block_index] : 0; @@ -1116,19 +1443,20 @@ namespace basisu // Can't assign it here - may result in too much error when selector quant occurs if (cluster_etc_inten > m_endpoint_cluster_etc_params[cluster_index].m_inten_table[0]) { - total_err = UINT64_MAX; + total_err = INT64_MAX; goto skip_cluster; } etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten); - + +#if 0 for (uint32_t p = 0; p < num_subblock_pixels; p++) { uint64_t best_err = UINT64_MAX; for (uint32_t r = low_selector; r <= high_selector; r++) { - uint64_t err = color_distance(m_params.m_perceptual, subblock_pixels[p], subblock_colors[r], false); + uint64_t err = color_distance(m_params.m_perceptual, pSubblock_pixels[p], subblock_colors[r], false); best_err = minimum(best_err, err); if (!best_err) break; @@ -1138,6 +1466,64 @@ namespace basisu if (total_err > best_cluster_err) break; } // p +#else + if (m_params.m_perceptual) + { + if (!g_cpu_supports_sse41) + { + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(true, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p + } + else + { +#if BASISU_SUPPORT_SSE + find_lowest_error_perceptual_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err); +#endif + } + } + else + { + if (!g_cpu_supports_sse41) + { + for (uint32_t p = 0; p < num_subblock_pixels; p++) + { + uint64_t best_err = UINT64_MAX; + + for (uint32_t r = low_selector; r <= high_selector; r++) + { + uint64_t err = color_distance(false, pSubblock_pixels[p], subblock_colors[r], false); + best_err = minimum(best_err, err); + if (!best_err) + break; + } + + total_err += best_err; + if (total_err > best_cluster_err) + break; + } // p + } + else + { +#if BASISU_SUPPORT_SSE + find_lowest_error_linear_rgb_4_N_sse41((int64_t*)&total_err, subblock_colors, pSubblock_pixels, num_subblock_pixels, best_cluster_err); +#endif + } + } +#endif skip_cluster: if ((total_err < best_cluster_err) || @@ -1154,14 +1540,18 @@ namespace basisu best_cluster_indices[block_index] = best_cluster_index; } // block_index - + +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif - std::vector<typename std::vector<uint32_t> > optimized_endpoint_clusters(m_endpoint_clusters.size()); + basisu::vector<typename basisu::vector<uint32_t> > optimized_endpoint_clusters(m_endpoint_clusters.size()); uint32_t total_subblocks_reassigned = 0; for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) @@ -1199,8 +1589,8 @@ namespace basisu indirect_sort((uint32_t)m_endpoint_clusters.size(), &sorted_endpoint_cluster_indices[0], &m_endpoint_cluster_etc_params[0]); - std::vector<std::vector<uint32_t> > new_endpoint_clusters(m_endpoint_clusters.size()); - std::vector<endpoint_cluster_etc_params> new_subblock_etc_params(m_endpoint_clusters.size()); + basisu::vector<basisu::vector<uint32_t> > new_endpoint_clusters(m_endpoint_clusters.size()); + basisu::vector<endpoint_cluster_etc_params> new_subblock_etc_params(m_endpoint_clusters.size()); for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { @@ -1264,7 +1654,9 @@ namespace basisu const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -1288,12 +1680,16 @@ namespace basisu blk.determine_selectors(pSource_pixels, m_params.m_perceptual); } // block_index - + +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif m_orig_encoded_blocks = m_encoded_blocks; } @@ -1302,9 +1698,9 @@ namespace basisu { uint_vec block_selector_cluster_indices(m_total_blocks); - for (int cluster_index = 0; cluster_index < static_cast<int>(m_selector_cluster_indices.size()); cluster_index++) + for (int cluster_index = 0; cluster_index < static_cast<int>(m_selector_cluster_block_indices.size()); cluster_index++) { - const std::vector<uint32_t>& cluster_indices = m_selector_cluster_indices[cluster_index]; + const basisu::vector<uint32_t>& cluster_indices = m_selector_cluster_block_indices[cluster_index]; for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { @@ -1317,7 +1713,7 @@ namespace basisu } // cluster_index m_selector_clusters_within_each_parent_cluster.resize(0); - m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_indices.size()); + m_selector_clusters_within_each_parent_cluster.resize(m_selector_parent_cluster_block_indices.size()); for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { @@ -1355,7 +1751,9 @@ namespace basisu const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -1382,47 +1780,54 @@ namespace basisu } // block_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif vec16F_clusterizer selector_clusterizer; for (uint32_t i = 0; i < m_total_blocks; i++) selector_clusterizer.add_training_vec(training_vecs[i].first, training_vecs[i].second); - const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE : 0; + const int selector_parent_codebook_size = (m_params.m_compression_level <= 1) ? BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 : BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT; + const uint32_t parent_codebook_size = (m_params.m_max_selector_clusters >= 256) ? selector_parent_codebook_size : 0; + debug_printf("Using selector parent codebook size %u\n", parent_codebook_size); uint32_t max_threads = 0; max_threads = m_params.m_multithreaded ? minimum<int>(std::thread::hardware_concurrency(), cMaxCodebookCreationThreads) : 0; bool status = generate_hierarchical_codebook_threaded(selector_clusterizer, m_params.m_max_selector_clusters, m_use_hierarchical_selector_codebooks ? parent_codebook_size : 0, - m_selector_cluster_indices, - m_selector_parent_cluster_indices, + m_selector_cluster_block_indices, + m_selector_parent_cluster_block_indices, max_threads, m_params.m_pJob_pool); BASISU_FRONTEND_VERIFY(status); if (m_use_hierarchical_selector_codebooks) { - if (!m_selector_parent_cluster_indices.size()) + if (!m_selector_parent_cluster_block_indices.size()) { - m_selector_parent_cluster_indices.resize(0); - m_selector_parent_cluster_indices.resize(1); + m_selector_parent_cluster_block_indices.resize(0); + m_selector_parent_cluster_block_indices.resize(1); for (uint32_t i = 0; i < m_total_blocks; i++) - m_selector_parent_cluster_indices[0].push_back(i); + m_selector_parent_cluster_block_indices[0].push_back(i); } - BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE <= UINT8_MAX); + BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 <= UINT8_MAX); + BASISU_ASSUME(BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT <= UINT8_MAX); m_block_parent_selector_cluster.resize(0); m_block_parent_selector_cluster.resize(m_total_blocks); vector_set_all(m_block_parent_selector_cluster, 0xFF); - for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_indices.size(); parent_cluster_index++) + for (uint32_t parent_cluster_index = 0; parent_cluster_index < m_selector_parent_cluster_block_indices.size(); parent_cluster_index++) { - const uint_vec &cluster = m_selector_parent_cluster_indices[parent_cluster_index]; + const uint_vec &cluster = m_selector_parent_cluster_block_indices[parent_cluster_index]; for (uint32_t j = 0; j < cluster.size(); j++) m_block_parent_selector_cluster[cluster[j]] = static_cast<uint8_t>(parent_cluster_index); } @@ -1432,9 +1837,9 @@ namespace basisu } // Ensure that all the blocks within each cluster are all in the same parent cluster, or something is very wrong. - for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_indices.size(); cluster_index++) + for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++) { - const uint_vec &cluster = m_selector_cluster_indices[cluster_index]; + const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index]; uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) @@ -1452,14 +1857,16 @@ namespace basisu } } - debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_indices.size(), (uint32_t)m_selector_parent_cluster_indices.size()); + debug_printf("Total selector clusters: %u, total parent selector clusters: %u\n", (uint32_t)m_selector_cluster_block_indices.size(), (uint32_t)m_selector_parent_cluster_block_indices.size()); } void basisu_frontend::create_optimized_selector_codebook(uint32_t iter) { debug_printf("create_optimized_selector_codebook\n"); - const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_indices.size(); + const uint32_t total_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size(); + + debug_printf("Total selector clusters (from m_selector_cluster_block_indices.size()): %u\n", (uint32_t)m_selector_cluster_block_indices.size()); m_optimized_cluster_selectors.resize(total_selector_clusters); @@ -1474,12 +1881,14 @@ namespace basisu { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &total_clusters_processed, &total_selector_clusters] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector<uint32_t> &cluster_block_indices = m_selector_cluster_indices[cluster_index]; + const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[cluster_index]; if (!cluster_block_indices.size()) continue; @@ -1528,11 +1937,15 @@ namespace basisu } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } else { @@ -1552,12 +1965,14 @@ namespace basisu { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &uses_hybrid_sel_codebook, &total_clusters_processed, &total_selector_clusters] { +#endif for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector<uint32_t> &cluster_block_indices = m_selector_cluster_indices[cluster_index]; + const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[cluster_index]; if (!cluster_block_indices.size()) continue; @@ -1667,29 +2082,33 @@ namespace basisu } // cluster_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif } // if (m_params.m_pGlobal_sel_codebook) - + if (m_params.m_debug_images) { uint32_t max_selector_cluster_size = 0; - for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) - max_selector_cluster_size = maximum<uint32_t>(max_selector_cluster_size, (uint32_t)m_selector_cluster_indices[i].size()); + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + max_selector_cluster_size = maximum<uint32_t>(max_selector_cluster_size, (uint32_t)m_selector_cluster_block_indices[i].size()); if ((max_selector_cluster_size * 5) < 32768) { const uint32_t x_spacer_len = 16; - image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_indices.size() * 5); + image selector_cluster_vis(x_spacer_len + max_selector_cluster_size * 5, (uint32_t)m_selector_cluster_block_indices.size() * 5); - for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_indices.size(); selector_cluster_index++) + for (uint32_t selector_cluster_index = 0; selector_cluster_index < m_selector_cluster_block_indices.size(); selector_cluster_index++) { - const std::vector<uint32_t> &cluster_block_indices = m_selector_cluster_indices[selector_cluster_index]; + const basisu::vector<uint32_t> &cluster_block_indices = m_selector_cluster_block_indices[selector_cluster_index]; for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) @@ -1717,31 +2136,56 @@ namespace basisu void basisu_frontend::find_optimal_selector_clusters_for_each_block() { debug_printf("find_optimal_selector_clusters_for_each_block\n"); - + + // Sanity checks + BASISU_FRONTEND_VERIFY(m_selector_cluster_block_indices.size() == m_optimized_cluster_selectors.size()); + for (uint32_t i = 0; i < m_selector_clusters_within_each_parent_cluster.size(); i++) + { + for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) + { + BASISU_FRONTEND_VERIFY(m_selector_clusters_within_each_parent_cluster[i][j] < m_optimized_cluster_selectors.size()); + } + } + m_block_selector_cluster_index.resize(m_total_blocks); - + if (m_params.m_compression_level == 0) { // Don't do anything, just leave the blocks in their original selector clusters. - for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) { - for (uint32_t j = 0; j < m_selector_cluster_indices[i].size(); j++) - m_block_selector_cluster_index[m_selector_cluster_indices[i][j]] = i; + for (uint32_t j = 0; j < m_selector_cluster_block_indices[i].size(); j++) + m_block_selector_cluster_index[m_selector_cluster_block_indices[i][j]] = i; } } else { - std::vector< std::vector<uint32_t> > new_cluster_indices; - + // Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end. + basisu::vector< basisu::vector<uint32_t> > new_cluster_indices(m_optimized_cluster_selectors.size()); + // For each block: Determine which quantized selectors best encode that block, given its quantized endpoints. + basisu::vector<uint8_t> unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size()); + for (uint32_t cluster_index = 0; cluster_index < m_optimized_cluster_selectors.size(); cluster_index++) + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + unpacked_optimized_cluster_selectors[cluster_index * 16 + y * 4 + x] = (uint8_t)m_optimized_cluster_selectors[cluster_index].get_selector(x, y); + } + } + } + const uint32_t N = 1024; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { const uint32_t first_index = block_index_iter; const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N); - m_params.m_pJob_pool->add_job( [this, first_index, last_index, &new_cluster_indices] { +#ifndef __EMSCRIPTEN__ + m_params.m_pJob_pool->add_job( [this, first_index, last_index, &new_cluster_indices, &unpacked_optimized_cluster_selectors] { +#endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) { @@ -1752,20 +2196,32 @@ namespace basisu color_rgba trial_block_colors[4]; blk.get_block_colors(trial_block_colors, 0); - uint64_t best_cluster_err = UINT64_MAX; + // precompute errors for the i-th block pixel and selector sel: [sel][i] + uint32_t trial_errors[4][16]; + + for (int sel = 0; sel < 4; ++sel) + { + for (int i = 0; i < 16; ++i) + { + trial_errors[sel][i] = color_distance(m_params.m_perceptual, pBlock_pixels[i], trial_block_colors[sel], false); + } + } + + uint64_t best_cluster_err = INT64_MAX; uint32_t best_cluster_index = 0; const uint32_t parent_selector_cluster = m_block_parent_selector_cluster.size() ? m_block_parent_selector_cluster[block_index] : 0; const uint_vec *pCluster_indices = m_selector_clusters_within_each_parent_cluster.size() ? &m_selector_clusters_within_each_parent_cluster[parent_selector_cluster] : nullptr; - const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_indices.size(); + const uint32_t total_clusters = m_use_hierarchical_selector_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_selector_cluster_block_indices.size(); +#if 0 for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) { const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter; const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; - + uint64_t trial_err = 0; for (int y = 0; y < 4; y++) { @@ -1778,18 +2234,82 @@ namespace basisu goto early_out; } } - + if (trial_err < best_cluster_err) { best_cluster_err = trial_err; best_cluster_index = cluster_index; - if (!best_cluster_err) + if (!best_cluster_err) break; } early_out: ; } +#else + if (m_params.m_perceptual) + { + for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) + { + const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter; + //const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; + + uint64_t trial_err = 0; + + for (int i = 0; i < 16; i++) + { + const uint32_t sel = unpacked_optimized_cluster_selectors[cluster_index * 16 + i]; + + trial_err += trial_errors[sel][i]; + if (trial_err > best_cluster_err) + goto early_out; + } + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (!best_cluster_err) + break; + } + + early_out: + ; + + } // cluster_iter + } + else + { + for (uint32_t cluster_iter = 0; cluster_iter < total_clusters; cluster_iter++) + { + const uint32_t cluster_index = m_use_hierarchical_selector_codebooks ? (*pCluster_indices)[cluster_iter] : cluster_iter; + //const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; + + uint64_t trial_err = 0; + + for (int i = 0; i < 16; i++) + { + const uint32_t sel = unpacked_optimized_cluster_selectors[cluster_index * 16 + i]; + + trial_err += trial_errors[sel][i]; + if (trial_err > best_cluster_err) + goto early_out2; + } + + if (trial_err < best_cluster_err) + { + best_cluster_err = trial_err; + best_cluster_index = cluster_index; + if (!best_cluster_err) + break; + } + + early_out2: + ; + + } // cluster_iter + } +#endif blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits()); @@ -1804,17 +2324,21 @@ namespace basisu } // block_index +#ifndef __EMSCRIPTEN__ } ); +#endif } // block_index_iter - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); - - m_selector_cluster_indices.swap(new_cluster_indices); +#endif + + m_selector_cluster_block_indices.swap(new_cluster_indices); } - for (uint32_t i = 0; i < m_selector_cluster_indices.size(); i++) - vector_sort(m_selector_cluster_indices[i]); + for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++) + vector_sort(m_selector_cluster_block_indices[i]); } // TODO: Remove old ETC1 specific stuff, and thread this. @@ -1842,7 +2366,7 @@ namespace basisu const uint_vec &subblocks = subblock_params.m_subblocks; //uint32_t total_pixels = subblock.m_subblocks.size() * 8; - std::vector<color_rgba> subblock_colors[2]; // [use_individual_mode] + basisu::vector<color_rgba> subblock_colors[2]; // [use_individual_mode] uint8_vec subblock_selectors[2]; uint64_t cur_subblock_err[2] = { 0, 0 }; @@ -1882,7 +2406,7 @@ namespace basisu clear_obj(cluster_optimizer_results); - std::vector<uint8_t> cluster_selectors[2]; + basisu::vector<uint8_t> cluster_selectors[2]; for (uint32_t use_individual_mode = 0; use_individual_mode < 2; use_individual_mode++) { @@ -1938,7 +2462,7 @@ namespace basisu const uint32_t block_index = training_vector_index >> 1; const uint32_t subblock_index = training_vector_index & 1; - const bool is_flipped = true; + //const bool is_flipped = true; etc_block &blk = m_encoded_blocks[block_index]; @@ -2002,7 +2526,7 @@ namespace basisu if (m_params.m_debug_stats) debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); - + return total_subblocks_refined; } @@ -2012,8 +2536,8 @@ namespace basisu uint32_t max_endpoint_cluster_size = 0; - std::vector<uint32_t> cluster_sizes(m_endpoint_clusters.size()); - std::vector<uint32_t> sorted_cluster_indices(m_endpoint_clusters.size()); + basisu::vector<uint32_t> cluster_sizes(m_endpoint_clusters.size()); + basisu::vector<uint32_t> sorted_cluster_indices(m_endpoint_clusters.size()); for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { max_endpoint_cluster_size = maximum<uint32_t>(max_endpoint_cluster_size, (uint32_t)m_endpoint_clusters[i].size()); @@ -2100,30 +2624,33 @@ namespace basisu { debug_printf("reoptimize_remapped_endpoints\n"); - std::vector<uint_vec> new_endpoint_cluster_block_indices(m_endpoint_clusters.size()); + basisu::vector<uint_vec> new_endpoint_cluster_block_indices(m_endpoint_clusters.size()); for (uint32_t i = 0; i < new_block_endpoints.size(); i++) new_endpoint_cluster_block_indices[new_block_endpoints[i]].push_back(i); - std::vector<uint8_t> cluster_valid(new_endpoint_cluster_block_indices.size()); - std::vector<uint8_t> cluster_improved(new_endpoint_cluster_block_indices.size()); + basisu::vector<uint8_t> cluster_valid(new_endpoint_cluster_block_indices.size()); + basisu::vector<uint8_t> cluster_improved(new_endpoint_cluster_block_indices.size()); const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N) { const uint32_t first_index = cluster_index_iter; const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); - + +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] { +#endif + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { - const std::vector<uint32_t>& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index]; + const basisu::vector<uint32_t>& cluster_block_indices = new_endpoint_cluster_block_indices[cluster_index]; if (!cluster_block_indices.size()) continue; const uint32_t total_pixels = (uint32_t)cluster_block_indices.size() * 16; - std::vector<color_rgba> cluster_pixels(total_pixels); + basisu::vector<color_rgba> cluster_pixels(total_pixels); uint8_vec force_selectors(total_pixels); etc_block blk; @@ -2170,16 +2697,19 @@ namespace basisu if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) cluster_optimizer_params.m_quality = cETCQualityUber; + else + cluster_optimizer_params.m_quality = cETCQualitySlow; etc1_optimizer::results cluster_optimizer_results; - std::vector<uint8_t> cluster_selectors(total_pixels); + basisu::vector<uint8_t> cluster_selectors(total_pixels); cluster_optimizer_results.m_n = total_pixels; cluster_optimizer_results.m_pSelectors = &cluster_selectors[0]; optimizer.init(cluster_optimizer_params, cluster_optimizer_results); - optimizer.compute(); + if (!optimizer.compute()) + BASISU_FRONTEND_VERIFY(false); new_endpoint_cluster_etc_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; new_endpoint_cluster_etc_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; @@ -2198,11 +2728,16 @@ namespace basisu cluster_valid[cluster_index] = true; } // cluster_index + +#ifndef __EMSCRIPTEN__ } ); +#endif } // cluster_index_iter +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); +#endif uint32_t total_unused_clusters = 0; uint32_t total_improved_clusters = 0; @@ -2239,7 +2774,7 @@ namespace basisu debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 1\n"); - std::vector<uint_vec> new_endpoint_clusters(total_new_endpoint_clusters); + basisu::vector<uint_vec> new_endpoint_clusters(total_new_endpoint_clusters); for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++) { diff --git a/thirdparty/basis_universal/basisu_frontend.h b/thirdparty/basis_universal/encoder/basisu_frontend.h index c3f5d23c71..4ff6d40466 100644 --- a/thirdparty/basis_universal/basisu_frontend.h +++ b/thirdparty/basis_universal/encoder/basisu_frontend.h @@ -1,5 +1,5 @@ // basisu_frontend.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,7 +17,8 @@ #include "basisu_etc.h" #include "basisu_gpu_texture.h" #include "basisu_global_selector_palette_helpers.h" -#include "transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_file_headers.h" +#include "../transcoder/basisu_transcoder.h" namespace basisu { @@ -34,8 +35,8 @@ namespace basisu uint32_t &operator[] (uint32_t i) { assert(i < 2); return m_comps[i]; } }; - const uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 1; - const uint32_t BASISU_MAX_COMPRESSION_LEVEL = 5; + const uint32_t BASISU_DEFAULT_COMPRESSION_LEVEL = 2; + const uint32_t BASISU_MAX_COMPRESSION_LEVEL = 6; class basisu_frontend { @@ -72,16 +73,19 @@ namespace basisu m_perceptual(true), m_debug_stats(false), m_debug_images(false), + m_dump_endpoint_clusterization(true), + m_validate(false), + m_multithreaded(false), + m_disable_hierarchical_endpoint_codebooks(false), m_pGlobal_sel_codebook(NULL), m_num_global_sel_codebook_pal_bits(0), m_num_global_sel_codebook_mod_bits(0), m_use_hybrid_selector_codebooks(false), m_hybrid_codebook_quality_thresh(0.0f), - m_validate(false), m_tex_type(basist::cBASISTexType2D), - m_multithreaded(false), - m_disable_hierarchical_endpoint_codebooks(false), + m_pGlobal_codebooks(nullptr), + m_pJob_pool(nullptr) { } @@ -108,6 +112,7 @@ namespace basisu bool m_use_hybrid_selector_codebooks; float m_hybrid_codebook_quality_thresh; basist::basis_texture_type m_tex_type; + const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; job_pool *m_pJob_pool; }; @@ -142,7 +147,7 @@ namespace basisu bool get_endpoint_cluster_color_is_used(uint32_t cluster_index, bool individual_mode) const { return m_endpoint_cluster_etc_params[cluster_index].m_color_used[individual_mode]; } // Selector clusters - uint32_t get_total_selector_clusters() const { return static_cast<uint32_t>(m_selector_cluster_indices.size()); } + uint32_t get_total_selector_clusters() const { return static_cast<uint32_t>(m_selector_cluster_block_indices.size()); } uint32_t get_block_selector_cluster_index(uint32_t block_index) const { return m_block_selector_cluster_index[block_index]; } const etc_block &get_selector_cluster_selector_bits(uint32_t cluster_index) const { return m_optimized_cluster_selectors[cluster_index]; } @@ -150,7 +155,7 @@ namespace basisu const bool_vec &get_selector_cluster_uses_global_cb_vec() const { return m_selector_cluster_uses_global_cb; } // Returns block indices using each selector cluster - const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_indices[selector_cluster_index]; } + const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); @@ -188,16 +193,16 @@ namespace basisu // For each endpoint cluster: An array of which subblock indices (block_index*2+subblock) are located in that cluster. // Array of block indices for each endpoint cluster - std::vector<uint_vec> m_endpoint_clusters; + basisu::vector<uint_vec> m_endpoint_clusters; // Array of block indices for each parent endpoint cluster - std::vector<uint_vec> m_endpoint_parent_clusters; + basisu::vector<uint_vec> m_endpoint_parent_clusters; // Each block's parent cluster index uint8_vec m_block_parent_endpoint_cluster; // Array of endpoint cluster indices for each parent endpoint cluster - std::vector<uint_vec> m_endpoint_clusters_within_each_parent_cluster; + basisu::vector<uint_vec> m_endpoint_clusters_within_each_parent_cluster; struct endpoint_cluster_etc_params { @@ -267,35 +272,35 @@ namespace basisu } }; - typedef std::vector<endpoint_cluster_etc_params> cluster_subblock_etc_params_vec; + typedef basisu::vector<endpoint_cluster_etc_params> cluster_subblock_etc_params_vec; // Each endpoint cluster's ETC1S parameters cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; // The endpoint cluster index used by each ETC1 subblock. - std::vector<vec2U> m_block_endpoint_clusters_indices; + basisu::vector<vec2U> m_block_endpoint_clusters_indices; // The block(s) within each selector cluster // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! - std::vector<uint_vec> m_selector_cluster_indices; + basisu::vector<uint_vec> m_selector_cluster_block_indices; // The selector bits for each selector cluster. - std::vector<etc_block> m_optimized_cluster_selectors; + basisu::vector<etc_block> m_optimized_cluster_selectors; // The block(s) within each parent selector cluster. - std::vector<uint_vec> m_selector_parent_cluster_indices; + basisu::vector<uint_vec> m_selector_parent_cluster_block_indices; // Each block's parent selector cluster uint8_vec m_block_parent_selector_cluster; // Array of selector cluster indices for each parent selector cluster - std::vector<uint_vec> m_selector_clusters_within_each_parent_cluster; + basisu::vector<uint_vec> m_selector_clusters_within_each_parent_cluster; basist::etc1_global_selector_codebook_entry_id_vec m_optimized_cluster_selector_global_cb_ids; bool_vec m_selector_cluster_uses_global_cb; // Each block's selector cluster index - std::vector<uint32_t> m_block_selector_cluster_index; + basisu::vector<uint32_t> m_block_selector_cluster_index; struct subblock_endpoint_quant_err { @@ -321,13 +326,14 @@ namespace basisu }; // The sorted subblock endpoint quant error for each endpoint cluster - std::vector<subblock_endpoint_quant_err> m_subblock_endpoint_quant_err_vec; + basisu::vector<subblock_endpoint_quant_err> m_subblock_endpoint_quant_err_vec; std::mutex m_lock; //----------------------------------------------------------------------------- void init_etc1_images(); + bool init_global_codebooks(); void init_endpoint_training_vectors(); void dump_endpoint_clusterization_visualization(const char *pFilename, bool vis_endpoint_colors); void generate_endpoint_clusters(); diff --git a/thirdparty/basis_universal/basisu_global_selector_palette_helpers.cpp b/thirdparty/basis_universal/encoder/basisu_global_selector_palette_helpers.cpp index 102fc24980..102fc24980 100644 --- a/thirdparty/basis_universal/basisu_global_selector_palette_helpers.cpp +++ b/thirdparty/basis_universal/encoder/basisu_global_selector_palette_helpers.cpp diff --git a/thirdparty/basis_universal/basisu_global_selector_palette_helpers.h b/thirdparty/basis_universal/encoder/basisu_global_selector_palette_helpers.h index 32692c516b..7c35439df8 100644 --- a/thirdparty/basis_universal/basisu_global_selector_palette_helpers.h +++ b/thirdparty/basis_universal/encoder/basisu_global_selector_palette_helpers.h @@ -14,9 +14,9 @@ // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_etc.h" -#include "transcoder/basisu_global_selector_palette.h" +#include "../transcoder/basisu_global_selector_palette.h" namespace basisu { @@ -36,7 +36,7 @@ namespace basisu void clear() { clear_obj(*this); } }; - typedef std::vector<pixel_block> pixel_block_vec; + typedef basisu::vector<pixel_block> pixel_block_vec; uint64_t etc1_global_selector_codebook_find_best_entry(const basist::etc1_global_selector_codebook &codebook, uint32_t num_src_pixel_blocks, const pixel_block *pSrc_pixel_blocks, const etc_block *pBlock_endpoints, diff --git a/thirdparty/basis_universal/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp index 117668c5e2..3f9fb67bdd 100644 --- a/thirdparty/basis_universal/basisu_gpu_texture.cpp +++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp @@ -1,5 +1,5 @@ // basisu_gpu_texture.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,38 +16,10 @@ #include "basisu_enc.h" #include "basisu_pvrtc1_4.h" #include "basisu_astc_decomp.h" +#include "basisu_bc7enc.h" namespace basisu { - const int8_t g_etc2_eac_tables[16][8] = - { - { -3, -6, -9, -15, 2, 5, 8, 14 }, { -3, -7, -10, -13, 2, 6, 9, 12 }, { -2, -5, -8, -13, 1, 4, 7, 12 }, { -2, -4, -6, -13, 1, 3, 5, 12 }, - { -3, -6, -8, -12, 2, 5, 7, 11 }, { -3, -7, -9, -11, 2, 6, 8, 10 }, { -4, -7, -8, -11, 3, 6, 7, 10 }, { -3, -5, -8, -11, 2, 4, 7, 10 }, - { -2, -6, -8, -10, 1, 5, 7, 9 }, { -2, -5, -8, -10, 1, 4, 7, 9 }, { -2, -4, -8, -10, 1, 3, 7, 9 }, { -2, -5, -7, -10, 1, 4, 6, 9 }, - { -3, -4, -7, -10, 2, 3, 6, 9 }, { -1, -2, -3, -10, 0, 1, 2, 9 }, { -4, -6, -8, -9, 3, 5, 7, 8 }, { -3, -5, -7, -9, 2, 4, 6, 8 } - }; - - struct eac_a8_block - { - uint16_t m_base : 8; - uint16_t m_table : 4; - uint16_t m_multiplier : 4; - - uint8_t m_selectors[6]; - - inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const - { - assert((x < 4) && (y < 4)); - return static_cast<uint32_t>((selector_bits >> (45 - (y + x * 4) * 3)) & 7); - } - - inline uint64_t get_selector_bits() const - { - uint64_t pixels = ((uint64_t)m_selectors[0] << 40) | ((uint64_t)m_selectors[1] << 32) | ((uint64_t)m_selectors[2] << 24) | ((uint64_t)m_selectors[3] << 16) | ((uint64_t)m_selectors[4] << 8) | m_selectors[5]; - return pixels; - } - }; - void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels) { static_assert(sizeof(eac_a8_block) == 8, "sizeof(eac_a8_block) == 8"); @@ -123,19 +95,18 @@ namespace basisu bc1_block::unpack_color(l, r0, g0, b0); bc1_block::unpack_color(h, r1, g1, b1); + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + bool used_punchthrough = false; if (l > h) { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); } else { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); c[3].set_noclamp_rgba(0, 0, 0, 0); used_punchthrough = true; @@ -165,6 +136,142 @@ namespace basisu return used_punchthrough; } + bool unpack_bc1_nv(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); + + const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + int r0 = (l >> 11) & 31; + int g0 = (l >> 5) & 63; + int b0 = l & 31; + int r1 = (h >> 11) & 31; + int g1 = (h >> 5) & 63; + int b1 = h & 31; + + c[0].b = (uint8_t)((3 * b0 * 22) / 8); + c[0].g = (uint8_t)((g0 << 2) | (g0 >> 4)); + c[0].r = (uint8_t)((3 * r0 * 22) / 8); + c[0].a = 0xFF; + + c[1].r = (uint8_t)((3 * r1 * 22) / 8); + c[1].g = (uint8_t)((g1 << 2) | (g1 >> 4)); + c[1].b = (uint8_t)((3 * b1 * 22) / 8); + c[1].a = 0xFF; + + int gdiff = c[1].g - c[0].g; + + bool used_punchthrough = false; + + if (l > h) + { + c[2].r = (uint8_t)(((2 * r0 + r1) * 22) / 8); + c[2].g = (uint8_t)(((256 * c[0].g + gdiff/4 + 128 + gdiff * 80) / 256)); + c[2].b = (uint8_t)(((2 * b0 + b1) * 22) / 8); + c[2].a = 0xFF; + + c[3].r = (uint8_t)(((2 * r1 + r0) * 22) / 8); + c[3].g = (uint8_t)((256 * c[1].g - gdiff/4 + 128 - gdiff * 80) / 256); + c[3].b = (uint8_t)(((2 * b1 + b0) * 22) / 8); + c[3].a = 0xFF; + } + else + { + c[2].r = (uint8_t)(((r0 + r1) * 33) / 8); + c[2].g = (uint8_t)((256 * c[0].g + gdiff/4 + 128 + gdiff * 128) / 256); + c[2].b = (uint8_t)(((b0 + b1) * 33) / 8); + c[2].a = 0xFF; + + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } + static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + + bool unpack_bc1_amd(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha) + { + const bc1_block *pBlock = static_cast<const bc1_block *>(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + color_rgba c[4]; + + uint32_t r0, g0, b0, r1, g1, b1; + bc1_block::unpack_color(l, r0, g0, b0); + bc1_block::unpack_color(h, r1, g1, b1); + + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + + bool used_punchthrough = false; + + if (l > h) + { + c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + } + else + { + c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pPixels += 4) + { + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + struct bc4_block { enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; @@ -292,7 +399,7 @@ namespace basisu if (mode) { - c[1].set(std::max(0, c[0].r - (c[3].r >> 2)), std::max(0, c[0].g - (c[3].g >> 2)), std::max(0, c[0].b - (c[3].b >> 2)), 255); + c[1].set(basisu::maximum(0, c[0].r - (c[3].r >> 2)), basisu::maximum(0, c[0].g - (c[3].g >> 2)), basisu::maximum(0, c[0].b - (c[3].b >> 2)), 255); c[2] = c[0]; c[0].set(0, 0, 0, 255); } @@ -317,6 +424,191 @@ namespace basisu } } + // BC7 mode 0-7 decompression. + // Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. + + static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } + static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + + static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - basist::g_bc7_weights2[w]) + h * basist::g_bc7_weights2[w] + 32) >> 6; } + static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - basist::g_bc7_weights3[w]) + h * basist::g_bc7_weights3[w] + 32) >> 6; } + static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - basist::g_bc7_weights4[w]) + h * basist::g_bc7_weights4[w] + 32) >> 6; } + static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) + { + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; + } + + bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t PBITS = (mode == 0) ? 6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 4 : 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[6]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_third_subset_1[part]) || (i == basist::g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = 255; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[basist::g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; + } + + bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + { + const uint32_t ENDPOINTS = 2; + const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast<const uint8_t*>(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); + const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + + uint32_t weights[16], a_weights[16]; + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + color_rgba block_colors[8]; + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; + } + struct bc7_mode_6 { struct @@ -364,9 +656,6 @@ namespace basisu }; }; - static const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - - // The transcoder only outputs mode 6 at the moment, so this is easy. bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) { static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); @@ -388,7 +677,7 @@ namespace basisu color_rgba vals[16]; for (uint32_t i = 0; i < 16; i++) { - const uint32_t w = g_bc7_weights4[i]; + const uint32_t w = basist::g_bc7_weights4[i]; const uint32_t iw = 64 - w; vals[i].set_noclamp_rgba( (r0 * iw + r1 * w + 32) >> 6, @@ -420,183 +709,37 @@ namespace basisu return true; } - static inline uint32_t get_block_bits(const uint8_t* pBytes, uint32_t bit_ofs, uint32_t bits_wanted) + bool unpack_bc7(const void *pBlock, color_rgba *pPixels) { - assert(bits_wanted < 32); + const uint32_t first_byte = static_cast<const uint8_t*>(pBlock)[0]; - uint32_t v = 0; - uint32_t total_bits = 0; - - while (total_bits < bits_wanted) + for (uint32_t mode = 0; mode <= 7; mode++) { - uint32_t k = pBytes[bit_ofs >> 3]; - k >>= (bit_ofs & 7); - uint32_t num_bits_in_byte = 8 - (bit_ofs & 7); - - v |= (k << total_bits); - total_bits += num_bits_in_byte; - bit_ofs += num_bits_in_byte; - } - - return v & ((1 << bits_wanted) - 1); - } - - struct bc7_mode_5 - { - union - { - struct + if (first_byte & (1U << mode)) { - uint64_t m_mode : 6; - uint64_t m_rot : 2; - - uint64_t m_r0 : 7; - uint64_t m_r1 : 7; - uint64_t m_g0 : 7; - uint64_t m_g1 : 7; - uint64_t m_b0 : 7; - uint64_t m_b1 : 7; - uint64_t m_a0 : 8; - uint64_t m_a1_0 : 6; - - } m_lo; - - uint64_t m_lo_bits; - }; - - union - { - struct - { - uint64_t m_a1_1 : 2; - - // bit 2 - uint64_t m_c00 : 1; - uint64_t m_c10 : 2; - uint64_t m_c20 : 2; - uint64_t m_c30 : 2; - - uint64_t m_c01 : 2; - uint64_t m_c11 : 2; - uint64_t m_c21 : 2; - uint64_t m_c31 : 2; - - uint64_t m_c02 : 2; - uint64_t m_c12 : 2; - uint64_t m_c22 : 2; - uint64_t m_c32 : 2; - - uint64_t m_c03 : 2; - uint64_t m_c13 : 2; - uint64_t m_c23 : 2; - uint64_t m_c33 : 2; - - // bit 33 - uint64_t m_a00 : 1; - uint64_t m_a10 : 2; - uint64_t m_a20 : 2; - uint64_t m_a30 : 2; - - uint64_t m_a01 : 2; - uint64_t m_a11 : 2; - uint64_t m_a21 : 2; - uint64_t m_a31 : 2; - - uint64_t m_a02 : 2; - uint64_t m_a12 : 2; - uint64_t m_a22 : 2; - uint64_t m_a32 : 2; - - uint64_t m_a03 : 2; - uint64_t m_a13 : 2; - uint64_t m_a23 : 2; - uint64_t m_a33 : 2; - - } m_hi; - - uint64_t m_hi_bits; - }; - - color_rgba get_low_color() const - { - return color_rgba(cNoClamp, - (int)((m_lo.m_r0 << 1) | (m_lo.m_r0 >> 6)), - (int)((m_lo.m_g0 << 1) | (m_lo.m_g0 >> 6)), - (int)((m_lo.m_b0 << 1) | (m_lo.m_b0 >> 6)), - m_lo.m_a0); - } - - color_rgba get_high_color() const - { - return color_rgba(cNoClamp, - (int)((m_lo.m_r1 << 1) | (m_lo.m_r1 >> 6)), - (int)((m_lo.m_g1 << 1) | (m_lo.m_g1 >> 6)), - (int)((m_lo.m_b1 << 1) | (m_lo.m_b1 >> 6)), - (int)m_lo.m_a1_0 | ((int)m_hi.m_a1_1 << 6)); - } - - void get_block_colors(color_rgba* pColors) const - { - const color_rgba low_color(get_low_color()); - const color_rgba high_color(get_high_color()); - - for (uint32_t i = 0; i < 4; i++) - { - static const uint32_t s_bc7_weights2[4] = { 0, 21, 43, 64 }; - - pColors[i].set_noclamp_rgba( - (low_color.r * (64 - s_bc7_weights2[i]) + high_color.r * s_bc7_weights2[i] + 32) >> 6, - (low_color.g * (64 - s_bc7_weights2[i]) + high_color.g * s_bc7_weights2[i] + 32) >> 6, - (low_color.b * (64 - s_bc7_weights2[i]) + high_color.b * s_bc7_weights2[i] + 32) >> 6, - (low_color.a * (64 - s_bc7_weights2[i]) + high_color.a * s_bc7_weights2[i] + 32) >> 6); + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, pBlock, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, pBlock, pPixels); + case 6: + return unpack_bc7_mode6(pBlock, pPixels); + default: + break; + } } - } - - uint32_t get_selector(uint32_t idx, bool alpha) const - { - const uint32_t size = (idx == 0) ? 1 : 2; - - uint32_t ofs = alpha ? 97 : 66; - - if (idx) - ofs += 1 + 2 * (idx - 1); - - return get_block_bits(reinterpret_cast<const uint8_t*>(this), ofs, size); - } - }; - - bool unpack_bc7_mode5(const void* pBlock_bits, color_rgba* pPixels) - { - static_assert(sizeof(bc7_mode_5) == 16, "sizeof(bc7_mode_5) == 16"); - - const bc7_mode_5& block = *static_cast<const bc7_mode_5*>(pBlock_bits); - - if (block.m_lo.m_mode != (1 << 5)) - return false; - - color_rgba block_colors[4]; - block.get_block_colors(block_colors); - - const uint32_t rot = block.m_lo.m_rot; - - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t cs = block.get_selector(i, false); - - color_rgba c(block_colors[cs]); - - const uint32_t as = block.get_selector(i, true); - c.a = block_colors[as].a; - - if (rot > 0) - std::swap(c[3], c[rot - 1]); - - pPixels[i] = c; } - return true; + return false; } - + struct fxt1_block { union @@ -903,13 +1046,14 @@ namespace basisu etc2_eac_r11 m_c[2]; }; - static void unpack_etc2_eac_r(const etc2_eac_r11* p, color_rgba* pPixels, uint32_t c) + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c) { - const uint64_t sels = p->get_sels(); + const etc2_eac_r11* pBlock = static_cast<const etc2_eac_r11*>(p); + const uint64_t sels = pBlock->get_sels(); - const int base = (int)p->m_base * 8 + 4; - const int mul = p->m_mul ? ((int)p->m_mul * 8) : 1; - const int table = (int)p->m_table; + const int base = (int)pBlock->m_base * 8 + 4; + const int mul = pBlock->m_mul ? ((int)pBlock->m_mul * 8) : 1; + const int table = (int)pBlock->m_table; for (uint32_t y = 0; y < 4; y++) { @@ -923,7 +1067,8 @@ namespace basisu val = clamp<int>(val, 0, 2047); // Convert to 8-bits with rounding - pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047); + //pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1024) / 2047); + pPixels[x + y * 4].m_comps[c] = static_cast<uint8_t>((val * 255 + 1023) / 2047); } // x } // y @@ -939,6 +1084,11 @@ namespace basisu } } + void unpack_uastc(const void* p, color_rgba* pPixels) + { + basist::unpack_uastc(*static_cast<const basist::uastc_block*>(p), (basist::color32 *)pPixels, false); + } + // Unpacks to RGBA, R, RG, or A bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) { @@ -949,6 +1099,16 @@ namespace basisu unpack_bc1(pBlock, pPixels, true); break; } + case texture_format::cBC1_NV: + { + unpack_bc1_nv(pBlock, pPixels, true); + break; + } + case texture_format::cBC1_AMD: + { + unpack_bc1_amd(pBlock, pPixels, true); + break; + } case texture_format::cBC3: { return unpack_bc3(pBlock, pPixels); @@ -966,14 +1126,7 @@ namespace basisu } case texture_format::cBC7: { - // We only support modes 5 and 6. - if (!unpack_bc7_mode5(pBlock, pPixels)) - { - if (!unpack_bc7_mode6(pBlock, pPixels)) - return false; - } - - break; + return unpack_bc7(pBlock, pPixels); } // Full ETC2 color blocks (planar/T/H modes) is currently unsupported in basisu, but we do support ETC2 with alpha (using ETC1 for color) case texture_format::cETC2_RGB: @@ -1032,6 +1185,11 @@ namespace basisu unpack_etc2_eac_rg(pBlock, pPixels); break; } + case texture_format::cUASTC4x4: + { + unpack_uastc(pBlock, pPixels); + break; + } default: { assert(0); @@ -1113,6 +1271,7 @@ namespace basisu KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02, KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0, KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0, + KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value! KTX_ATC_RGB_AMD = 0x8C92, KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE, KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0, @@ -1143,7 +1302,7 @@ namespace basisu }; // Input is a texture array of mipmapped gpu_image's: gpu_images[array_index][level_index] - bool create_ktx_texture_file(uint8_vec &ktx_data, const std::vector<gpu_image_vec>& gpu_images, bool cubemap_flag) + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag) { if (!gpu_images.size()) { @@ -1220,6 +1379,8 @@ namespace basisu switch (fmt) { case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: { internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; break; @@ -1305,6 +1466,12 @@ namespace basisu base_internal_fmt = KTX_RG; break; } + case texture_format::cUASTC4x4: + { + internal_fmt = KTX_COMPRESSED_RGBA_UASTC_4x4_KHR; + base_internal_fmt = KTX_RGBA; + break; + } case texture_format::cFXT1_RGB: { internal_fmt = KTX_COMPRESSED_RGB_FXT1_3DFX; @@ -1378,7 +1545,7 @@ namespace basisu return true; } - bool write_compressed_texture_file(const char* pFilename, const std::vector<gpu_image_vec>& g, bool cubemap_flag) + bool write_compressed_texture_file(const char* pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag) { std::string extension(string_tolower(string_get_extension(pFilename))); @@ -1410,12 +1577,12 @@ namespace basisu bool write_compressed_texture_file(const char* pFilename, const gpu_image& g) { - std::vector<gpu_image_vec> v; + basisu::vector<gpu_image_vec> v; enlarge_vector(v, 1)->push_back(g); return write_compressed_texture_file(pFilename, v, false); } - const uint32_t OUT_FILE_MAGIC = 'TEXC'; + //const uint32_t OUT_FILE_MAGIC = 'TEXC'; struct out_file_header { packed_uint<4> m_magic; @@ -1428,7 +1595,11 @@ namespace basisu bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi) { out_file_header hdr; - hdr.m_magic = OUT_FILE_MAGIC; + //hdr.m_magic = OUT_FILE_MAGIC; + hdr.m_magic.m_bytes[0] = 67; + hdr.m_magic.m_bytes[1] = 88; + hdr.m_magic.m_bytes[2] = 69; + hdr.m_magic.m_bytes[3] = 84; hdr.m_pad = 0; hdr.m_width = gi.get_blocks_x() * 8; hdr.m_height = gi.get_blocks_y() * 4; diff --git a/thirdparty/basis_universal/basisu_gpu_texture.h b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h index 8a49757ca7..619926f5f9 100644 --- a/thirdparty/basis_universal/basisu_gpu_texture.h +++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.h @@ -1,5 +1,5 @@ // basisu_gpu_texture.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,13 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #include "basisu_etc.h" namespace basisu { - // GPU texture image - + // GPU texture "image" class gpu_image { public: @@ -115,17 +114,17 @@ namespace basisu uint64_vec m_blocks; }; - typedef std::vector<gpu_image> gpu_image_vec; + typedef basisu::vector<gpu_image> gpu_image_vec; // KTX file writing - bool create_ktx_texture_file(uint8_vec &ktx_data, const std::vector<gpu_image_vec>& gpu_images, bool cubemap_flag); + bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag); - bool write_compressed_texture_file(const char *pFilename, const std::vector<gpu_image_vec>& g, bool cubemap_flag); + bool write_compressed_texture_file(const char *pFilename, const basisu::vector<gpu_image_vec>& g, bool cubemap_flag); inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g) { - std::vector<gpu_image_vec> a; + basisu::vector<gpu_image_vec> a; a.push_back(g); return write_compressed_texture_file(pFilename, a, false); } @@ -133,22 +132,23 @@ namespace basisu bool write_compressed_texture_file(const char *pFilename, const gpu_image &g); bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); - // GPU texture block unpacking + // GPU texture block unpacking void unpack_etc2_eac(const void *pBlock_bits, color_rgba *pPixels); bool unpack_bc1(const void *pBlock_bits, color_rgba *pPixels, bool set_alpha); void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride); bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels); void unpack_bc5(const void *pBlock_bits, color_rgba *pPixels); bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels); - bool unpack_bc7_mode5(const void* pBlock_bits, color_rgba* pPixels); + bool unpack_bc7(const void* pBlock_bits, color_rgba* pPixels); void unpack_atc(const void* pBlock_bits, color_rgba* pPixels); bool unpack_fxt1(const void* p, color_rgba* pPixels); bool unpack_pvrtc2(const void* p, color_rgba* pPixels); + void unpack_etc2_eac_r(const void *p, color_rgba* pPixels, uint32_t c); void unpack_etc2_eac_rg(const void* p, color_rgba* pPixels); - // unpack_block() is only capable of unpacking texture data created by the transcoder. - // For some texture formats (like BC7, or ETC2) it's not a complete implementation. + // unpack_block() is primarily intended to unpack texture data created by the transcoder. + // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation. bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); } // namespace basisu diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_declares.h b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h new file mode 100644 index 0000000000..e24bdd7978 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_kernels_declares.h @@ -0,0 +1,25 @@ +// basisu_kernels_declares.h +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if BASISU_SUPPORT_SSE +void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); +void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); + +void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); +void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err); + +void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error); +void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const basisu::color_rgba* pBlock_colors, const basisu::color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error); +#endif diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_imp.h b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h new file mode 100644 index 0000000000..046880517b --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_kernels_imp.h @@ -0,0 +1,584 @@ +// basisu_kernels_imp.h - Do not directly include +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using namespace CPPSPMD; + +namespace CPPSPMD_NAME(basisu_kernels_namespace) +{ + struct perceptual_distance_rgb_4_N : spmd_kernel + { + void _call(int64_t* pDistance, + const uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + __m128i block_colors[4]; + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_colors[i] = load_rgba32(&pBlock_colors[i]); + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3]; + + vint base_r, base_g, base_b, base_a; + if ((s0 == s1) && (s0 == s2) && (s0 == s3)) + { + store_all(base_r, block_colors_r[s0]); + store_all(base_g, block_colors_g[s0]); + store_all(base_b, block_colors_b[s0]); + } + else + { + __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3]; + transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3); + } + + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 27 + dg * 92 + db * 9; + vint delta_cr = dr * 128 - delta_l; + vint delta_cb = db * 128 - delta_l; + + vint id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + + *pDistance += reduce_add(id); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int sel = pSelectors[i]; + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + int id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + + *pDistance += id; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct linear_distance_rgb_4_N : spmd_kernel + { + void _call(int64_t* pDistance, + const uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + __m128i block_colors[4]; + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + block_colors[i] = load_rgba32(&pBlock_colors[i]); + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + int s0 = pSelectors[i], s1 = pSelectors[i + 1], s2 = pSelectors[i + 2], s3 = pSelectors[i + 3]; + + vint base_r, base_g, base_b, base_a; + if ((s0 == s1) && (s0 == s2) && (s0 == s3)) + { + store_all(base_r, block_colors_r[s0]); + store_all(base_g, block_colors_g[s0]); + store_all(base_b, block_colors_b[s0]); + } + else + { + __m128i k0 = block_colors[s0], k1 = block_colors[s1], k2 = block_colors[s2], k3 = block_colors[s3]; + transpose4x4(base_r.m_value, base_g.m_value, base_b.m_value, base_a.m_value, k0, k1, k2, k3); + } + + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + + *pDistance += reduce_add(id); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int sel = pSelectors[i]; + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + + *pDistance += id; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_selectors_perceptual_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 27 + dg * 92 + db * 9; + vint delta_cr = dr * 128 - delta_l; + vint delta_cb = db * 128 - delta_l; + + vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 7) * 26, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 7) * 3, 7); + + return id; + } + + void _call(int64_t* pDistance, + uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); + + __m128i vsels = shuffle_epi8(sels.m_value, shuf); + storeu_si32((void *)(pSelectors + i), vsels); + + *pDistance += reduce_add(min_dist); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX, best_sel = 0; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + int id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + if (id < best_err) + { + best_err = id; + best_sel = sel; + } + } + + pSelectors[i] = (uint8_t)best_sel; + + *pDistance += best_err; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_selectors_linear_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + return id; + } + + void _call(int64_t* pDistance, + uint8_t* pSelectors, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_err) + { + assert(early_out_err >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + const __m128i shuf = _mm_set_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, 12, 8, 4, 0); + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + vint sels = spmd_ternaryi(min_dist == dist0, 0, spmd_ternaryi(min_dist == dist1, 1, spmd_ternaryi(min_dist == dist2, 2, 3))); + + __m128i vsels = shuffle_epi8(sels.m_value, shuf); + storeu_si32((void *)(pSelectors + i), vsels); + + *pDistance += reduce_add(min_dist); + if (*pDistance >= early_out_err) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX, best_sel = 0; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + if (id < best_err) + { + best_err = id; + best_sel = sel; + } + } + + pSelectors[i] = (uint8_t)best_sel; + + *pDistance += best_err; + if (*pDistance >= early_out_err) + return; + } + } + }; + + struct find_lowest_error_perceptual_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint delta_l = dr * 27 + dg * 92 + db * 9; + vint delta_cr = dr * 128 - delta_l; + vint delta_cb = db * 128 - delta_l; + + vint id = VINT_SHIFT_RIGHT(delta_l * delta_l, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cr * delta_cr, 7) * 26, 7) + + VINT_SHIFT_RIGHT(VINT_SHIFT_RIGHT(delta_cb * delta_cb, 7) * 3, 7); + + return id; + } + + void _call(int64_t* pDistance, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_error) + { + assert(early_out_error >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + *pDistance += reduce_add(min_dist); + if (*pDistance > early_out_error) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int delta_l = dr * 27 + dg * 92 + db * 9; + int delta_cr = dr * 128 - delta_l; + int delta_cb = db * 128 - delta_l; + + int id = ((delta_l * delta_l) >> 7) + + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); + + if (id < best_err) + { + best_err = id; + } + } + + *pDistance += best_err; + if (*pDistance > early_out_error) + return; + } + } + }; + + struct find_lowest_error_linear_rgb_4_N : spmd_kernel + { + inline vint compute_dist( + const vint& base_r, const vint& base_g, const vint& base_b, + const vint& r, const vint& g, const vint& b) + { + vint dr = base_r - r; + vint dg = base_g - g; + vint db = base_b - b; + + vint id = dr * dr + dg * dg + db * db; + + return id; + } + + void _call(int64_t* pDistance, + const color_rgba* pBlock_colors, + const color_rgba* pSrc_pixels, uint32_t n, + int64_t early_out_error) + { + assert(early_out_error >= 0); + + *pDistance = 0; + + vint block_colors_r[4], block_colors_g[4], block_colors_b[4]; + for (uint32_t i = 0; i < 4; i++) + { + store_all(block_colors_r[i], (int)pBlock_colors[i].r); + store_all(block_colors_g[i], (int)pBlock_colors[i].g); + store_all(block_colors_b[i], (int)pBlock_colors[i].b); + } + + uint32_t i; + + for (i = 0; (i + 4) <= n; i += 4) + { + __m128i c0 = load_rgba32(&pSrc_pixels[i + 0]), c1 = load_rgba32(&pSrc_pixels[i + 1]), c2 = load_rgba32(&pSrc_pixels[i + 2]), c3 = load_rgba32(&pSrc_pixels[i + 3]); + + vint r, g, b, a; + transpose4x4(r.m_value, g.m_value, b.m_value, a.m_value, c0, c1, c2, c3); + + vint dist0 = compute_dist(block_colors_r[0], block_colors_g[0], block_colors_b[0], r, g, b); + vint dist1 = compute_dist(block_colors_r[1], block_colors_g[1], block_colors_b[1], r, g, b); + vint dist2 = compute_dist(block_colors_r[2], block_colors_g[2], block_colors_b[2], r, g, b); + vint dist3 = compute_dist(block_colors_r[3], block_colors_g[3], block_colors_b[3], r, g, b); + + vint min_dist = min(min(min(dist0, dist1), dist2), dist3); + + *pDistance += reduce_add(min_dist); + if (*pDistance > early_out_error) + return; + } + + for (; i < n; i++) + { + int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + int best_err = INT_MAX; + for (int sel = 0; sel < 4; sel++) + { + int base_r = pBlock_colors[sel].r, base_g = pBlock_colors[sel].g, base_b = pBlock_colors[sel].b; + + int dr = base_r - r; + int dg = base_g - g; + int db = base_b - b; + + int id = dr * dr + dg * dg + db * db; + + if (id < best_err) + { + best_err = id; + } + } + + *pDistance += best_err; + if (*pDistance > early_out_error) + return; + } + } + }; + +} // namespace + +using namespace CPPSPMD_NAME(basisu_kernels_namespace); + +void CPPSPMD_NAME(perceptual_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< perceptual_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(linear_distance_rgb_4_N)(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< linear_distance_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_selectors_perceptual_rgb_4_N)(int64_t *pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< find_selectors_perceptual_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_selectors_linear_rgb_4_N)(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) +{ + spmd_call< find_selectors_linear_rgb_4_N >(pDistance, pSelectors, pBlock_colors, pSrc_pixels, n, early_out_err); +} + +void CPPSPMD_NAME(find_lowest_error_perceptual_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) +{ + spmd_call< find_lowest_error_perceptual_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); +} + +void CPPSPMD_NAME(find_lowest_error_linear_rgb_4_N)(int64_t* pDistance, const color_rgba* pBlock_colors, const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) +{ + spmd_call< find_lowest_error_linear_rgb_4_N >(pDistance, pBlock_colors, pSrc_pixels, n, early_out_error); +} + diff --git a/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp new file mode 100644 index 0000000000..12d2321f20 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_kernels_sse.cpp @@ -0,0 +1,161 @@ +// basisu_kernels_sse.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_enc.h" + +#if BASISU_SUPPORT_SSE + +#define CPPSPMD_SSE2 0 + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#if !defined(_MSC_VER) + #if __AVX__ || __AVX2__ || __AVX512F__ + #error Please check your compiler options + #endif + + #if CPPSPMD_SSE2 + #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__ + #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file + #endif + #else + #if !__SSE4_1__ || !__SSE3__ || __SSE4_2__ || !__SSSE3__ + #error Please check your compiler options + #endif + #endif +#endif + +#include "cppspmd_sse.h" + +#include "cppspmd_type_aliases.h" + +using namespace basisu; + +#include "basisu_kernels_declares.h" +#include "basisu_kernels_imp.h" + +namespace basisu +{ + +struct cpu_info +{ + cpu_info() { memset(this, 0, sizeof(*this)); } + + bool m_has_fpu; + bool m_has_mmx; + bool m_has_sse; + bool m_has_sse2; + bool m_has_sse3; + bool m_has_ssse3; + bool m_has_sse41; + bool m_has_sse42; + bool m_has_avx; + bool m_has_avx2; + bool m_has_pclmulqdq; +}; + +static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx) +{ + info.m_has_fpu = (edx & (1 << 0)) != 0; + info.m_has_mmx = (edx & (1 << 23)) != 0; + info.m_has_sse = (edx & (1 << 25)) != 0; + info.m_has_sse2 = (edx & (1 << 26)) != 0; + info.m_has_sse3 = (ecx & (1 << 0)) != 0; + info.m_has_ssse3 = (ecx & (1 << 9)) != 0; + info.m_has_sse41 = (ecx & (1 << 19)) != 0; + info.m_has_sse42 = (ecx & (1 << 20)) != 0; + info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0; + info.m_has_avx = (ecx & (1 << 28)) != 0; +} + +static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx) +{ + info.m_has_avx2 = (ebx & (1 << 5)) != 0; +} + +#ifndef _MSC_VER +static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs) +{ + uint32_t ebx = 0, edx = 0; + +#if defined(__PIC__) && defined(__i386__) + __asm__("movl %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=D"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#else + __asm__("cpuid;" : "+b"(ebx), "+a"(eax), "+c"(ecx), "=d"(edx)); +#endif + + regs[0] = eax; regs[1] = ebx; regs[2] = ecx; regs[3] = edx; +} +#endif + +static void get_cpuinfo(cpu_info &info) +{ + int regs[4]; + +#ifdef _MSC_VER + __cpuid(regs, 0); +#else + do_cpuid(0, 0, (uint32_t *)regs); +#endif + + const uint32_t max_eax = regs[0]; + + if (max_eax >= 1U) + { +#ifdef _MSC_VER + __cpuid(regs, 1); +#else + do_cpuid(1, 0, (uint32_t*)regs); +#endif + extract_x86_flags(info, regs[2], regs[3]); + } + + if (max_eax >= 7U) + { +#ifdef _MSC_VER + __cpuidex(regs, 7, 0); +#else + do_cpuid(7, 0, (uint32_t*)regs); +#endif + + extract_x86_extended_flags(info, regs[1]); + } +} + +void detect_sse41() +{ + cpu_info info; + get_cpuinfo(info); + + // Check for everything from SSE to SSE 4.1 + g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41; +} + +} // namespace basisu +#else // #if BASISU_SUPPORT_SSE +namespace basisu +{ + +void detect_sse41() +{ +} + +} // namespace basisu +#endif // #if BASISU_SUPPORT_SSE + diff --git a/thirdparty/basis_universal/encoder/basisu_miniz.h b/thirdparty/basis_universal/encoder/basisu_miniz.h new file mode 100644 index 0000000000..8627abe893 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_miniz.h @@ -0,0 +1,2514 @@ +/* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ + + Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include <stdlib.h> + +// Defines to completely disable specific portions of miniz.c: +// If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. + +// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. +//#define MINIZ_NO_STDIO + +// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or +// get/set file times, and the C run-time funcs that get/set times won't be called. +// The current downside is the times written to your archives will be from 1979. +//#define MINIZ_NO_TIME + +// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_APIS + +// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. +//#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. +//#define MINIZ_NO_ZLIB_APIS + +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. +//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. +// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc +// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user +// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. +//#define MINIZ_NO_MALLOC + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) + // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux + #define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) + #include <time.h> +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +namespace buminiz { + +// ------------------- zlib-style API Definitions. + +// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! +typedef unsigned long mz_ulong; + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). +enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Return status codes. MZ_PARAM_ERROR is non-standard. +enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s +{ + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. +// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. +// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) +int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. +// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; + #define Z_NULL 0 + #define Z_NO_FLUSH MZ_NO_FLUSH + #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH + #define Z_SYNC_FLUSH MZ_SYNC_FLUSH + #define Z_FULL_FLUSH MZ_FULL_FLUSH + #define Z_FINISH MZ_FINISH + #define Z_BLOCK MZ_BLOCK + #define Z_OK MZ_OK + #define Z_STREAM_END MZ_STREAM_END + #define Z_NEED_DICT MZ_NEED_DICT + #define Z_ERRNO MZ_ERRNO + #define Z_STREAM_ERROR MZ_STREAM_ERROR + #define Z_DATA_ERROR MZ_DATA_ERROR + #define Z_MEM_ERROR MZ_MEM_ERROR + #define Z_BUF_ERROR MZ_BUF_ERROR + #define Z_VERSION_ERROR MZ_VERSION_ERROR + #define Z_PARAM_ERROR MZ_PARAM_ERROR + #define Z_NO_COMPRESSION MZ_NO_COMPRESSION + #define Z_BEST_SPEED MZ_BEST_SPEED + #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION + #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION + #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY + #define Z_FILTERED MZ_FILTERED + #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY + #define Z_RLE MZ_RLE + #define Z_FIXED MZ_FIXED + #define Z_DEFLATED MZ_DEFLATED + #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS + #define alloc_func mz_alloc_func + #define free_func mz_free_func + #define internal_state mz_internal_state + #define z_stream mz_stream + #define deflateInit mz_deflateInit + #define deflateInit2 mz_deflateInit2 + #define deflateReset mz_deflateReset + #define deflate mz_deflate + #define deflateEnd mz_deflateEnd + #define deflateBound mz_deflateBound + #define compress mz_compress + #define compress2 mz_compress2 + #define compressBound mz_compressBound + #define inflateInit mz_inflateInit + #define inflateInit2 mz_inflateInit2 + #define inflate mz_inflate + #define inflateEnd mz_inflateEnd + #define uncompress mz_uncompress + #define crc32 mz_crc32 + #define adler32 mz_adler32 + #define MAX_WBITS 15 + #define MAX_MEM_LEVEL 9 + #define zError mz_error + #define ZLIB_VERSION MZ_VERSION + #define ZLIB_VERNUM MZ_VERNUM + #define ZLIB_VER_MAJOR MZ_VER_MAJOR + #define ZLIB_VER_MINOR MZ_VER_MINOR + #define ZLIB_VER_REVISION MZ_VER_REVISION + #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION + #define zlibVersion mz_version + #define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER + #define MZ_MACRO_END while (0, 0) +#else + #define MZ_MACRO_END while (0) +#endif + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. +enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. +// Returns 1 on success or 0 on failure. +typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. +typedef enum +{ + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +// Internal/private bits follow. +enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. +// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else +enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + +// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. +typedef enum +{ + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum +{ + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. +typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. +// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +} // namespace buminiz + +#endif // MINIZ_HEADER_INCLUDED + +// ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) + +#ifndef MINIZ_HEADER_FILE_ONLY + +#include <string.h> +#include <assert.h> + +namespace buminiz { + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1]; + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC + #define MZ_MALLOC(x) NULL + #define MZ_FREE(x) (void)x, ((void)0) + #define MZ_REALLOC(p, x) NULL +#else + #define MZ_MALLOC(x) malloc(x) + #define MZ_FREE(x) free(x) + #define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) + #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else + #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) + #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER + #define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define MZ_FORCEINLINE inline __attribute__((__always_inline__)) +#else + #define MZ_FORCEINLINE inline +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; +} + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } +static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } +//static void *def_realloc_func(void *opaque, void *address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) + mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL; + mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL; + + mz_uint64 t = MZ_MAX(a, b); + if (((mz_ulong)t) != t) + t = (mz_ulong)(-1); + + return (mz_ulong)t; +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state* pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. + decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for ( ; ; ) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else c = *pIn_buf_cur++; } MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a +// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the +// bit buffer contains >=15 bits (deflate's max. Huffman code size). +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \ + } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read +// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully +// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. +// The slow path is only executed at the very end of the input buffer. +#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ + int temp; mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ + } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) + { + size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if ((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression API's) + +// Purposely making these tables static for faster init and thread safety. +static const mz_uint16 s_tdefl_len_sym[256] = { + 257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272, + 273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276, + 277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278, + 279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280, + 281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281, + 282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282, + 283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283, + 284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 }; + +static const mz_uint8 s_tdefl_len_extra[256] = { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 }; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = { + 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14, + 14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16, + 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, + 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 }; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = { + 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7 }; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = { + 0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26, + 26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 }; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = { + 0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 }; + +// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. +typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = (mz_uint16)next; } else A[next].m_key = A[leaf++].m_key; + if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); A[root++].m_key = (mz_uint16)next; } else A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key); + } + A[n-2].m_key = 0; for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. +enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) do { \ + mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} rle_z_count = 0; } } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. + else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. + mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + // Simple lazy/greedy parsing state machine. + len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len; + } + // Move the lookahead forward by len_to_move bytes. + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE); + // Check if it's time to flush the current LZ codes to the internal output buffer. + if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) ) + { + int n; + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) ) + { + if (pIn_buf_size) *pIn_buf_size = 0; + if (pOut_buf_size) *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; + d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; + pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE; + do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); + pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; + p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; + } + memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; + *pOut_len = out_buf.m_size; return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) return 0; + out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; + return out_buf.m_size; +} + +#ifndef MINIZ_NO_ZLIB_APIS +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} +#endif //MINIZ_NO_ZLIB_APIS + +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) +#endif + +// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at +// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. +// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. + static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; + if (!pComp) return NULL; + MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } + // write dummy header + for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); + // compress image data + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + // write real header + *pLen_out = out_buf.m_size-41; + { + static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; + mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52, + 0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0, + (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54}; + c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + // write footer (IDAT CRC-32, followed by IEND chunk) + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4, *pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24); + // compute final size of file, grab compressed data buffer and return + *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifdef _MSC_VER +#pragma warning (pop) +#endif + +} // namespace buminiz + +#endif // MINIZ_HEADER_FILE_ONLY + diff --git a/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp new file mode 100644 index 0000000000..596fc197e6 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.cpp @@ -0,0 +1,564 @@ +// basisu_pvrtc1_4.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_pvrtc1_4.h" + +namespace basisu +{ +#if 0 + static const uint8_t g_pvrtc_5[32] = { 0,8,16,24,33,41,49,57,66,74,82,90,99,107,115,123,132,140,148,156,165,173,181,189,198,206,214,222,231,239,247,255 }; + static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; + static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; + static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; +#endif + + static const uint8_t g_pvrtc_5_nearest[256] = { 0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31 }; + static const uint8_t g_pvrtc_4_nearest[256] = { 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15 }; +#if 0 + static const uint8_t g_pvrtc_3_nearest[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; + static const uint8_t g_pvrtc_alpha_nearest[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8 }; +#endif + +#if 0 + static const uint8_t g_pvrtc_5_floor[256] = + { + 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, + 3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7, + 7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11, + 11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15, + 15,15,15,15,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19, + 19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23, + 23,23,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27, + 27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31 + }; + + static const uint8_t g_pvrtc_5_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4, + 4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8, + 8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,12,12,12,12,12, + 12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,16,16,16,16, + 16,16,16,16,16,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,20,20,20, + 20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,24,24, + 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, + 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 + }; + + static const uint8_t g_pvrtc_4_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11, + 11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13, + 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15 + }; + + static const uint8_t g_pvrtc_4_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,10,10,10, + 10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12, + 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, + 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 + }; + + static const uint8_t g_pvrtc_3_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7 + }; + + static const uint8_t g_pvrtc_3_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 + }; + + static const uint8_t g_pvrtc_alpha_floor[256] = + { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8 + }; + + static const uint8_t g_pvrtc_alpha_ceil[256] = + { + 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8 + }; +#endif + + uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) + { + assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); + + uint32_t min_d = width, max_v = y; + if (height < width) + { + min_d = height; + max_v = x; + } + + // Interleave the XY LSB's + uint32_t shift_ofs = 0, swizzled = 0; + for (uint32_t s_bit = 1, d_bit = 1; s_bit < min_d; s_bit <<= 1, d_bit <<= 2, ++shift_ofs) + { + if (y & s_bit) swizzled |= d_bit; + if (x & s_bit) swizzled |= (2 * d_bit); + } + + max_v >>= shift_ofs; + + // OR in the rest of the bits from the largest dimension + swizzled |= (max_v << (2 * shift_ofs)); + + return swizzled; + } + + color_rgba pvrtc4_block::get_endpoint(uint32_t endpoint_index, bool unpack) const + { + assert(endpoint_index < 2); + const uint32_t packed = m_endpoints >> (endpoint_index * 16); + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + if (!endpoint_index) + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = (packed >> 1) & 15; + + if (unpack) + { + b = (b << 1) | (b >> 3); + } + } + else + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + } + + a = unpack ? 255 : 7; + } + else + { + // translucent 4433 or 4443 + if (!endpoint_index) + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = (packed >> 1) & 7; + + if (unpack) + { + a = (a << 1); + a = (a << 4) | a; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 2) | (b >> 1); + } + } + else + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = packed & 15; + + if (unpack) + { + a = (a << 1); + a = (a << 4) | a; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 1) | (b >> 3); + } + } + } + + if (unpack) + { + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + } + + assert((r < 256) && (g < 256) && (b < 256) && (a < 256)); + + return color_rgba(r, g, b, a); + } + + color_rgba pvrtc4_block::get_endpoint_5554(uint32_t endpoint_index) const + { + assert(endpoint_index < 2); + const uint32_t packed = m_endpoints >> (endpoint_index * 16); + + uint32_t r, g, b, a; + if (packed & 0x8000) + { + // opaque 554 or 555 + if (!endpoint_index) + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = (packed >> 1) & 15; + + b = (b << 1) | (b >> 3); + } + else + { + r = (packed >> 10) & 31; + g = (packed >> 5) & 31; + b = packed & 31; + } + + a = 15; + } + else + { + // translucent 4433 or 4443 + if (!endpoint_index) + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = (packed >> 1) & 7; + + a = a << 1; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 2) | (b >> 1); + } + else + { + a = (packed >> 12) & 7; + r = (packed >> 8) & 15; + g = (packed >> 4) & 15; + b = packed & 15; + + a = a << 1; + + r = (r << 1) | (r >> 3); + g = (g << 1) | (g >> 3); + b = (b << 1) | (b >> 3); + } + } + + assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); + + return color_rgba(r, g, b, a); + } + + bool pvrtc4_image::get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const + { + assert((x < m_width) && (y < m_height)); + + int block_x0 = (static_cast<int>(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast<int>(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + + pColors[0] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + for (uint32_t c = 0; c < 4; c++) + { + uint32_t m = (pColors[0][c] + pColors[3][c]) / 2; + pColors[1][c] = static_cast<uint8_t>(m); + pColors[2][c] = static_cast<uint8_t>(m); + } + pColors[2][3] = 0; + return true; + } + + for (uint32_t c = 0; c < 4; c++) + { + pColors[1][c] = static_cast<uint8_t>((pColors[0][c] * 5 + pColors[3][c] * 3) / 8); + pColors[2][c] = static_cast<uint8_t>((pColors[0][c] * 3 + pColors[3][c] * 5) / 8); + } + + return false; + } + + color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const + { + assert((x < m_width) && (y < m_height)); + + int block_x0 = (static_cast<int>(x) - 2) >> 2; + int block_x1 = block_x0 + 1; + int block_y0 = (static_cast<int>(y) - 2) >> 2; + int block_y1 = block_y0 + 1; + + block_x0 = posmod(block_x0, m_block_width); + block_x1 = posmod(block_x1, m_block_width); + block_y0 = posmod(block_y0, m_block_height); + block_y1 = posmod(block_y1, m_block_height); + + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + return color_rgba((l[0] + h[0]) / 2, (l[1] + h[1]) / 2, (l[2] + h[2]) / 2, (m == 2) ? 0 : (l[3] + h[3]) / 2); + } + else + { + if (m == 0) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); + else if (m == 3) + return interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); + + color_rgba l(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0))); + color_rgba h(interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1))); + + if (m == 2) + return color_rgba((l[0] * 3 + h[0] * 5) / 8, (l[1] * 3 + h[1] * 5) / 8, (l[2] * 3 + h[2] * 5) / 8, (l[3] * 3 + h[3] * 5) / 8); + else + return color_rgba((l[0] * 5 + h[0] * 3) / 8, (l[1] * 5 + h[1] * 3) / 8, (l[2] * 5 + h[2] * 3) / 8, (l[3] * 5 + h[3] * 3) / 8); + } + } + + uint64_t pvrtc4_image::local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual) + { + uint64_t initial_error = evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false); + if (!initial_error) + return initial_error; + + vec3F c_avg_orig(0); + + for (int y = 0; y < 7; y++) + { + const uint32_t py = wrap_y(by * 4 + y - 1); + for (uint32_t x = 0; x < 7; x++) + { + const uint32_t px = wrap_x(bx * 4 + x - 1); + + const color_rgba& c = orig_img(px, py); + + c_avg_orig[0] += c[0]; + c_avg_orig[1] += c[1]; + c_avg_orig[2] += c[2]; + } + } + + c_avg_orig *= 1.0f / 49.0f; + + vec3F quant_colors[2]; + quant_colors[0].set(c_avg_orig); + quant_colors[0] -= vec3F(.0125f); + + quant_colors[1].set(c_avg_orig); + quant_colors[1] += vec3F(.0125f); + + float total_weight[2]; + + bool success = true; + + for (uint32_t pass = 0; pass < 4; pass++) + { + vec3F new_colors[2] = { vec3F(0), vec3F(0) }; + memset(total_weight, 0, sizeof(total_weight)); + + static const float s_weights[7][7] = + { + { 1.000000f, 1.637089f, 2.080362f, 2.242640f, 2.080362f, 1.637089f, 1.000000f }, + { 1.637089f, 2.414213f, 3.006572f, 3.242640f, 3.006572f, 2.414213f, 1.637089f }, + { 2.080362f, 3.006572f, 3.828426f, 4.242640f, 3.828426f, 3.006572f, 2.080362f }, + { 2.242640f, 3.242640f, 4.242640f, 5.000000f, 4.242640f, 3.242640f, 2.242640f }, + { 2.080362f, 3.006572f, 3.828426f, 4.242640f, 3.828426f, 3.006572f, 2.080362f }, + { 1.637089f, 2.414213f, 3.006572f, 3.242640f, 3.006572f, 2.414213f, 1.637089f }, + { 1.000000f, 1.637089f, 2.080362f, 2.242640f, 2.080362f, 1.637089f, 1.000000f } + }; + + for (int y = 0; y < 7; y++) + { + const uint32_t py = wrap_y(by * 4 + y - 1); + for (uint32_t x = 0; x < 7; x++) + { + const uint32_t px = wrap_x(bx * 4 + x - 1); + + const color_rgba& orig_c = orig_img(px, py); + + vec3F color(orig_c[0], orig_c[1], orig_c[2]); + + uint32_t c = quant_colors[0].squared_distance(color) > quant_colors[1].squared_distance(color); + + const float weight = s_weights[y][x]; + new_colors[c] += color * weight; + + total_weight[c] += weight; + } + } + + if (!total_weight[0] || !total_weight[1]) + success = false; + + quant_colors[0] = new_colors[0] / (float)total_weight[0]; + quant_colors[1] = new_colors[1] / (float)total_weight[1]; + } + + if (!success) + { + quant_colors[0] = c_avg_orig; + quant_colors[1] = c_avg_orig; + } + + vec4F colors[2] = { quant_colors[0], quant_colors[1] }; + + colors[0] += vec3F(.5f); + colors[1] += vec3F(.5f); + color_rgba color_0((int)colors[0][0], (int)colors[0][1], (int)colors[0][2], 0); + color_rgba color_1((int)colors[1][0], (int)colors[1][1], (int)colors[1][2], 0); + + pvrtc4_block cur_blocks[3][3]; + + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + cur_blocks[x + 1][y + 1] = m_blocks(block_x, block_y); + } + } + + color_rgba l1(0), h1(0); + + l1[0] = g_pvrtc_5_nearest[color_0[0]]; + h1[0] = g_pvrtc_5_nearest[color_1[0]]; + + l1[1] = g_pvrtc_5_nearest[color_0[1]]; + h1[1] = g_pvrtc_5_nearest[color_1[1]]; + + l1[2] = g_pvrtc_4_nearest[color_0[2]]; + h1[2] = g_pvrtc_5_nearest[color_0[2]]; + + l1[3] = 0; + h1[3] = 0; + + m_blocks(bx, by).set_endpoint_raw(0, l1, true); + m_blocks(bx, by).set_endpoint_raw(1, h1, true); + + uint64_t e03_err_0 = remap_pixels_influenced_by_endpoint(bx, by, orig_img, perceptual, false); + + pvrtc4_block blocks0[3][3]; + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + blocks0[x + 1][y + 1] = m_blocks(block_x, block_y); + } + } + + l1[0] = g_pvrtc_5_nearest[color_1[0]]; + h1[0] = g_pvrtc_5_nearest[color_0[0]]; + + l1[1] = g_pvrtc_5_nearest[color_1[1]]; + h1[1] = g_pvrtc_5_nearest[color_0[1]]; + + l1[2] = g_pvrtc_4_nearest[color_1[2]]; + h1[2] = g_pvrtc_5_nearest[color_0[2]]; + + l1[3] = 0; + h1[3] = 0; + + m_blocks(bx, by).set_endpoint_raw(0, l1, true); + m_blocks(bx, by).set_endpoint_raw(1, h1, true); + + uint64_t e03_err_1 = remap_pixels_influenced_by_endpoint(bx, by, orig_img, perceptual, false); + + if (initial_error < basisu::minimum(e03_err_0, e03_err_1)) + { + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + m_blocks(block_x, block_y) = cur_blocks[x + 1][y + 1]; + } + } + return initial_error; + } + else if (e03_err_0 < e03_err_1) + { + for (int y = -1; y <= 1; y++) + { + for (int x = -1; x <= 1; x++) + { + const uint32_t block_x = wrap_block_x(bx + x); + const uint32_t block_y = wrap_block_y(by + y); + m_blocks(block_x, block_y) = blocks0[x + 1][y + 1]; + } + } + assert(e03_err_0 == evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false)); + return e03_err_0; + } + + assert(e03_err_1 == evaluate_1x1_endpoint_error(bx, by, orig_img, perceptual, false)); + return e03_err_1; + } + +} // basisu diff --git a/thirdparty/basis_universal/basisu_pvrtc1_4.h b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h index 80b4413351..db6985a439 100644 --- a/thirdparty/basis_universal/basisu_pvrtc1_4.h +++ b/thirdparty/basis_universal/encoder/basisu_pvrtc1_4.h @@ -1,5 +1,5 @@ // basisu_pvrtc1_4.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -87,6 +87,14 @@ namespace basisu return (m_modulation >> ((y * 4 + x) * 2)) & 3; } + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < 4) && (y < 4) && (s < 4)); + uint32_t n = (y * 4 + x) * 2; + m_modulation = (m_modulation & (~(3 << n))) | (s << n); + assert(get_modulation(x, y) == s); + } + // Scaled by 8 inline const uint32_t* get_scaled_modulation_values(bool block_uses_transparent_modulation) const { @@ -107,7 +115,7 @@ namespace basisu } // opaque endpoints: 554, 555 - // transparent endpoints: 3443 or 3444 + // transparent endpoints: 3443, 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color_rgba& c, bool opaque_endpoint) { assert(endpoint_index < 2); @@ -352,7 +360,93 @@ namespace basisu return result; } - + + inline void set_modulation(uint32_t x, uint32_t y, uint32_t s) + { + assert((x < m_width) && (y < m_height)); + return m_blocks(x >> 2, y >> 2).set_modulation(x & 3, y & 3, s); + } + + inline uint64_t map_pixel(uint32_t x, uint32_t y, const color_rgba& c, bool perceptual, bool alpha_is_significant, bool record = true) + { + color_rgba v[4]; + get_interpolated_colors(x, y, v); + + uint64_t best_dist = color_distance(perceptual, c, v[0], alpha_is_significant); + uint32_t best_v = 0; + for (uint32_t i = 1; i < 4; i++) + { + uint64_t dist = color_distance(perceptual, c, v[i], alpha_is_significant); + if (dist < best_dist) + { + best_dist = dist; + best_v = i; + } + } + + if (record) + set_modulation(x, y, best_v); + + return best_dist; + } + + inline uint64_t remap_pixels_influenced_by_endpoint(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant) + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += map_pixel(x, y, orig_img(x, y), perceptual, alpha_is_significant); + } + } + + return total_error; + } + + inline uint64_t evaluate_1x1_endpoint_error(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual, bool alpha_is_significant, uint64_t threshold_error = 0) const + { + uint64_t total_error = 0; + + for (int yd = -3; yd <= 3; yd++) + { + const int y = wrap_y((int)by * 4 + 2 + yd); + + for (int xd = -3; xd <= 3; xd++) + { + const int x = wrap_x((int)bx * 4 + 2 + xd); + + total_error += color_distance(perceptual, get_pixel(x, y), orig_img(x, y), alpha_is_significant); + + if ((threshold_error) && (total_error >= threshold_error)) + return total_error; + } + } + + return total_error; + } + + uint64_t local_endpoint_optimization_opaque(uint32_t bx, uint32_t by, const image& orig_img, bool perceptual); + + inline uint64_t map_all_pixels(const image& img, bool perceptual, bool alpha_is_significant) + { + assert(m_width == img.get_width()); + assert(m_height == img.get_height()); + + uint64_t total_error = 0; + for (uint32_t y = 0; y < img.get_height(); y++) + for (uint32_t x = 0; x < img.get_width(); x++) + total_error += map_pixel(x, y, img(x, y), perceptual, alpha_is_significant); + + return total_error; + } + + public: uint32_t m_width, m_height; pvrtc4_block_vector2D m_blocks; uint32_t m_block_width, m_block_height; diff --git a/thirdparty/basis_universal/basisu_resample_filters.cpp b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp index d0b2fd77bb..597cb3f618 100644 --- a/thirdparty/basis_universal/basisu_resample_filters.cpp +++ b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp @@ -1,5 +1,5 @@ // basisu_resampler_filters.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -283,7 +283,7 @@ namespace basisu return sum; } - static const float KAISER_ALPHA = 4.0; + //static const float KAISER_ALPHA = 4.0; static double kaiser(double alpha, double half_width, double x) { const double ratio = (x / half_width); @@ -310,10 +310,22 @@ namespace basisu const resample_filter g_resample_filters[] = { - { "box", box_filter, BOX_FILTER_SUPPORT }, { "tent", tent_filter, TENT_FILTER_SUPPORT }, { "bell", bell_filter, BELL_SUPPORT }, { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, - { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, { "blackman", blackman_filter, BLACKMAN_SUPPORT }, { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, - { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, { "kaiser", kaiser_filter, KAISER_SUPPORT }, { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, - { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, + { "box", box_filter, BOX_FILTER_SUPPORT }, + { "tent", tent_filter, TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BELL_SUPPORT }, + { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, + { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, }; const int g_num_resample_filters = BASISU_ARRAY_SIZE(g_resample_filters); diff --git a/thirdparty/basis_universal/basisu_resampler.cpp b/thirdparty/basis_universal/encoder/basisu_resampler.cpp index e193ce83ff..e193ce83ff 100644 --- a/thirdparty/basis_universal/basisu_resampler.cpp +++ b/thirdparty/basis_universal/encoder/basisu_resampler.cpp diff --git a/thirdparty/basis_universal/basisu_resampler.h b/thirdparty/basis_universal/encoder/basisu_resampler.h index c3f2e05c25..dc0978caeb 100644 --- a/thirdparty/basis_universal/basisu_resampler.h +++ b/thirdparty/basis_universal/encoder/basisu_resampler.h @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" #define BASISU_RESAMPLER_DEBUG_OPS (0) #define BASISU_RESAMPLER_DEFAULT_FILTER "lanczos4" diff --git a/thirdparty/basis_universal/basisu_resampler_filters.h b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h index 5659c5fe86..0ebb51c334 100644 --- a/thirdparty/basis_universal/basisu_resampler_filters.h +++ b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h @@ -14,7 +14,7 @@ // limitations under the License. #pragma once -#include "transcoder/basisu.h" +#include "../transcoder/basisu.h" namespace basisu { diff --git a/thirdparty/basis_universal/basisu_ssim.cpp b/thirdparty/basis_universal/encoder/basisu_ssim.cpp index cceb400b88..cceb400b88 100644 --- a/thirdparty/basis_universal/basisu_ssim.cpp +++ b/thirdparty/basis_universal/encoder/basisu_ssim.cpp diff --git a/thirdparty/basis_universal/basisu_ssim.h b/thirdparty/basis_universal/encoder/basisu_ssim.h index 986ca3bbdf..986ca3bbdf 100644 --- a/thirdparty/basis_universal/basisu_ssim.h +++ b/thirdparty/basis_universal/encoder/basisu_ssim.h diff --git a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp new file mode 100644 index 0000000000..ca2b325693 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp @@ -0,0 +1,4189 @@ +// basisu_uastc_enc.cpp +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "basisu_uastc_enc.h" +#include "basisu_astc_decomp.h" +#include "basisu_gpu_texture.h" +#include "basisu_bc7enc.h" + +#ifdef _DEBUG +// When BASISU_VALIDATE_UASTC_ENC is 1, we pack and unpack to/from UASTC and ASTC, then validate that each codec returns the exact same results. This is slower. +#define BASISU_VALIDATE_UASTC_ENC 1 +#endif + +#define BASISU_SUPPORT_FORCE_MODE 0 + +using namespace basist; + +namespace basisu +{ + const uint32_t MAX_ENCODE_RESULTS = 512; + +#if BASISU_VALIDATE_UASTC_ENC + static void validate_func(bool condition, int line) + { + if (!condition) + { + fprintf(stderr, "basisu_uastc_enc: Internal validation failed on line %u!\n", line); + } + } + + #define VALIDATE(c) validate_func(c, __LINE__); +#else + #define VALIDATE(c) +#endif + + enum dxt_constants + { + cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, + cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U, + }; + + struct dxt1_block + { + enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; + + uint8_t m_low_color[cTotalEndpointBytes]; + uint8_t m_high_color[cTotalEndpointBytes]; + uint8_t m_selectors[cTotalSelectorBytes]; + + inline void clear() { basisu::clear_obj(*this); } + + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } + inline void set_low_color(uint16_t c) { m_low_color[0] = static_cast<uint8_t>(c & 0xFF); m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); } + inline void set_high_color(uint16_t c) { m_high_color[0] = static_cast<uint8_t>(c & 0xFF); m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF); } + inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits))& cDXT1SelectorMask; } + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + + static uint16_t pack_color(const color_rgba& color, bool scaled, uint32_t bias = 127U) + { + uint32_t r = color.r, g = color.g, b = color.b; + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + return static_cast<uint16_t>(basisu::minimum(b, 31U) | (basisu::minimum(g, 63U) << 5U) | (basisu::minimum(r, 31U) << 11U)); + } + + static uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); } + }; + +#define UASTC_WRITE_MODE_DESCS 0 + + static inline void uastc_write_bits(uint8_t* pBuf, uint32_t& bit_offset, uint64_t code, uint32_t codesize, const char* pDesc) + { + (void)pDesc; + +#if UASTC_WRITE_MODE_DESCS + if (pDesc) + printf("%s: %u %u\n", pDesc, bit_offset, codesize); +#endif + + assert((codesize == 64) || (code < (1ULL << codesize))); + + while (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_write = basisu::minimum<int>(codesize, 8 - byte_bit_offset); + + pBuf[bit_offset >> 3] |= (code << byte_bit_offset); + + code >>= bits_to_write; + codesize -= bits_to_write; + bit_offset += bits_to_write; + } + } + + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1) + { + if ((g_uastc_mode_has_alpha[result.m_uastc_mode]) && (result.m_uastc_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + assert(etc_eac_a8_blk.m_multiplier >= 1); + } + + uint8_t buf[32]; + memset(buf, 0, sizeof(buf)); + + uint32_t block_bit_offset = 0; + +#if UASTC_WRITE_MODE_DESCS + printf("**** Mode: %u\n", result.m_uastc_mode); +#endif + + uastc_write_bits(buf, block_bit_offset, g_uastc_mode_huff_codes[result.m_uastc_mode][0], g_uastc_mode_huff_codes[result.m_uastc_mode][1], "mode"); + + if (result.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.r, 8, "R"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.g, 8, "G"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.b, 8, "B"); + uastc_write_bits(buf, block_bit_offset, result.m_solid_color.a, 8, "A"); + + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_selector(0, 0), 2, "ETC1S"); + + uint32_t r, g, b; + if (etc1_blk.get_diff_bit()) + etc_block::unpack_color5(r, g, b, etc1_blk.get_base5_color(), false); + else + etc_block::unpack_color4(r, g, b, etc1_blk.get_base4_color(0), false); + + uastc_write_bits(buf, block_bit_offset, r, 5, "ETC1R"); + uastc_write_bits(buf, block_bit_offset, g, 5, "ETC1G"); + uastc_write_bits(buf, block_bit_offset, b, 5, "ETC1B"); + + memcpy(&blk, buf, sizeof(blk)); + return; + } + + if (g_uastc_mode_has_bc1_hint0[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, bc1_hint0, 1, "BC1H0"); + else + { + assert(bc1_hint0 == false); + } + + if (g_uastc_mode_has_bc1_hint1[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, bc1_hint1, 1, "BC1H1"); + else + { + assert(bc1_hint1 == false); + } + + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_flip_bit(), 1, "ETC1F"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_diff_bit(), 1, "ETC1D"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(0), 3, "ETC1I0"); + uastc_write_bits(buf, block_bit_offset, etc1_blk.get_inten_table(1), 3, "ETC1I1"); + + if (g_uastc_mode_has_etc1_bias[result.m_uastc_mode]) + uastc_write_bits(buf, block_bit_offset, etc1_bias, 5, "ETC1BIAS"); + else + { + assert(etc1_bias == 0); + } + + if (g_uastc_mode_has_alpha[result.m_uastc_mode]) + { + const uint32_t etc2_hints = etc_eac_a8_blk.m_table | (etc_eac_a8_blk.m_multiplier << 4); + + assert(etc2_hints > 0 && etc2_hints <= 0xFF); + uastc_write_bits(buf, block_bit_offset, etc2_hints, 8, "ETC2TM"); + } + + uint32_t subsets = 1; + switch (result.m_uastc_mode) + { + case 2: + case 4: + case 7: + case 9: + case 16: + uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 5, "PAT"); + subsets = 2; + break; + case 3: + uastc_write_bits(buf, block_bit_offset, result.m_common_pattern, 4, "PAT"); + subsets = 3; + break; + default: + break; + } + +#ifdef _DEBUG + uint32_t part_seed = 0; + switch (result.m_uastc_mode) + { + case 2: + case 4: + case 9: + case 16: + part_seed = g_astc_bc7_common_partitions2[result.m_common_pattern].m_astc; + break; + case 3: + part_seed = g_astc_bc7_common_partitions3[result.m_common_pattern].m_astc; + break; + case 7: + part_seed = g_bc7_3_astc2_common_partitions[result.m_common_pattern].m_astc2; + break; + default: + break; + } +#endif + + uint32_t total_planes = 1; + switch (result.m_uastc_mode) + { + case 6: + case 11: + case 13: + uastc_write_bits(buf, block_bit_offset, result.m_astc.m_ccs, 2, "COMPSEL"); + total_planes = 2; + break; + case 17: + // CCS field is always 3 for dual plane LA. + assert(result.m_astc.m_ccs == 3); + total_planes = 2; + break; + default: + break; + } + + uint8_t weights[32]; + memcpy(weights, result.m_astc.m_weights, 16 * total_planes); + + uint8_t endpoints[18]; + memcpy(endpoints, result.m_astc.m_endpoints, sizeof(endpoints)); + + const uint32_t total_comps = g_uastc_mode_comps[result.m_uastc_mode]; + + // LLAA + // LLAA LLAA + // LLAA LLAA LLAA + // RRGGBB + // RRGGBB RRGGBB + // RRGGBB RRGGBB RRGGBB + // RRGGBBAA + // RRGGBBAA RRGGBBAA + + const uint32_t weight_bits = g_uastc_mode_weight_bits[result.m_uastc_mode]; + + const uint8_t* pPartition_pattern; + const uint8_t* pSubset_anchor_indices = basist::get_anchor_indices(subsets, result.m_uastc_mode, result.m_common_pattern, pPartition_pattern); + + for (uint32_t plane_index = 0; plane_index < total_planes; plane_index++) + { + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) + { + const uint32_t anchor_index = pSubset_anchor_indices[subset_index]; + +#ifdef _DEBUG + if (subsets >= 2) + { + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t part_index = astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true); + if (part_index == subset_index) + { + assert(anchor_index == i); + break; + } + } + } + else + { + assert(!anchor_index); + } +#endif + + // Check anchor weight's MSB - if it's set then invert this subset's weights and swap the endpoints + if (weights[anchor_index * total_planes + plane_index] & (1 << (weight_bits - 1))) + { + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t part_index = pPartition_pattern[i]; + +#ifdef _DEBUG + if (subsets >= 2) + { + assert(part_index == (uint32_t)astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true)); + } + else + { + assert(!part_index); + } +#endif + + if (part_index == subset_index) + weights[i * total_planes + plane_index] = ((1 << weight_bits) - 1) - weights[i * total_planes + plane_index]; + } + + if (total_planes == 2) + { + for (int c = 0; c < (int)total_comps; c++) + { + const uint32_t comp_plane = (total_comps == 2) ? c : ((c == result.m_astc.m_ccs) ? 1 : 0); + + if (comp_plane == plane_index) + std::swap(endpoints[c * 2 + 0], endpoints[c * 2 + 1]); + } + } + else + { + for (uint32_t c = 0; c < total_comps; c++) + std::swap(endpoints[subset_index * total_comps * 2 + c * 2 + 0], endpoints[subset_index * total_comps * 2 + c * 2 + 1]); + } + } + } // subset_index + } // plane_index + + const uint32_t total_values = total_comps * 2 * subsets; + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[result.m_uastc_mode]; + + uint32_t bit_values[18]; + uint32_t tq_values[8]; + uint32_t total_tq_values = 0; + uint32_t tq_accum = 0; + uint32_t tq_mul = 1; + + const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0]; + const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1]; + const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2]; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t val = endpoints[i]; + + uint32_t bits = val & ((1 << ep_bits) - 1); + uint32_t tq = val >> ep_bits; + + bit_values[i] = bits; + + if (ep_trits) + { + assert(tq < 3); + tq_accum += tq * tq_mul; + tq_mul *= 3; + if (tq_mul == 243) + { + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + else if (ep_quints) + { + assert(tq < 5); + tq_accum += tq * tq_mul; + tq_mul *= 5; + if (tq_mul == 125) + { + tq_values[total_tq_values++] = tq_accum; + tq_accum = 0; + tq_mul = 1; + } + } + } + + uint32_t total_endpoint_bits = 0; + + for (uint32_t i = 0; i < total_tq_values; i++) + { + const uint32_t num_bits = ep_trits ? 8 : 7; + uastc_write_bits(buf, block_bit_offset, tq_values[i], num_bits, "ETQ"); + total_endpoint_bits += num_bits; + } + + if (tq_mul > 1) + { + uint32_t num_bits; + if (ep_trits) + { + if (tq_mul == 3) + num_bits = 2; + else if (tq_mul == 9) + num_bits = 4; + else if (tq_mul == 27) + num_bits = 5; + else //if (tq_mul == 81) + num_bits = 7; + } + else + { + if (tq_mul == 5) + num_bits = 3; + else //if (tq_mul == 25) + num_bits = 5; + } + uastc_write_bits(buf, block_bit_offset, tq_accum, num_bits, "ETQ"); + total_endpoint_bits += num_bits; + } + + for (uint32_t i = 0; i < total_values; i++) + { + uastc_write_bits(buf, block_bit_offset, bit_values[i], ep_bits, "EBITS"); + total_endpoint_bits += ep_bits; + } + +#if UASTC_WRITE_MODE_DESCS + uint32_t weight_start = block_bit_offset; +#endif + + uint32_t total_weight_bits = 0; + const uint32_t plane_shift = (total_planes == 2) ? 1 : 0; + for (uint32_t i = 0; i < 16 * total_planes; i++) + { + uint32_t numbits = weight_bits; + for (uint32_t s = 0; s < subsets; s++) + { + if (pSubset_anchor_indices[s] == (i >> plane_shift)) + { + numbits--; + break; + } + } + + uastc_write_bits(buf, block_bit_offset, weights[i], numbits, nullptr); + + total_weight_bits += numbits; + } + +#if UASTC_WRITE_MODE_DESCS + printf("WEIGHTS: %u %u\n", weight_start, total_weight_bits); +#endif + + assert(block_bit_offset <= 128); + memcpy(&blk, buf, sizeof(blk)); + +#if UASTC_WRITE_MODE_DESCS + printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits); +#endif + } + + // MODE 0 + // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB + // 18. DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB + static void astc_mode0_or_18(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, const uint8_t *pForce_selectors = nullptr) + { + const uint32_t endpoint_range = (mode == 18) ? 11 : 19; + const uint32_t weight_range = (mode == 18) ? 11 : 8; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = (mode == 18) ? 32 : 16; + ccell_params.m_pSelector_weights = (mode == 18) ? g_astc_weights5 : g_astc_weights4; + ccell_params.m_pSelector_weightsx = (mode == 18) ? (const bc7enc_vec4F*)g_astc_weights5x : (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_pForce_selectors = pForce_selectors; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range;// (mode == 18) ? 11 : 8; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 8; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + if (pForce_selectors == nullptr) + { + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = ((mode == 18) ? 31 : 15) - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 1 + // 1-subset, 2-bit indices, 8-bit endpoints, BC7 mode 3 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) MODE3 or MODE5 RGB + static void astc_mode1(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 4; + ccell_params.m_pSelector_weights = g_bc7_weights2; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params.m_astc_endpoint_range = 20; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 8; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + const uint32_t range = 20; + + bool invert = false; + + int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 1; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + static uint32_t estimate_partition2(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], const uint32_t weights[4]) + { + assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64); + + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16]; + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + return best_common_pattern; + } + + // MODE 2 + // 2-subset, 3-bit indices, 4-bit endpoints, BC7 mode 1 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 8 (16) MODE1 + static void astc_mode2(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + + if (estimate_partition) + { + const uint32_t weights[4] = { 1, 1, 1, 1 }; + first_common_pattern = estimate_partition2(8, 3, g_bc7_weights3, block, weights); + last_common_pattern = first_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 8; + ccell_params[part].m_pSelector_weights = g_bc7_weights3; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params[part].m_astc_endpoint_range = 8; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + { + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 5; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t p0 = 0; + uint32_t p1 = 1; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(p0, p1); + + astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + const uint32_t range = 8; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert[0] = true; + } + + astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[range][astc_results.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]); + std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]); + std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 2; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + } + + } // common_pattern + } + + // MODE 3 + // 3-subsets, 2-bit indices, [0,11] endpoints, BC7 mode 2 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 3, CEM: 8 (RGB Direct ), EndpointRange: 7 (12) MODE2 + static void astc_mode3(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS3; + + if (estimate_partition) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS3; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition3[bc7_pattern * 16]; + + color_quad_u8 subset_colors[3][16]; + uint32_t subset_total_colors[3] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 3) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + first_common_pattern = best_common_pattern; + last_common_pattern = best_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t endpoint_range = 7; + + const uint32_t bc7_pattern = g_astc_bc7_common_partitions3[common_pattern].m_bc7; + + color_rgba part_pixels[3][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[3] = { 0, 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[bc7_part]; + part_pixels[bc7_part][num_part_pixels[bc7_part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[3]; + color_cell_compressor_results ccell_results[3]; + uint8_t ccell_result_selectors[3][16]; + uint8_t ccell_result_selectors_temp[3][16]; + + uint64_t total_part_err = 0; + for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++) + { + memset(&ccell_params[bc7_part], 0, sizeof(ccell_params[bc7_part])); + + ccell_params[bc7_part].m_num_pixels = num_part_pixels[bc7_part]; + ccell_params[bc7_part].m_pPixels = (color_quad_u8*)&part_pixels[bc7_part][0]; + ccell_params[bc7_part].m_num_selector_weights = 4; + ccell_params[bc7_part].m_pSelector_weights = g_bc7_weights2; + ccell_params[bc7_part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[bc7_part].m_astc_endpoint_range = endpoint_range; + ccell_params[bc7_part].m_weights[0] = 1; + ccell_params[bc7_part].m_weights[1] = 1; + ccell_params[bc7_part].m_weights[2] = 1; + ccell_params[bc7_part].m_weights[3] = 1; + + memset(&ccell_results[bc7_part], 0, sizeof(ccell_results[bc7_part])); + ccell_results[bc7_part].m_pSelectors = &ccell_result_selectors[bc7_part][0]; + ccell_results[bc7_part].m_pSelectors_temp = &ccell_result_selectors_temp[bc7_part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[bc7_part], &ccell_results[bc7_part], &comp_params); + total_part_err += part_err; + } // part + + { + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 3; + astc_results.m_partition_seed = g_astc_bc7_common_partitions3[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t astc_to_bc7_part[3]; // converts ASTC to BC7 partition index + const uint32_t perm = g_astc_bc7_common_partitions3[common_pattern].m_astc_to_bc7_perm; + astc_to_bc7_part[0] = g_astc_to_bc7_partition_index_perm_tables[perm][0]; + astc_to_bc7_part[1] = g_astc_to_bc7_partition_index_perm_tables[perm][1]; + astc_to_bc7_part[2] = g_astc_to_bc7_partition_index_perm_tables[perm][2]; + + bool invert_astc_part[3] = { false, false, false }; + + for (uint32_t astc_part = 0; astc_part < 3; astc_part++) + { + uint8_t* pEndpoints = &astc_results.m_endpoints[6 * astc_part]; + + pEndpoints[0] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[0]; + pEndpoints[1] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[0]; + pEndpoints[2] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[1]; + pEndpoints[3] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[1]; + pEndpoints[4] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_low_endpoint.m_c[2]; + pEndpoints[5] = ccell_results[astc_to_bc7_part[astc_part]].m_astc_high_endpoint.m_c[2]; + + int s0 = g_astc_unquant[endpoint_range][pEndpoints[0]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[2]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][pEndpoints[1]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[3]].m_unquant + g_astc_unquant[endpoint_range][pEndpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(pEndpoints[0], pEndpoints[1]); + std::swap(pEndpoints[2], pEndpoints[3]); + std::swap(pEndpoints[4], pEndpoints[5]); + invert_astc_part[astc_part] = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition3[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = 0; + for (uint32_t i = 0; i < 3; i++) + { + if (astc_to_bc7_part[i] == bc7_part) + { + astc_part = i; + break; + } + } + + if (invert_astc_part[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 3; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } + + } // common_pattern + } + + // MODE 4 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE3 + static void astc_mode4(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = 12; + + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + + if (estimate_partition) + { + const uint32_t weights[4] = { 1, 1, 1, 1 }; + first_common_pattern = estimate_partition2(4, 3, g_bc7_weights2, block, weights); + last_common_pattern = first_common_pattern + 1; + } + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 4; + ccell_params[part].m_pSelector_weights = g_bc7_weights2; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[part].m_astc_endpoint_range = endpoint_range; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = 8; + + uint32_t p0 = 0; + uint32_t p1 = 1; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(p0, p1); + + astc_results.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + invert[0] = true; + } + + astc_results.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0 + 6], astc_results.m_endpoints[1 + 6]); + std::swap(astc_results.m_endpoints[2 + 6], astc_results.m_endpoints[3 + 6]); + std::swap(astc_results.m_endpoints[4 + 6], astc_results.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 4; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } // common_pattern + } + + // MODE 5 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset) + static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 5; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 8; + ccell_params.m_pSelector_weights = g_bc7_weights3; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = false; + blk.m_weight_range = weight_range; + + blk.m_ccs = 0; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 8; + + blk.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + blk.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + blk.m_weights[x + y * 4] = 7 - blk.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 5; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 6 + // DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 18 (160) BC7 MODE5 + static void astc_mode6(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + for (uint32_t rot_comp = 0; rot_comp < 3; rot_comp++) + { + const uint32_t weight_range = 2; + const uint32_t endpoint_range = 18; + + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = 255; + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 4; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights2; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params); + + color_cell_compressor_params ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 4; + ccell_params_a.m_pSelector_weights = g_bc7_weights2; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &comp_params) / 3; + + uint64_t total_err = part_err_rgb + part_err_a; + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 8; + + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 3 - rgb_index; + a_index = 3 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 6; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE 7 - 2 subset ASTC, 3 subset BC7 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 8 (RGB Direct ), EndpointRange: 12 (40) MODE2 + static void astc_mode7(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, bool estimate_partition) + { + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; + + if (estimate_partition) + { + uint64_t best_err = UINT64_MAX; + uint32_t best_common_pattern = 0; + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS; common_pattern++) + { + const uint8_t* pPartition = &g_bc7_3_astc2_patterns2[common_pattern][0]; + +#ifdef _DEBUG + const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2; + const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73; + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); + assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); + assert(astc_part == pPartition[x + y * 4]); + } + } +#endif + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; (subset < 2) && (total_subset_err < best_err); subset++) + total_subset_err += color_cell_compression_est_astc(4, 3, g_bc7_weights2, subset_total_colors[subset], &subset_colors[subset][0], best_err, weights); + + if (total_subset_err < best_err) + { + best_err = total_subset_err; + best_common_pattern = common_pattern; + } + } + + first_common_pattern = best_common_pattern; + last_common_pattern = best_common_pattern + 1; + } + + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = 12; + + for (uint32_t common_pattern = first_common_pattern; common_pattern < last_common_pattern; common_pattern++) + { + const uint32_t astc_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_astc2; + const uint32_t bc7_pattern = g_bc7_3_astc2_common_partitions[common_pattern].m_bc73; + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[common_pattern].k; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); +#ifdef _DEBUG + assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); +#endif + + part_pixel_index[y][x] = num_part_pixels[astc_part]; + part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_part_err = 0; + for (uint32_t part = 0; part < 2; part++) + { + memset(&ccell_params[part], 0, sizeof(ccell_params[part])); + + ccell_params[part].m_num_pixels = num_part_pixels[part]; + ccell_params[part].m_pPixels = (color_quad_u8*)&part_pixels[part][0]; + ccell_params[part].m_num_selector_weights = 4; + ccell_params[part].m_pSelector_weights = g_bc7_weights2; + ccell_params[part].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[part].m_astc_endpoint_range = endpoint_range; + ccell_params[part].m_weights[0] = 1; + ccell_params[part].m_weights[1] = 1; + ccell_params[part].m_weights[2] = 1; + ccell_params[part].m_weights[3] = 1; + + memset(&ccell_results[part], 0, sizeof(ccell_results[part])); + ccell_results[part].m_pSelectors = &ccell_result_selectors[part][0]; + ccell_results[part].m_pSelectors_temp = &ccell_result_selectors_temp[part][0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params[part], &ccell_results[part], &comp_params); + total_part_err += part_err; + } // part + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = false; + blk.m_weight_range = 2; + + blk.m_ccs = 0; + blk.m_subsets = 2; + blk.m_partition_seed = astc_pattern; + blk.m_cem = 8; + + const uint32_t p0 = 0; + const uint32_t p1 = 1; + + blk.m_endpoints[0] = ccell_results[p0].m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results[p0].m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = ccell_results[p0].m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = ccell_results[p0].m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = ccell_results[p0].m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = ccell_results[p0].m_astc_high_endpoint.m_c[2]; + + bool invert[2] = { false, false }; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + invert[0] = true; + } + + blk.m_endpoints[6] = ccell_results[p1].m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[7] = ccell_results[p1].m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[8] = ccell_results[p1].m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[9] = ccell_results[p1].m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[10] = ccell_results[p1].m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[11] = ccell_results[p1].m_astc_high_endpoint.m_c[2]; + + s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4 + 6]].m_unquant; + s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3 + 6]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5 + 6]].m_unquant; + + if (s1 < s0) + { + std::swap(blk.m_endpoints[0 + 6], blk.m_endpoints[1 + 6]); + std::swap(blk.m_endpoints[2 + 6], blk.m_endpoints[3 + 6]); + std::swap(blk.m_endpoints[4 + 6], blk.m_endpoints[5 + 6]); + invert[1] = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); + + blk.m_weights[x + y * 4] = ccell_result_selectors[astc_part][part_pixel_index[y][x]]; + + if (invert[astc_part]) + blk.m_weights[x + y * 4] = 3 - blk.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 7; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_part_err; + total_results++; + } + + } // common_pattern + } + + static void estimate_partition2_list(uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeights, const color_rgba block[4][4], uint32_t* pParts, uint32_t max_parts, const uint32_t weights[4]) + { + assert(pWeights[0] == 0 && pWeights[num_weights - 1] == 64); + + const uint32_t MAX_PARTS = 8; + assert(max_parts <= MAX_PARTS); + + uint64_t part_error[MAX_PARTS]; + memset(part_error, 0xFF, sizeof(part_error)); + memset(pParts, 0, sizeof(pParts[0]) * max_parts); + + for (uint32_t common_pattern = 0; common_pattern < TOTAL_ASTC_BC7_COMMON_PARTITIONS2; common_pattern++) + { + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + const uint8_t* pPartition = &g_bc7_partition2[bc7_pattern * 16]; + + color_quad_u8 subset_colors[2][16]; + uint32_t subset_total_colors[2] = { 0, 0 }; + for (uint32_t index = 0; index < 16; index++) + subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = ((const color_quad_u8*)block)[index]; + + uint64_t total_subset_err = 0; + for (uint32_t subset = 0; subset < 2; subset++) + total_subset_err += color_cell_compression_est_astc(num_weights, num_comps, pWeights, subset_total_colors[subset], &subset_colors[subset][0], UINT64_MAX, weights); + + for (int i = 0; i < (int)max_parts; i++) + { + if (total_subset_err < part_error[i]) + { + for (int j = max_parts - 1; j > i; --j) + { + pParts[j] = pParts[j - 1]; + part_error[j] = part_error[j - 1]; + } + + pParts[i] = common_pattern; + part_error[i] = total_subset_err; + + break; + } + } + } + +#ifdef _DEBUG + for (uint32_t i = 0; i < max_parts - 1; i++) + { + assert(part_error[i] <= part_error[i + 1]); + } +#endif + } + + // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7 + // 16. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7 + static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size) + { + assert(mode == 9 || mode == 16); + + const color_rgba* pBlock = &source_block[0][0]; + + color_rgba temp_block[16]; + if (mode == 16) + { + for (uint32_t i = 0; i < 16; i++) + { + if (mode == 16) + { + assert(pBlock[i].r == pBlock[i].g); + assert(pBlock[i].r == pBlock[i].b); + } + + const uint32_t l = pBlock[i].r; + const uint32_t a = pBlock[i].a; + + // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally. + temp_block[i].set_noclamp_rgba(l, 0, 0, a); + } + + pBlock = temp_block; + } + + const uint32_t weights[4] = { 1, 1, 1, 1 }; + + //const uint32_t weight_range = 2; + const uint32_t endpoint_range = (mode == 16) ? 20 : 8; + + uint32_t first_common_pattern = 0; + uint32_t last_common_pattern = TOTAL_ASTC_BC7_COMMON_PARTITIONS2; + bool use_part_list = false; + + const uint32_t MAX_PARTS = 8; + uint32_t parts[MAX_PARTS]; + + if (estimate_partition_list_size == 1) + { + first_common_pattern = estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights); + last_common_pattern = first_common_pattern + 1; + } + else if (estimate_partition_list_size > 0) + { + assert(estimate_partition_list_size <= MAX_PARTS); + estimate_partition_list_size = basisu::minimum(estimate_partition_list_size, MAX_PARTS); + + estimate_partition2_list(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, parts, estimate_partition_list_size, weights); + + first_common_pattern = 0; + last_common_pattern = estimate_partition_list_size; + use_part_list = true; + +#ifdef _DEBUG + assert(parts[0] == estimate_partition2(4, 4, g_bc7_weights2, (const color_rgba(*)[4])pBlock, weights)); +#endif + } + + for (uint32_t common_pattern_iter = first_common_pattern; common_pattern_iter < last_common_pattern; common_pattern_iter++) + { + const uint32_t common_pattern = use_part_list ? parts[common_pattern_iter] : common_pattern_iter; + + const uint32_t bc7_pattern = g_astc_bc7_common_partitions2[common_pattern].m_bc7; + + color_rgba part_pixels[2][16]; + uint32_t part_pixel_index[4][4]; + uint32_t num_part_pixels[2] = { 0, 0 }; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + part_pixel_index[y][x] = num_part_pixels[part]; + part_pixels[part][num_part_pixels[part]++] = pBlock[y * 4 + x]; + } + } + + color_cell_compressor_params ccell_params[2]; + color_cell_compressor_results ccell_results[2]; + uint8_t ccell_result_selectors[2][16]; + uint8_t ccell_result_selectors_temp[2][16]; + + uint64_t total_err = 0; + for (uint32_t subset = 0; subset < 2; subset++) + { + memset(&ccell_params[subset], 0, sizeof(ccell_params[subset])); + + ccell_params[subset].m_num_pixels = num_part_pixels[subset]; + ccell_params[subset].m_pPixels = (color_quad_u8*)&part_pixels[subset][0]; + ccell_params[subset].m_num_selector_weights = 4; + ccell_params[subset].m_pSelector_weights = g_bc7_weights2; + ccell_params[subset].m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params[subset].m_astc_endpoint_range = endpoint_range; + ccell_params[subset].m_weights[0] = weights[0]; + ccell_params[subset].m_weights[1] = weights[1]; + ccell_params[subset].m_weights[2] = weights[2]; + ccell_params[subset].m_weights[3] = weights[3]; + ccell_params[subset].m_has_alpha = true; + + memset(&ccell_results[subset], 0, sizeof(ccell_results[subset])); + ccell_results[subset].m_pSelectors = &ccell_result_selectors[subset][0]; + ccell_results[subset].m_pSelectors_temp = &ccell_result_selectors_temp[subset][0]; + + uint64_t subset_err = color_cell_compression(255, &ccell_params[subset], &ccell_results[subset], &comp_params); + + if (mode == 16) + { + color_rgba colors[4]; + for (uint32_t c = 0; c < 4; c++) + { + colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + colors[3].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results[subset].m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + } + + for (uint32_t i = 1; i < 4 - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[3].m_comps[c], g_bc7_weights2[i], false); + + for (uint32_t p = 0; p < ccell_params[subset].m_num_pixels; p++) + { + color_rgba orig_pix(part_pixels[subset][p]); + orig_pix.g = orig_pix.r; + orig_pix.b = orig_pix.r; + total_err += color_distance_la(orig_pix, colors[ccell_result_selectors[subset][p]]); + } + } + else + { + total_err += subset_err; + } + } // subset + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = 2; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 2; + astc_results.m_partition_seed = g_astc_bc7_common_partitions2[common_pattern].m_astc; + astc_results.m_cem = (mode == 16) ? 4 : 12; + + uint32_t part[2] = { 0, 1 }; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + std::swap(part[0], part[1]); + + bool invert[2] = { false, false }; + + for (uint32_t p = 0; p < 2; p++) + { + if (mode == 16) + { + astc_results.m_endpoints[p * 4 + 0] = ccell_results[part[p]].m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[p * 4 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[0]; + + astc_results.m_endpoints[p * 4 + 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[p * 4 + 3] = ccell_results[part[p]].m_astc_high_endpoint.m_c[3]; + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + astc_results.m_endpoints[p * 8 + c * 2] = ccell_results[part[p]].m_astc_low_endpoint.m_c[c]; + astc_results.m_endpoints[p * 8 + c * 2 + 1] = ccell_results[part[p]].m_astc_high_endpoint.m_c[c]; + } + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 0]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 2]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 4]].m_unquant; + + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 1]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 3]].m_unquant + + g_astc_unquant[endpoint_range][astc_results.m_endpoints[p * 8 + 5]].m_unquant; + + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[p * 8 + 0], astc_results.m_endpoints[p * 8 + 1]); + std::swap(astc_results.m_endpoints[p * 8 + 2], astc_results.m_endpoints[p * 8 + 3]); + std::swap(astc_results.m_endpoints[p * 8 + 4], astc_results.m_endpoints[p * 8 + 5]); + std::swap(astc_results.m_endpoints[p * 8 + 6], astc_results.m_endpoints[p * 8 + 7]); + invert[p] = true; + } + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t bc7_part = g_bc7_partition2[16 * bc7_pattern + x + y * 4]; + + astc_results.m_weights[x + y * 4] = ccell_result_selectors[bc7_part][part_pixel_index[y][x]]; + + uint32_t astc_part = bc7_part; + if (g_astc_bc7_common_partitions2[common_pattern].m_invert) + astc_part = 1 - astc_part; + + if (invert[astc_part]) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = common_pattern; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + + } // common_pattern + } + + // MODE 10 + // DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 13 (48) MODE6 + static void astc_mode10(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 8; + const uint32_t endpoint_range = 13; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 16; + ccell_params.m_pSelector_weights = g_astc_weights4; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 12; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 15 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 10; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // 11. DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct), EndpointRange: 13 (48) MODE5 + // 17. DualPlane: 1, WeightRange : 2 (4), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE5 + static void astc_mode11_or_17(uint32_t mode, const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + assert((mode == 11) || (mode == 17)); + + const uint32_t weight_range = 2; + const uint32_t endpoint_range = (mode == 17) ? 20 : 13; + + bc7enc_compress_block_params local_comp_params(comp_params); + local_comp_params.m_perceptual = false; + local_comp_params.m_weights[0] = 1; + local_comp_params.m_weights[1] = 1; + local_comp_params.m_weights[2] = 1; + local_comp_params.m_weights[3] = 1; + + const uint32_t last_rot_comp = (mode == 17) ? 1 : 4; + + for (uint32_t rot_comp = 0; rot_comp < last_rot_comp; rot_comp++) + { + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + if (mode == 17) + { + assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[1]); + assert(block_rgb[i].m_c[0] == block_rgb[i].m_c[2]); + + block_a[i].m_c[0] = block_rgb[i].m_c[3]; + block_a[i].m_c[1] = block_rgb[i].m_c[3]; + block_a[i].m_c[2] = block_rgb[i].m_c[3]; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[1] = block_rgb[i].m_c[0]; + block_rgb[i].m_c[2] = block_rgb[i].m_c[0]; + block_rgb[i].m_c[3] = 255; + } + else + { + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3]; + block_rgb[i].m_c[3] = 255; + } + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 4; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights2; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params); + + color_cell_compressor_params ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 4; + ccell_params_a.m_pSelector_weights = g_bc7_weights2; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3; + + uint64_t total_err = (mode == 17) ? ((part_err_rgb / 3) + part_err_a) : (part_err_rgb + part_err_a); + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = (mode == 17) ? 3 : rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = (mode == 17) ? 4 : 12; + + bool invert = false; + + if (mode == 17) + { + assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[1]); + assert(ccell_results_rgb.m_astc_low_endpoint.m_c[0] == ccell_results_rgb.m_astc_low_endpoint.m_c[2]); + + assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[1]); + assert(ccell_results_rgb.m_astc_high_endpoint.m_c[0] == ccell_results_rgb.m_astc_high_endpoint.m_c[2]); + + blk.m_endpoints[0] = ccell_results_rgb.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = ccell_results_rgb.m_astc_high_endpoint.m_c[0]; + + blk.m_endpoints[2] = ccell_results_a.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[3] = ccell_results_a.m_astc_high_endpoint.m_c[0]; + } + else + { + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + if (rot_comp == 3) + { + blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0]; + } + else + { + blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp]; + blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp]; + } + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + std::swap(blk.m_endpoints[6], blk.m_endpoints[7]); + invert = true; + } + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 3 - rgb_index; + a_index = 3 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = mode; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE 12 + // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 19 (192) MODE6 + static void astc_mode12(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 5; + const uint32_t endpoint_range = 19; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 8; + ccell_params.m_pSelector_weights = g_bc7_weights3; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights3x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 12; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 7 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 12; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // 13. DualPlane: 1, WeightRange: 0 (2), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE5 + static void astc_mode13(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + bc7enc_compress_block_params local_comp_params(comp_params); + local_comp_params.m_perceptual = false; + local_comp_params.m_weights[0] = 1; + local_comp_params.m_weights[1] = 1; + local_comp_params.m_weights[2] = 1; + local_comp_params.m_weights[3] = 1; + + for (uint32_t rot_comp = 0; rot_comp < 4; rot_comp++) + { + const uint32_t weight_range = 0; + const uint32_t endpoint_range = 20; + + color_quad_u8 block_rgb[16]; + color_quad_u8 block_a[16]; + for (uint32_t i = 0; i < 16; i++) + { + block_rgb[i] = ((color_quad_u8*)&block[0][0])[i]; + block_a[i] = block_rgb[i]; + + uint8_t c = block_a[i].m_c[rot_comp]; + block_a[i].m_c[0] = c; + block_a[i].m_c[1] = c; + block_a[i].m_c[2] = c; + block_a[i].m_c[3] = 255; + + block_rgb[i].m_c[rot_comp] = block_rgb[i].m_c[3]; + block_rgb[i].m_c[3] = 255; + } + + uint8_t ccell_result_selectors_temp[16]; + + color_cell_compressor_params ccell_params_rgb; + memset(&ccell_params_rgb, 0, sizeof(ccell_params_rgb)); + + ccell_params_rgb.m_num_pixels = 16; + ccell_params_rgb.m_pPixels = block_rgb; + ccell_params_rgb.m_num_selector_weights = 2; + ccell_params_rgb.m_pSelector_weights = g_bc7_weights1; + ccell_params_rgb.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x; + ccell_params_rgb.m_astc_endpoint_range = endpoint_range; + ccell_params_rgb.m_weights[0] = 1; + ccell_params_rgb.m_weights[1] = 1; + ccell_params_rgb.m_weights[2] = 1; + ccell_params_rgb.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_rgb; + uint8_t ccell_result_selectors_rgb[16]; + memset(&ccell_results_rgb, 0, sizeof(ccell_results_rgb)); + ccell_results_rgb.m_pSelectors = &ccell_result_selectors_rgb[0]; + ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &local_comp_params); + + color_cell_compressor_params ccell_params_a; + memset(&ccell_params_a, 0, sizeof(ccell_params_a)); + + ccell_params_a.m_num_pixels = 16; + ccell_params_a.m_pPixels = block_a; + ccell_params_a.m_num_selector_weights = 2; + ccell_params_a.m_pSelector_weights = g_bc7_weights1; + ccell_params_a.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights1x; + ccell_params_a.m_astc_endpoint_range = endpoint_range; + ccell_params_a.m_weights[0] = 1; + ccell_params_a.m_weights[1] = 1; + ccell_params_a.m_weights[2] = 1; + ccell_params_a.m_weights[3] = 1; + + color_cell_compressor_results ccell_results_a; + uint8_t ccell_result_selectors_a[16]; + memset(&ccell_results_a, 0, sizeof(ccell_results_a)); + ccell_results_a.m_pSelectors = &ccell_result_selectors_a[0]; + ccell_results_a.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err_a = color_cell_compression(255, &ccell_params_a, &ccell_results_a, &local_comp_params) / 3; + + uint64_t total_err = part_err_rgb + part_err_a; + + // ASTC + astc_block_desc blk; + memset(&blk, 0, sizeof(blk)); + + blk.m_dual_plane = true; + blk.m_weight_range = weight_range; + + blk.m_ccs = rot_comp; + blk.m_subsets = 1; + blk.m_partition_seed = 0; + blk.m_cem = 12; + + blk.m_endpoints[0] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[1] = (rot_comp == 0 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[0]; + blk.m_endpoints[2] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[1]; + blk.m_endpoints[3] = (rot_comp == 1 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[1]; + blk.m_endpoints[4] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_low_endpoint.m_c[2]; + blk.m_endpoints[5] = (rot_comp == 2 ? ccell_results_a : ccell_results_rgb).m_astc_high_endpoint.m_c[2]; + if (rot_comp == 3) + { + blk.m_endpoints[6] = ccell_results_a.m_astc_low_endpoint.m_c[0]; + blk.m_endpoints[7] = ccell_results_a.m_astc_high_endpoint.m_c[0]; + } + else + { + blk.m_endpoints[6] = ccell_results_rgb.m_astc_low_endpoint.m_c[rot_comp]; + blk.m_endpoints[7] = ccell_results_rgb.m_astc_high_endpoint.m_c[rot_comp]; + } + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][blk.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][blk.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][blk.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(blk.m_endpoints[0], blk.m_endpoints[1]); + std::swap(blk.m_endpoints[2], blk.m_endpoints[3]); + std::swap(blk.m_endpoints[4], blk.m_endpoints[5]); + std::swap(blk.m_endpoints[6], blk.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t rgb_index = ccell_result_selectors_rgb[x + y * 4]; + uint32_t a_index = ccell_result_selectors_a[x + y * 4]; + + if (invert) + { + rgb_index = 1 - rgb_index; + a_index = 1 - a_index; + } + + blk.m_weights[(x + y * 4) * 2 + 0] = (uint8_t)rgb_index; + blk.m_weights[(x + y * 4) * 2 + 1] = (uint8_t)a_index; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 13; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = blk; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } // rot_comp + } + + // MODE14 + // DualPlane: 0, WeightRange: 2 (4), Subsets: 1, CEM: 12 (RGBA Direct ), EndpointRange: 20 (256) MODE6 + static void astc_mode14(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 2; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + ccell_params.m_num_pixels = 16; + ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_num_selector_weights = 4; + ccell_params.m_pSelector_weights = g_bc7_weights2; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_bc7_weights2x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + uint64_t part_err = color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 12; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[1]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; + astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; + astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; + astc_results.m_endpoints[6] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[7] = ccell_results.m_astc_high_endpoint.m_c[3]; + + bool invert = false; + + int s0 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[0]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[2]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[4]].m_unquant; + int s1 = g_astc_unquant[endpoint_range][astc_results.m_endpoints[1]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[3]].m_unquant + g_astc_unquant[endpoint_range][astc_results.m_endpoints[5]].m_unquant; + if (s1 < s0) + { + std::swap(astc_results.m_endpoints[0], astc_results.m_endpoints[1]); + std::swap(astc_results.m_endpoints[2], astc_results.m_endpoints[3]); + std::swap(astc_results.m_endpoints[4], astc_results.m_endpoints[5]); + std::swap(astc_results.m_endpoints[6], astc_results.m_endpoints[7]); + invert = true; + } + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + if (invert) + astc_results.m_weights[x + y * 4] = 3 - astc_results.m_weights[x + y * 4]; + } + } + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 14; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = part_err; + total_results++; + } + } + + // MODE 15 + // DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) BC7 MODE6 + static void astc_mode15(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) + { + const uint32_t weight_range = 8; + const uint32_t endpoint_range = 20; + + color_cell_compressor_params ccell_params; + memset(&ccell_params, 0, sizeof(ccell_params)); + + color_rgba temp_block[16]; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t l = ((const color_rgba*)block)[i].r; + const uint32_t a = ((const color_rgba*)block)[i].a; + + // Use (l,0,0,a) not (l,l,l,a) so both components are treated equally. + temp_block[i].set_noclamp_rgba(l, 0, 0, a); + } + + ccell_params.m_num_pixels = 16; + //ccell_params.m_pPixels = (color_quad_u8*)&block[0][0]; + ccell_params.m_pPixels = (color_quad_u8*)temp_block; + ccell_params.m_num_selector_weights = 16; + ccell_params.m_pSelector_weights = g_astc_weights4; + ccell_params.m_pSelector_weightsx = (const bc7enc_vec4F*)g_astc_weights4x; + ccell_params.m_astc_endpoint_range = endpoint_range; + ccell_params.m_weights[0] = 1; + ccell_params.m_weights[1] = 1; + ccell_params.m_weights[2] = 1; + ccell_params.m_weights[3] = 1; + ccell_params.m_has_alpha = true; + + color_cell_compressor_results ccell_results; + uint8_t ccell_result_selectors[16]; + uint8_t ccell_result_selectors_temp[16]; + memset(&ccell_results, 0, sizeof(ccell_results)); + ccell_results.m_pSelectors = &ccell_result_selectors[0]; + ccell_results.m_pSelectors_temp = &ccell_result_selectors_temp[0]; + + color_cell_compression(255, &ccell_params, &ccell_results, &comp_params); + + // ASTC + astc_block_desc astc_results; + memset(&astc_results, 0, sizeof(astc_results)); + + astc_results.m_dual_plane = false; + astc_results.m_weight_range = weight_range; + + astc_results.m_ccs = 0; + astc_results.m_subsets = 1; + astc_results.m_partition_seed = 0; + astc_results.m_cem = 4; + + astc_results.m_endpoints[0] = ccell_results.m_astc_low_endpoint.m_c[0]; + astc_results.m_endpoints[1] = ccell_results.m_astc_high_endpoint.m_c[0]; + + astc_results.m_endpoints[2] = ccell_results.m_astc_low_endpoint.m_c[3]; + astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[3]; + + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + astc_results.m_weights[x + y * 4] = ccell_result_selectors[x + y * 4]; + + color_rgba colors[16]; + for (uint32_t c = 0; c < 4; c++) + { + colors[0].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_low_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + colors[15].m_comps[c] = g_astc_unquant[endpoint_range][ccell_results.m_astc_high_endpoint.m_c[(c < 3) ? 0 : 3]].m_unquant; + } + + for (uint32_t i = 1; i < 16 - 1; i++) + for (uint32_t c = 0; c < 4; c++) + colors[i].m_comps[c] = (uint8_t)astc_interpolate(colors[0].m_comps[c], colors[15].m_comps[c], g_astc_weights4[i], false); + + uint64_t total_err = 0; + for (uint32_t p = 0; p < 16; p++) + total_err += color_distance_la(((const color_rgba*)block)[p], colors[ccell_result_selectors[p]]); + + assert(total_results < MAX_ENCODE_RESULTS); + if (total_results < MAX_ENCODE_RESULTS) + { + pResults[total_results].m_uastc_mode = 15; + pResults[total_results].m_common_pattern = 0; + pResults[total_results].m_astc = astc_results; + pResults[total_results].m_astc_err = total_err; + total_results++; + } + } + + static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err) + { + uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0; + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int dr = (int)block[y][x].m_comps[0] - (int)decoded_block[y][x].m_comps[0]; + const int dg = (int)block[y][x].m_comps[1] - (int)decoded_block[y][x].m_comps[1]; + const int db = (int)block[y][x].m_comps[2] - (int)decoded_block[y][x].m_comps[2]; + const int da = (int)block[y][x].m_comps[3] - (int)decoded_block[y][x].m_comps[3]; + + total_err_r += dr * dr; + total_err_g += dg * dg; + total_err_b += db * db; + total_err_a += da * da; + } + } + + total_la_err = total_err_r + total_err_a; + total_rgb_err = total_err_r + total_err_g + total_err_b; + total_rgba_err = total_rgb_err + total_err_a; + } + + static void compute_bc1_hints(bool &bc1_hint0, bool &bc1_hint1, const uastc_encode_results &best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4]) + { + const uint32_t best_mode = best_results.m_uastc_mode; + const bool perceptual = false; + + bc1_hint0 = false; + bc1_hint1 = false; + + if (best_mode == UASTC_MODE_INDEX_SOLID_COLOR) + return; + + if (!g_uastc_mode_has_bc1_hint0[best_mode] && !g_uastc_mode_has_bc1_hint1[best_mode]) + return; + + color_rgba tblock_bc1[4][4]; + dxt1_block tbc1_block[8]; + basist::encode_bc1(tbc1_block, (const uint8_t*)&decoded_uastc_block[0][0], 0); + unpack_block(texture_format::cBC1, tbc1_block, &tblock_bc1[0][0]); + + color_rgba tblock_hint0_bc1[4][4]; + color_rgba tblock_hint1_bc1[4][4]; + + etc_block etc1_blk; + memset(&etc1_blk, 0, sizeof(etc1_blk)); + + eac_a8_block etc2_blk; + memset(&etc2_blk, 0, sizeof(etc2_blk)); + etc2_blk.m_multiplier = 1; + + // Pack to UASTC, then unpack, because the endpoints may be swapped. + + uastc_block temp_ublock; + pack_uastc(temp_ublock, best_results, etc1_blk, 0, etc2_blk, false, false); + + unpacked_uastc_block temp_ublock_unpacked; + unpack_uastc(temp_ublock, temp_ublock_unpacked, false); + + unpacked_uastc_block ublock; + memset(&ublock, 0, sizeof(ublock)); + ublock.m_mode = best_results.m_uastc_mode; + ublock.m_common_pattern = best_results.m_common_pattern; + ublock.m_astc = temp_ublock_unpacked.m_astc; + + dxt1_block b; + + // HINT1 + if (!g_uastc_mode_has_bc1_hint1[best_mode]) + { + memset(tblock_hint1_bc1, 0, sizeof(tblock_hint1_bc1)); + } + else + { + transcode_uastc_to_bc1_hint1(ublock, (color32 (*)[4]) decoded_uastc_block, &b, false); + + unpack_block(texture_format::cBC1, &b, &tblock_hint1_bc1[0][0]); + } + + // HINT0 + if (!g_uastc_mode_has_bc1_hint0[best_mode]) + { + memset(tblock_hint0_bc1, 0, sizeof(tblock_hint0_bc1)); + } + else + { + transcode_uastc_to_bc1_hint0(ublock, &b); + + unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]); + } + + // Compute block errors + uint64_t total_t_err = 0, total_hint0_err = 0, total_hint1_err = 0; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + total_t_err += color_distance(perceptual, block[y][x], tblock_bc1[y][x], false); + total_hint0_err += color_distance(perceptual, block[y][x], tblock_hint0_bc1[y][x], false); + total_hint1_err += color_distance(perceptual, block[y][x], tblock_hint1_bc1[y][x], false); + } + } + + const float t_err = sqrtf((float)total_t_err); + const float t_err_hint0 = sqrtf((float)total_hint0_err); + const float t_err_hint1 = sqrtf((float)total_hint1_err); + + const float err_thresh0 = 1.075f; + const float err_thresh1 = 1.075f; + + if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0)) + bc1_hint0 = true; + + if ((g_uastc_mode_has_bc1_hint1[best_mode]) && (t_err_hint1 <= t_err * err_thresh1)) + bc1_hint1 = true; + } + + struct ycbcr + { + int32_t m_y; + int32_t m_cb; + int32_t m_cr; + }; + + static inline void rgb_to_y_cb_cr(const color_rgba& c, ycbcr& dst) + { + const int y = c.r * 54 + c.g * 183 + c.b * 19; + dst.m_y = y; + dst.m_cb = (c.b << 8) - y; + dst.m_cr = (c.r << 8) - y; + } + + static inline uint64_t color_diff(const ycbcr& a, const ycbcr& b) + { + const int y_delta = a.m_y - b.m_y; + const int cb_delta = a.m_cb - b.m_cb; + const int cr_delta = a.m_cr - b.m_cr; + return ((int64_t)y_delta * y_delta * 4) + ((int64_t)cr_delta * cr_delta) + ((int64_t)cb_delta * cb_delta); + } + + static inline int gray_distance2(const color_rgba& c, int r, int g, int b) + { + int gray_dist = (((int)c[0] - r) + ((int)c[1] - g) + ((int)c[2] - b) + 1) / 3; + + int gray_point_r = clamp255(r + gray_dist); + int gray_point_g = clamp255(g + gray_dist); + int gray_point_b = clamp255(b + gray_dist); + + int dist_to_gray_point_r = c[0] - gray_point_r; + int dist_to_gray_point_g = c[1] - gray_point_g; + int dist_to_gray_point_b = c[2] - gray_point_b; + + return (dist_to_gray_point_r * dist_to_gray_point_r) + (dist_to_gray_point_g * dist_to_gray_point_g) + (dist_to_gray_point_b * dist_to_gray_point_b); + } + + static bool pack_etc1_estimate_flipped(const color_rgba* pSrc_pixels) + { + int sums[3][2][2]; + +#define GET_XY(x, y, c) pSrc_pixels[(x) + ((y) * 4)][c] + + for (uint32_t c = 0; c < 3; c++) + { + sums[c][0][0] = GET_XY(0, 0, c) + GET_XY(0, 1, c) + GET_XY(1, 0, c) + GET_XY(1, 1, c); + sums[c][1][0] = GET_XY(2, 0, c) + GET_XY(2, 1, c) + GET_XY(3, 0, c) + GET_XY(3, 1, c); + sums[c][0][1] = GET_XY(0, 2, c) + GET_XY(0, 3, c) + GET_XY(1, 2, c) + GET_XY(1, 3, c); + sums[c][1][1] = GET_XY(2, 2, c) + GET_XY(2, 3, c) + GET_XY(3, 2, c) + GET_XY(3, 3, c); + } + + int upper_avg[3], lower_avg[3], left_avg[3], right_avg[3]; + for (uint32_t c = 0; c < 3; c++) + { + upper_avg[c] = (sums[c][0][0] + sums[c][1][0] + 4) / 8; + lower_avg[c] = (sums[c][0][1] + sums[c][1][1] + 4) / 8; + left_avg[c] = (sums[c][0][0] + sums[c][0][1] + 4) / 8; + right_avg[c] = (sums[c][1][0] + sums[c][1][1] + 4) / 8; + } + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a[0], a[1], a[2]) + + int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + int upper_lower_sum = upper_gray_dist + lower_gray_dist; + int left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + static void compute_etc1_hints(etc_block& best_etc1_blk, uint32_t& best_etc1_bias, const uastc_encode_results& best_results, const color_rgba block[4][4], const color_rgba decoded_uastc_block[4][4], int level, uint32_t flags) + { + best_etc1_bias = 0; + + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_etc1_block_solid_color(best_etc1_blk, &best_results.m_solid_color.m_comps[0]); + return; + } + + const bool faster_etc1 = (flags & cPackUASTCETC1FasterHints) != 0; + const bool fastest_etc1 = (flags & cPackUASTCETC1FastestHints) != 0; + + const bool has_bias = g_uastc_mode_has_etc1_bias[best_results.m_uastc_mode]; + + // 0 should be at the top, but we need 13 first because it represents bias (0,0,0). + const uint8_t s_sorted_bias_modes[32] = { 13, 0, 22, 29, 27, 12, 26, 9, 30, 31, 8, 10, 25, 2, 23, 5, 15, 7, 3, 11, 6, 17, 28, 18, 1, 19, 20, 21, 24, 4, 14, 16 }; + + uint32_t last_bias = 1; + bool use_faster_bias_mode_table = false; + const bool flip_estimate = (level <= cPackUASTCLevelFaster) || (faster_etc1) || (fastest_etc1); + if (has_bias) + { + switch (level) + { + case cPackUASTCLevelFastest: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 1 : 2); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelFaster: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 3 : 5); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelDefault: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 10 : 20); + use_faster_bias_mode_table = true; + break; + } + case cPackUASTCLevelSlower: + { + last_bias = fastest_etc1 ? 1 : (faster_etc1 ? 16 : 32); + use_faster_bias_mode_table = true; + break; + } + default: + { + last_bias = 32; + break; + } + } + } + + memset(&best_etc1_blk, 0, sizeof(best_etc1_blk)); + uint64_t best_err = UINT64_MAX; + + etc_block trial_block; + memset(&trial_block, 0, sizeof(trial_block)); + + ycbcr block_ycbcr[4][4], decoded_uastc_block_ycbcr[4][4]; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + rgb_to_y_cb_cr(block[y][x], block_ycbcr[y][x]); + rgb_to_y_cb_cr(decoded_uastc_block[y][x], decoded_uastc_block_ycbcr[y][x]); + } + } + + uint32_t first_flip = 0, last_flip = 2; + uint32_t first_individ = 0, last_individ = 2; + + if (flags & cPackUASTCETC1DisableFlipAndIndividual) + { + last_flip = 1; + last_individ = 1; + } + else if (flip_estimate) + { + if (pack_etc1_estimate_flipped(&decoded_uastc_block[0][0])) + first_flip = 1; + last_flip = first_flip + 1; + } + + for (uint32_t flip = first_flip; flip < last_flip; flip++) + { + trial_block.set_flip_bit(flip != 0); + + for (uint32_t individ = first_individ; individ < last_individ; individ++) + { + const uint32_t mul = individ ? 15 : 31; + + trial_block.set_diff_bit(individ == 0); + + color_rgba unbiased_block_colors[2]; + + int min_r[2] = { 255, 255 }, min_g[2] = { 255, 255 }, min_b[2] = { 255, 255 }, max_r[2] = { 0, 0 }, max_g[2] = { 0, 0 }, max_b[2] = { 0, 0 }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint32_t avg_color[3]; + memset(avg_color, 0, sizeof(avg_color)); + + for (uint32_t j = 0; j < 8; j++) + { + const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j]; + const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x]; + + avg_color[0] += p.r; + avg_color[1] += p.g; + avg_color[2] += p.b; + + min_r[subset] = basisu::minimum<uint32_t>(min_r[subset], p.r); + min_g[subset] = basisu::minimum<uint32_t>(min_g[subset], p.g); + min_b[subset] = basisu::minimum<uint32_t>(min_b[subset], p.b); + + max_r[subset] = basisu::maximum<uint32_t>(max_r[subset], p.r); + max_g[subset] = basisu::maximum<uint32_t>(max_g[subset], p.g); + max_b[subset] = basisu::maximum<uint32_t>(max_b[subset], p.b); + } // j + + unbiased_block_colors[subset][0] = (uint8_t)((avg_color[0] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255)); + unbiased_block_colors[subset][3] = 0; + + } // subset + + for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++) + { + const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter; + + color_rgba block_colors[2]; + for (uint32_t subset = 0; subset < 2; subset++) + block_colors[subset] = has_bias ? apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset]; + + if (individ) + trial_block.set_block_color4(block_colors[0], block_colors[1]); + else + trial_block.set_block_color5_clamp(block_colors[0], block_colors[1]); + + uint32_t range[2]; + for (uint32_t subset = 0; subset < 2; subset++) + { + const color_rgba base_c(trial_block.get_block_color(subset, true)); + + const int pos_r = iabs(max_r[subset] - base_c.r); + const int neg_r = iabs(base_c.r - min_r[subset]); + + const int pos_g = iabs(max_g[subset] - base_c.g); + const int neg_g = iabs(base_c.g - min_g[subset]); + + const int pos_b = iabs(max_b[subset] - base_c.b); + const int neg_b = iabs(base_c.b - min_b[subset]); + + range[subset] = maximum(maximum(pos_r, neg_r, pos_g, neg_g), pos_b, neg_b); + } + + uint32_t best_inten_table[2] = { 0, 0 }; + + for (uint32_t subset = 0; subset < 2; subset++) + { + uint64_t best_subset_err = UINT64_MAX; + + const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 4 : 2)); + + for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++) + { + trial_block.set_inten_table(subset, inten_table); + + color_rgba color_table[4]; + trial_block.get_block_colors(color_table, subset); + + ycbcr color_table_ycbcr[4]; + for (uint32_t i = 0; i < 4; i++) + rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]); + + uint64_t total_error = 0; + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][0]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][1]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][2]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][3]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + if (total_error >= best_subset_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 0]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + 1]; + total_error += minimum(color_diff(color_table_ycbcr[0], c), color_diff(color_table_ycbcr[1], c), color_diff(color_table_ycbcr[2], c), color_diff(color_table_ycbcr[3], c)); + } + } + if (total_error >= best_subset_err) + break; + } + + if (total_error < best_subset_err) + { + best_subset_err = total_error; + best_inten_table[subset] = inten_table; + } + + } // inten_table + + } // subset + + trial_block.set_inten_table(0, best_inten_table[0]); + trial_block.set_inten_table(1, best_inten_table[1]); + + // Compute error against the ORIGINAL block. + uint64_t err = 0; + + for (uint32_t subset = 0; subset < 2; subset++) + { + color_rgba color_table[4]; + trial_block.get_block_colors(color_table, subset); + + ycbcr color_table_ycbcr[4]; + for (uint32_t i = 0; i < 4; i++) + rgb_to_y_cb_cr(color_table[i], color_table_ycbcr[i]); + + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const ycbcr& c = decoded_uastc_block_ycbcr[subset * 2 + y][x]; + const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3); + + const uint32_t best_index = (uint32_t)best_index_err & 3; + err += color_diff(block_ycbcr[subset * 2 + y][x], color_table_ycbcr[best_index]); + } + if (err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const ycbcr& c = decoded_uastc_block_ycbcr[y][subset * 2 + x]; + const uint64_t best_index_err = minimum(color_diff(color_table_ycbcr[0], c) << 2, (color_diff(color_table_ycbcr[1], c) << 2) + 1, (color_diff(color_table_ycbcr[2], c) << 2) + 2, (color_diff(color_table_ycbcr[3], c) << 2) + 3); + + const uint32_t best_index = (uint32_t)best_index_err & 3; + err += color_diff(block_ycbcr[y][subset * 2 + x], color_table_ycbcr[best_index]); + } + if (err >= best_err) + break; + } + } + + } // subset + + if (err < best_err) + { + best_err = err; + + best_etc1_blk = trial_block; + best_etc1_bias = bias; + } + + } // bias_iter + + } // individ + + } // flip + } + + struct uastc_pack_eac_a8_results + { + uint32_t m_base; + uint32_t m_table; + uint32_t m_multiplier; + }; + + static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) + { + assert(num_pixels <= 16); + + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < num_pixels; i++) + { + const uint32_t a = pPixels[i]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + results.m_base = min_alpha; + results.m_table = 13; + results.m_multiplier = 1; + return 0; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + uint64_t best_err = UINT64_MAX; + + for (uint32_t table = 0; table < 16; table++) + { + if ((table_mask & (1U << table)) == 0) + continue; + + const float range = (float)(g_etc2_eac_tables[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_etc2_eac_tables[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + const int base_min = clamp255(center - base_search_rad); + const int base_max = clamp255(center + base_search_rad); + + const int mul = (int)roundf(alpha_range / range); + const int mul_low = clamp<int>(mul - mul_search_rad, 1, 15); + const int mul_high = clamp<int>(mul + mul_search_rad, 1, 15); + + for (int base = base_min; base <= base_max; base++) + { + for (int multiplier = mul_low; multiplier <= mul_high; multiplier++) + { + uint64_t total_err = 0; + + for (uint32_t i = 0; i < num_pixels; i++) + { + const int a = pPixels[i]; + + uint32_t best_s_err = UINT32_MAX; + //uint32_t best_s = 0; + for (uint32_t s = 0; s < 8; s++) + { + const int v = clamp255((int)multiplier * g_etc2_eac_tables[table][s] + (int)base); + + uint32_t err = iabs(a - v); + if (err < best_s_err) + { + best_s_err = err; + //best_s = s; + } + } + + total_err += best_s_err * best_s_err; + if (total_err >= best_err) + break; + } + + if (total_err < best_err) + { + best_err = total_err; + results.m_base = base; + results.m_multiplier = multiplier; + results.m_table = table; + if (!best_err) + return best_err; + } + + } // table + + } // multiplier + + } // base + + return best_err; + } + + const int32_t DEFAULT_BC7_ERROR_WEIGHT = 50; + const float UASTC_ERROR_THRESH = 1.3f; + + // TODO: This is a quick hack to favor certain modes when we know we'll be followed up with an RDO postprocess. + static inline float get_uastc_mode_weight(uint32_t mode) + { + const float FAVORED_MODE_WEIGHT = .8f; + + switch (mode) + { + case 0: + case 10: + return FAVORED_MODE_WEIGHT; + default: + break; + } + + return 1.0f; + } + + void encode_uastc(const uint8_t* pRGBAPixels, uastc_block& output_block, uint32_t flags) + { +// printf("encode_uastc: \n"); +// for (int i = 0; i < 16; i++) +// printf("[%u %u %u %u] ", pRGBAPixels[i * 4 + 0], pRGBAPixels[i * 4 + 1], pRGBAPixels[i * 4 + 2], pRGBAPixels[i * 4 + 3]); +// printf("\n"); + + const color_rgba(*block)[4] = reinterpret_cast<const color_rgba(*)[4]>(pRGBAPixels); + + bool solid_color = true, has_alpha = false, is_la = true; + + const color_rgba first_color(block[0][0]); + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + if (block[y][x].a < 255) + has_alpha = true; + + if (block[y][x] != first_color) + solid_color = false; + + if ((block[y][x].r != block[y][x].g) || (block[y][x].r != block[y][x].b)) + is_la = false; + } + } + + if (solid_color) + { + // Solid color blocks are so common that we handle them specially and as quickly as we can. + uastc_encode_results solid_results; + solid_results.m_uastc_mode = UASTC_MODE_INDEX_SOLID_COLOR; + solid_results.m_astc_err = 0; + solid_results.m_common_pattern = 0; + solid_results.m_solid_color = first_color; + memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc)); + + etc_block etc1_blk; + uint32_t etc1_bias = 0; + + pack_etc1_block_solid_color(etc1_blk, &first_color.m_comps[0]); + + eac_a8_block eac_a8_blk; + eac_a8_blk.m_table = 0; + eac_a8_blk.m_multiplier = 1; + + pack_uastc(output_block, solid_results, etc1_blk, etc1_bias, eac_a8_blk, false, false); + +// printf(" Solid\n"); + + return; + } + + int level = flags & 7; + const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0; + const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); + //const bool etc1_perceptual = true; + + uastc_encode_results results[MAX_ENCODE_RESULTS]; + + level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); + + // Set all options to slowest, then configure from there depending on the selected level. + uint32_t mode_mask = UINT32_MAX; + uint32_t uber_level = 6; + bool estimate_partition = false; + bool always_try_alpha_modes = true; + uint32_t eac_a8_mul_search_rad = 3; + uint32_t eac_a8_table_mask = UINT32_MAX; + uint32_t least_squares_passes = 2; + bool bc1_hints = true; + bool only_use_la_on_transparent_blocks = false; + + switch (level) + { + case cPackUASTCLevelFastest: + { + mode_mask = (1 << 0) | (1 << 8) | + (1 << 11) | (1 << 12) | + (1 << 15); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + uber_level = 0; + least_squares_passes = 1; + bc1_hints = false; + estimate_partition = true; + only_use_la_on_transparent_blocks = true; + break; + } + case cPackUASTCLevelFaster: + { + mode_mask = (1 << 0) | (1 << 4) | (1 << 6) | (1 << 8) | + (1 << 9) | (1 << 11) | (1 << 12) | + (1 << 15) | (1 << 17); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + uber_level = 0; + least_squares_passes = 1; + estimate_partition = true; + break; + } + case cPackUASTCLevelDefault: + { + mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) | + (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) | + (1 << 15) | (1 << 16) | (1 << 17); + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 1; + eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13); + uber_level = 1; + least_squares_passes = 1; + estimate_partition = true; + break; + } + case cPackUASTCLevelSlower: + { + always_try_alpha_modes = false; + eac_a8_mul_search_rad = 2; + uber_level = 3; + estimate_partition = true; + break; + } + case cPackUASTCLevelVerySlow: + { + break; + } + } + +#if BASISU_SUPPORT_FORCE_MODE + static int force_mode = -1; + force_mode = (force_mode + 1) % TOTAL_UASTC_MODES; + mode_mask = UINT32_MAX; + always_try_alpha_modes = true; + only_use_la_on_transparent_blocks = false; +#endif + + // HACK HACK + //mode_mask &= ~(1 << 18); + //mode_mask = (1 << 18)| (1 << 10); + + uint32_t total_results = 0; + + if (only_use_la_on_transparent_blocks) + { + if ((is_la) && (!has_alpha)) + is_la = false; + } + + const bool try_alpha_modes = has_alpha || always_try_alpha_modes; + + bc7enc_compress_block_params comp_params; + memset(&comp_params, 0, sizeof(comp_params)); + comp_params.m_max_partitions_mode1 = 64; + comp_params.m_least_squares_passes = least_squares_passes; + comp_params.m_weights[0] = 1; + comp_params.m_weights[1] = 1; + comp_params.m_weights[2] = 1; + comp_params.m_weights[3] = 1; + comp_params.m_uber_level = uber_level; + + if (is_la) + { + if (mode_mask & (1U << 15)) + astc_mode15(block, results, total_results, comp_params); + + if (mode_mask & (1U << 16)) + astc_mode9_or_16(16, block, results, total_results, comp_params, estimate_partition ? 4 : 0); + + if (mode_mask & (1U << 17)) + astc_mode11_or_17(17, block, results, total_results, comp_params); + } + + if (!has_alpha) + { + if (mode_mask & (1U << 0)) + astc_mode0_or_18(0, block, results, total_results, comp_params); + + if (mode_mask & (1U << 1)) + astc_mode1(block, results, total_results, comp_params); + + if (mode_mask & (1U << 2)) + astc_mode2(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 3)) + astc_mode3(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 4)) + astc_mode4(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 5)) + astc_mode5(block, results, total_results, comp_params); + + if (mode_mask & (1U << 6)) + astc_mode6(block, results, total_results, comp_params); + + if (mode_mask & (1U << 7)) + astc_mode7(block, results, total_results, comp_params, estimate_partition); + + if (mode_mask & (1U << 18)) + astc_mode0_or_18(18, block, results, total_results, comp_params); + } + + if (try_alpha_modes) + { + if (mode_mask & (1U << 9)) + astc_mode9_or_16(9, block, results, total_results, comp_params, estimate_partition ? 4 : 0); + + if (mode_mask & (1U << 10)) + astc_mode10(block, results, total_results, comp_params); + + if (mode_mask & (1U << 11)) + astc_mode11_or_17(11, block, results, total_results, comp_params); + + if (mode_mask & (1U << 12)) + astc_mode12(block, results, total_results, comp_params); + + if (mode_mask & (1U << 13)) + astc_mode13(block, results, total_results, comp_params); + + if (mode_mask & (1U << 14)) + astc_mode14(block, results, total_results, comp_params); + } + + assert(total_results); + + // Fix up the errors so we consistently have LA, RGB, or RGBA error. + for (uint32_t i = 0; i < total_results; i++) + { + uastc_encode_results& r = results[i]; + if (!is_la) + { + if (g_uastc_mode_is_la[r.m_uastc_mode]) + { + color_rgba unpacked_block[16]; + unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false); + + uint64_t total_err = 0; + for (uint32_t j = 0; j < 16; j++) + total_err += color_distance(unpacked_block[j], ((const color_rgba*)block)[j], true); + + r.m_astc_err = total_err; + } + } + else + { + if (!g_uastc_mode_is_la[r.m_uastc_mode]) + { + color_rgba unpacked_block[16]; + unpack_uastc(r.m_uastc_mode, r.m_common_pattern, r.m_solid_color.get_color32(), r.m_astc, (basist::color32 *)unpacked_block, false); + + uint64_t total_err = 0; + for (uint32_t j = 0; j < 16; j++) + total_err += color_distance_la(unpacked_block[j], ((const color_rgba*)block)[j]); + + r.m_astc_err = total_err; + } + } + } + + unpacked_uastc_block unpacked_ublock; + memset(&unpacked_ublock, 0, sizeof(unpacked_ublock)); + + uint64_t total_overall_err[MAX_ENCODE_RESULTS]; + float uastc_err_f[MAX_ENCODE_RESULTS]; + double best_uastc_err_f = 1e+20f; + + int best_index = -1; + + if (total_results == 1) + { + best_index = 0; + } + else + { + const uint32_t bc7_err_weight = favor_bc7_error ? 100 : ((favor_uastc_error ? 0 : DEFAULT_BC7_ERROR_WEIGHT)); + const uint32_t uastc_err_weight = favor_bc7_error ? 0 : 100; + + // Find best overall results, balancing UASTC and UASTC->BC7 error. + // We purposely allow UASTC error to increase a little, if doing so lowers the BC7 error. + for (uint32_t i = 0; i < total_results; i++) + { +#if BASISU_SUPPORT_FORCE_MODE + if (results[i].m_uastc_mode == force_mode) + { + best_index = i; + break; + } +#endif + + unpacked_ublock.m_mode = results[i].m_uastc_mode; + unpacked_ublock.m_astc = results[i].m_astc; + unpacked_ublock.m_common_pattern = results[i].m_common_pattern; + unpacked_ublock.m_solid_color = results[i].m_solid_color.get_color32(); + + color_rgba decoded_uastc_block[4][4]; + bool success = unpack_uastc(results[i].m_uastc_mode, results[i].m_common_pattern, results[i].m_solid_color.get_color32(), results[i].m_astc, (basist::color32 *)&decoded_uastc_block[0][0], false); + (void)success; + VALIDATE(success); + + uint64_t total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err; + compute_block_error(block, decoded_uastc_block, total_uastc_rgb_err, total_uastc_rgba_err, total_uastc_la_err); + + // Validate the computed error, or we're go mad if it's inaccurate. + if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + VALIDATE(total_uastc_rgba_err == 0); + } + else if (is_la) + { + VALIDATE(total_uastc_la_err == results[i].m_astc_err); + } + else if (g_uastc_mode_has_alpha[results[i].m_uastc_mode]) + { + VALIDATE(total_uastc_rgba_err == results[i].m_astc_err); + } + else + { + VALIDATE(total_uastc_rgb_err == results[i].m_astc_err); + } + + // Transcode to BC7 + bc7_optimization_results bc7_results; + transcode_uastc_to_bc7(unpacked_ublock, bc7_results); + + bc7_block bc7_data; + encode_bc7_block(&bc7_data, &bc7_results); + + color_rgba decoded_bc7_block[4][4]; + unpack_block(texture_format::cBC7, &bc7_data, &decoded_bc7_block[0][0]); + + // Compute BC7 error + uint64_t total_bc7_la_err, total_bc7_rgb_err, total_bc7_rgba_err; + compute_block_error(block, decoded_bc7_block, total_bc7_rgb_err, total_bc7_rgba_err, total_bc7_la_err); + + if (results[i].m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + VALIDATE(total_bc7_rgba_err == 0); + + best_index = i; + break; + } + + uint64_t total_uastc_err = 0, total_bc7_err = 0; + if (is_la) + { + total_bc7_err = total_bc7_la_err; + total_uastc_err = total_uastc_la_err; + } + else if (has_alpha) + { + total_bc7_err = total_bc7_rgba_err; + total_uastc_err = total_uastc_rgba_err; + } + else + { + total_bc7_err = total_bc7_rgb_err; + total_uastc_err = total_uastc_rgb_err; + } + + total_overall_err[i] = ((total_bc7_err * bc7_err_weight) / 100) + ((total_uastc_err * uastc_err_weight) / 100); + if (!total_overall_err[i]) + { + best_index = i; + break; + } + + uastc_err_f[i] = sqrtf((float)total_uastc_err); + + if (uastc_err_f[i] < best_uastc_err_f) + { + best_uastc_err_f = uastc_err_f[i]; + } + + } // total_results + + if (best_index < 0) + { + uint64_t best_err = UINT64_MAX; + + if ((best_uastc_err_f == 0.0f) || (favor_bc7_error)) + { + for (uint32_t i = 0; i < total_results; i++) + { + // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression. + const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f; + + const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight); + if (w < best_err) + { + best_err = w; + best_index = i; + if (!best_err) + break; + } + } // i + } + else + { + // Scan the UASTC results, and consider all results within a window that has the best UASTC+BC7 error. + for (uint32_t i = 0; i < total_results; i++) + { + double err_delta = uastc_err_f[i] / best_uastc_err_f; + + if (err_delta <= UASTC_ERROR_THRESH) + { + // TODO: This is a quick hack to favor modes 0 or 10 for better RDO compression. + const float err_weight = (flags & cPackUASTCFavorSimplerModes) ? get_uastc_mode_weight(results[i].m_uastc_mode) : 1.0f; + + const uint64_t w = (uint64_t)(total_overall_err[i] * err_weight); + if (w < best_err) + { + best_err = w; + best_index = i; + if (!best_err) + break; + } + } + } // i + } + } + } + + const uastc_encode_results& best_results = results[best_index]; + const uint32_t best_mode = best_results.m_uastc_mode; + const astc_block_desc& best_astc_results = best_results.m_astc; + + color_rgba decoded_uastc_block[4][4]; + bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false); + (void)success; + VALIDATE(success); + +#if BASISU_VALIDATE_UASTC_ENC + // Make sure that the UASTC block unpacks to the same exact pixels as the ASTC block does, using two different decoders. + { + // Round trip to packed UASTC and back, then decode to pixels. + etc_block etc1_blk; + memset(&etc1_blk, 0, sizeof(etc1_blk)); + eac_a8_block etc_eac_a8_blk; + memset(&etc_eac_a8_blk, 0, sizeof(etc_eac_a8_blk)); + etc_eac_a8_blk.m_multiplier = 1; + + basist::uastc_block temp_block; + pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false); + + basist::color32 temp_block_unpacked[4][4]; + success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); + VALIDATE(success); + + // Now round trip to packed ASTC and back, then decode to pixels. + uint32_t astc_data[4]; + + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) + pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color); + else + { + success = pack_astc_block(astc_data, &best_astc_results, best_results.m_uastc_mode); + VALIDATE(success); + } + + color_rgba decoded_astc_block[4][4]; + success = basisu_astc::astc::decompress((uint8_t*)decoded_astc_block, (uint8_t*)&astc_data, false, 4, 4); + VALIDATE(success); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]); + + VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r); + VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g); + VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b); + VALIDATE(temp_block_unpacked[y][x].c[3] == decoded_uastc_block[y][x].a); + } + } + } +#endif + + // Compute BC1 hints + bool bc1_hint0 = false, bc1_hint1 = false; + if (bc1_hints) + compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block); + + eac_a8_block eac_a8_blk; + if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + // Compute ETC2 hints + uint8_t decoded_uastc_block_alpha[16]; + for (uint32_t i = 0; i < 16; i++) + decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a; + + uastc_pack_eac_a8_results eac8_a8_results; + memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); + uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); + + // All we care about for hinting is the table and multiplier. + eac_a8_blk.m_table = eac8_a8_results.m_table; + eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; + } + else + { + memset(&eac_a8_blk, 0, sizeof(eac_a8_blk)); + } + + // Compute ETC1 hints + etc_block etc1_blk; + uint32_t etc1_bias = 0; + compute_etc1_hints(etc1_blk, etc1_bias, best_results, block, decoded_uastc_block, level, flags); + + // Finally, pack the UASTC block with its hints and we're done. + pack_uastc(output_block, best_results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1); + +// printf(" Packed: "); +// for (int i = 0; i < 16; i++) +// printf("%X ", output_block.m_bytes[i]); +// printf("\n"); + } + + static bool uastc_recompute_hints(basist::uastc_block* pBlock, const color_rgba* pBlock_pixels, uint32_t flags, const unpacked_uastc_block *pUnpacked_blk) + { + unpacked_uastc_block unpacked_blk; + + if (pUnpacked_blk) + unpacked_blk = *pUnpacked_blk; + else + { + if (!unpack_uastc(*pBlock, unpacked_blk, false, true)) + return false; + } + color_rgba decoded_uastc_block[4][4]; + if (!unpack_uastc(unpacked_blk, (basist::color32 *)decoded_uastc_block, false)) + return false; + uastc_encode_results results; + results.m_uastc_mode = unpacked_blk.m_mode; + results.m_common_pattern = unpacked_blk.m_common_pattern; + results.m_astc = unpacked_blk.m_astc; + results.m_solid_color = unpacked_blk.m_solid_color; + results.m_astc_err = 0; + bool bc1_hints = true; + uint32_t eac_a8_mul_search_rad = 3; + uint32_t eac_a8_table_mask = UINT32_MAX; + const uint32_t level = flags & cPackUASTCLevelMask; + switch (level) + { + case cPackUASTCLevelFastest: + { + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + bc1_hints = false; + break; + } + case cPackUASTCLevelFaster: + { + eac_a8_mul_search_rad = 0; + eac_a8_table_mask = (1 << 2) | (1 << 8) | (1 << 11) | (1 << 13); + break; + } + case cPackUASTCLevelDefault: + { + eac_a8_mul_search_rad = 1; + eac_a8_table_mask = (1 << 0) | (1 << 2) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) | (1 << 11) | (1 << 13); + break; + } + case cPackUASTCLevelSlower: + { + eac_a8_mul_search_rad = 2; + break; + } + case cPackUASTCLevelVerySlow: + { + break; + } + } + bool bc1_hint0 = false, bc1_hint1 = false; + if (bc1_hints) + compute_bc1_hints(bc1_hint0, bc1_hint1, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block); + const uint32_t best_mode = unpacked_blk.m_mode; + eac_a8_block eac_a8_blk; + if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) + { + uint8_t decoded_uastc_block_alpha[16]; + for (uint32_t i = 0; i < 16; i++) + decoded_uastc_block_alpha[i] = decoded_uastc_block[i >> 2][i & 3].a; + uastc_pack_eac_a8_results eac8_a8_results; + memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); + uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); + eac_a8_blk.m_table = eac8_a8_results.m_table; + eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; + } + else + { + memset(&eac_a8_blk, 0, sizeof(eac_a8_blk)); + } + etc_block etc1_blk; + uint32_t etc1_bias = 0; + compute_etc1_hints(etc1_blk, etc1_bias, results, (color_rgba (*)[4])pBlock_pixels, decoded_uastc_block, level, flags); + pack_uastc(*pBlock, results, etc1_blk, etc1_bias, eac_a8_blk, bc1_hint0, bc1_hint1); + return true; + } + + static const uint8_t g_uastc_mode_selector_bits[TOTAL_UASTC_MODES][2] = + { + { 65, 63 }, { 69, 31 }, { 73, 46 }, { 89, 29 }, + { 89, 30 }, { 68, 47 }, { 66, 62 }, { 89, 30 }, + { 0, 0 }, { 97, 30 }, { 65, 63 }, { 66, 62 }, + { 81, 47 }, { 94, 30 }, { 92, 31 }, { 62, 63 }, + { 98, 30 }, { 61, 62 }, { 49, 79 } + }; + + static inline uint32_t set_block_bits(uint8_t* pBytes, uint64_t val, uint32_t num_bits, uint32_t cur_ofs) + { + assert(num_bits <= 64); + assert((num_bits == 64) || (val < (1ULL << num_bits))); + uint64_t mask = (num_bits == 64) ? UINT64_MAX : ((1ULL << num_bits) - 1); + while (num_bits) + { + const uint32_t n = basisu::minimum<uint32_t>(8U - (cur_ofs & 7U), num_bits); + pBytes[cur_ofs >> 3] &= ~static_cast<uint8_t>(mask << (cur_ofs & 7U)); + pBytes[cur_ofs >> 3] |= static_cast<uint8_t>(val << (cur_ofs & 7U)); + val >>= n; + mask >>= n; + num_bits -= n; + cur_ofs += n; + } + return cur_ofs; + } + + static const uint8_t g_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static const uint8_t g_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + + static inline uint32_t compute_match_cost_estimate(uint32_t dist) + { + uint32_t len_cost = 7; + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[basisu::minimum<uint32_t>(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; + } + + struct selector_bitsequence + { + uint64_t m_sel; + uint32_t m_ofs; + selector_bitsequence() { } + selector_bitsequence(uint32_t bit_ofs, uint64_t sel) : m_sel(sel), m_ofs(bit_ofs) { } + bool operator== (const selector_bitsequence& other) const + { + return (m_ofs == other.m_ofs) && (m_sel == other.m_sel); + } + + bool operator< (const selector_bitsequence& other) const + { + if (m_ofs < other.m_ofs) + return true; + else if (m_ofs == other.m_ofs) + return m_sel < other.m_sel; + + return false; + } + }; + + struct selector_bitsequence_hash + { + std::size_t operator()(selector_bitsequence const& s) const noexcept + { + return static_cast<std::size_t>(hash_hsieh((uint8_t *)&s, sizeof(s)) ^ s.m_sel); + } + }; + + class tracked_stat + { + public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; + }; + + static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, + uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth) + { + debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index); + + const int total_blocks_to_check = basisu::maximum<uint32_t>(1U, params.m_lz_dict_size / sizeof(basist::uastc_block)); + const bool perceptual = false; + + std::unordered_map<selector_bitsequence, uint32_t, selector_bitsequence_hash> selector_history; + + for (uint32_t block_index = first_index; block_index < last_index; block_index++) + { + const basist::uastc_block& blk = pBlocks[block_index]; + const color_rgba* pPixels = &pBlock_pixels[16 * block_index]; + + unpacked_uastc_block unpacked_blk; + if (!unpack_uastc(blk, unpacked_blk, false, true)) + return false; + + const uint32_t block_mode = unpacked_blk.m_mode; + if (block_mode == UASTC_MODE_INDEX_SOLID_COLOR) + continue; + + tracked_stat r_stats, g_stats, b_stats, a_stats; + + for (uint32_t i = 0; i < 16; i++) + { + r_stats.update(pPixels[i].r); + g_stats.update(pPixels[i].g); + b_stats.update(pPixels[i].b); + a_stats.update(pPixels[i].a); + } + + const float max_std_dev = basisu::maximum<float>(basisu::maximum<float>(basisu::maximum(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev()); + + float yl = clamp<float>(max_std_dev / params.m_max_smooth_block_std_dev, 0.0f, 1.0f); + yl = yl * yl; + const float smooth_block_error_scale = lerp<float>(params.m_smooth_block_max_error_scale, 1.0f, yl); + if (smooth_block_error_scale > 1.0f) + total_smooth++; + + color_rgba decoded_uastc_block[4][4]; + if (!unpack_uastc(unpacked_blk, (basist::color32*)decoded_uastc_block, false)) + return false; + + uint64_t uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_uastc_block)[i], true); + + // Transcode to BC7 + bc7_optimization_results b7_results; + if (!transcode_uastc_to_bc7(unpacked_blk, b7_results)) + return false; + + basist::bc7_block b7_block; + basist::encode_bc7_block(&b7_block, &b7_results); + + color_rgba decoded_b7_blk[4][4]; + unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]); + + uint64_t bc7_err = 0; + for (uint32_t i = 0; i < 16; i++) + bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true); + + uint64_t cur_err = (uastc_err + bc7_err) / 2; + + // Divide by 16*4 to compute RMS error + const float cur_ms_err = (float)cur_err * (1.0f / 64.0f); + const float cur_rms_err = sqrt(cur_ms_err); + + const uint32_t first_sel_bit = g_uastc_mode_selector_bits[block_mode][0]; + const uint32_t total_sel_bits = g_uastc_mode_selector_bits[block_mode][1]; + assert(first_sel_bit + total_sel_bits <= 128); + assert(total_sel_bits > 0); + + uint32_t cur_bit_offset = first_sel_bit; + uint64_t cur_sel_bits = read_bits((const uint8_t*)&blk, cur_bit_offset, basisu::minimum(64U, total_sel_bits)); + + if (cur_rms_err >= params.m_skip_block_rms_thresh) + { + auto cur_search_res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, cur_sel_bits), block_index)); + + // Block already has too much error, so don't mess with it. + if (!cur_search_res.second) + (*cur_search_res.first).second = block_index; + + total_skipped++; + continue; + } + + int cur_bits; + auto cur_find_res = selector_history.find(selector_bitsequence(first_sel_bit, cur_sel_bits)); + if (cur_find_res == selector_history.end()) + { + // Wasn't found - wildly estimate literal cost + //cur_bits = (total_sel_bits * 5) / 4; + cur_bits = (total_sel_bits * params.m_lz_literal_cost) / 100; + } + else + { + // Was found - wildly estimate match cost + uint32_t match_block_index = cur_find_res->second; + const int block_dist_in_bytes = (block_index - match_block_index) * 16; + cur_bits = compute_match_cost_estimate(block_dist_in_bytes); + } + + int first_block_to_check = basisu::maximum<int>(first_index, block_index - total_blocks_to_check); + int last_block_to_check = block_index - 1; + + basist::uastc_block best_block(blk); + uint32_t best_block_index = block_index; + + float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda; + + // Now scan through previous blocks, insert their selector bit patterns into the current block, and find + // selector bit patterns which don't increase the overall block error too much. + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const basist::uastc_block& prev_blk = pBlocks[prev_block_index]; + + uint32_t bit_offset = first_sel_bit; + uint64_t sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, basisu::minimum(64U, total_sel_bits)); + + int match_block_index = prev_block_index; + auto res = selector_history.find(selector_bitsequence(first_sel_bit, sel_bits)); + if (res != selector_history.end()) + match_block_index = res->second; + // Have we already checked this bit pattern? If so then skip this block. + if (match_block_index > prev_block_index) + continue; + + unpacked_uastc_block unpacked_prev_blk; + if (!unpack_uastc(prev_blk, unpacked_prev_blk, false, true)) + return false; + + basist::uastc_block trial_blk(blk); + + set_block_bits((uint8_t*)&trial_blk, sel_bits, basisu::minimum(64U, total_sel_bits), first_sel_bit); + + if (total_sel_bits > 64) + { + sel_bits = read_bits((const uint8_t*)&prev_blk, bit_offset, total_sel_bits - 64U); + + set_block_bits((uint8_t*)&trial_blk, sel_bits, total_sel_bits - 64U, first_sel_bit + basisu::minimum(64U, total_sel_bits)); + } + + unpacked_uastc_block unpacked_trial_blk; + if (!unpack_uastc(trial_blk, unpacked_trial_blk, false, true)) + continue; + + color_rgba decoded_trial_uastc_block[4][4]; + if (!unpack_uastc(unpacked_trial_blk, (basist::color32*)decoded_trial_uastc_block, false)) + continue; + + uint64_t trial_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true); + + // Transcode trial to BC7, compute error + bc7_optimization_results trial_b7_results; + if (!transcode_uastc_to_bc7(unpacked_trial_blk, trial_b7_results)) + return false; + + basist::bc7_block trial_b7_block; + basist::encode_bc7_block(&trial_b7_block, &trial_b7_results); + + color_rgba decoded_trial_b7_blk[4][4]; + unpack_block(texture_format::cBC7, &trial_b7_block, &decoded_trial_b7_blk[0][0]); + + uint64_t trial_bc7_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_b7_blk)[i], true); + + uint64_t trial_err = (trial_uastc_err + trial_bc7_err) / 2; + + const float trial_ms_err = (float)trial_err * (1.0f / 64.0f); + const float trial_rms_err = sqrtf(trial_ms_err); + + if (trial_rms_err > cur_rms_err * params.m_max_allowed_rms_increase_ratio) + continue; + + const int block_dist_in_bytes = (block_index - match_block_index) * 16; + const int match_bits = compute_match_cost_estimate(block_dist_in_bytes); + + float t = trial_ms_err * smooth_block_error_scale + match_bits * params.m_lambda; + if (t < best_t) + { + best_t = t; + best_block_index = prev_block_index; + + best_block = trial_blk; + } + + } // prev_block_index + + if (best_block_index != block_index) + { + total_modified++; + + unpacked_uastc_block unpacked_best_blk; + if (!unpack_uastc(best_block, unpacked_best_blk, false, false)) + return false; + + if ((params.m_endpoint_refinement) && (block_mode == 0)) + { + // Attempt to refine mode 0 block's endpoints, using the new selectors. This doesn't help much, but it does help. + // TODO: We could do this with the other modes too. + color_rgba decoded_best_uastc_block[4][4]; + if (!unpack_uastc(unpacked_best_blk, (basist::color32*)decoded_best_uastc_block, false)) + return false; + + // Compute the block's current error (with the modified selectors). + uint64_t best_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + best_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_best_uastc_block)[i], true); + + bc7enc_compress_block_params comp_params; + memset(&comp_params, 0, sizeof(comp_params)); + comp_params.m_max_partitions_mode1 = 64; + comp_params.m_least_squares_passes = 1; + comp_params.m_weights[0] = 1; + comp_params.m_weights[1] = 1; + comp_params.m_weights[2] = 1; + comp_params.m_weights[3] = 1; + comp_params.m_uber_level = 0; + + uastc_encode_results results; + uint32_t total_results = 0; + astc_mode0_or_18(0, (color_rgba(*)[4])pPixels, &results, total_results, comp_params, unpacked_best_blk.m_astc.m_weights); + assert(total_results == 1); + + // See if the overall error has actually gone done. + + color_rgba decoded_trial_uastc_block[4][4]; + bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false); + assert(success); + + BASISU_NOTE_UNUSED(success); + + uint64_t trial_uastc_err = 0; + for (uint32_t i = 0; i < 16; i++) + trial_uastc_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_trial_uastc_block)[i], true); + + if (trial_uastc_err < best_uastc_err) + { + // The error went down, so accept the new endpoints. + + // Ensure the selectors haven't changed, otherwise we'll invalidate the LZ matches. + for (uint32_t i = 0; i < 16; i++) + assert(unpacked_best_blk.m_astc.m_weights[i] == results.m_astc.m_weights[i]); + + unpacked_best_blk.m_astc = results.m_astc; + + total_refined++; + } + } // if ((params.m_endpoint_refinement) && (block_mode == 0)) + + // The selectors have changed, so go recompute the block hints. + if (!uastc_recompute_hints(&best_block, pPixels, flags, &unpacked_best_blk)) + return false; + + // Write the modified block + pBlocks[block_index] = best_block; + + } // if (best_block_index != block_index) + + { + uint32_t bit_offset = first_sel_bit; + uint64_t sel_bits = read_bits((const uint8_t*)&best_block, bit_offset, basisu::minimum(64U, total_sel_bits)); + + auto res = selector_history.insert(std::make_pair(selector_bitsequence(first_sel_bit, sel_bits), block_index)); + if (!res.second) + (*res.first).second = block_index; + } + + } // block_index + + return true; + } + + // This function implements a basic form of rate distortion optimization (RDO) for UASTC. + // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation. + // There's A LOT that can be done better in here, but it's a start. + // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses. + bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, job_pool* pJob_pool, uint32_t total_jobs) + { + assert(params.m_max_allowed_rms_increase_ratio > 1.0f); + assert(params.m_lz_dict_size > 0); + assert(params.m_lambda > 0.0f); + + uint32_t total_skipped = 0, total_modified = 0, total_refined = 0, total_smooth = 0; + + uint32_t blocks_per_job = total_jobs ? (num_blocks / total_jobs) : 0; + + std::mutex stat_mutex; + + bool status = false; + + if ((!pJob_pool) || (total_jobs <= 1) || (blocks_per_job <= 8)) + { + status = uastc_rdo_blocks(0, num_blocks, pBlocks, pBlock_pixels, params, flags, total_skipped, total_refined, total_modified, total_smooth); + } + else + { + bool all_succeeded = true; + + for (uint32_t block_index_iter = 0; block_index_iter < num_blocks; block_index_iter += blocks_per_job) + { + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum<uint32_t>(num_blocks, block_index_iter + blocks_per_job); + +#ifndef __EMSCRIPTEN__ + pJob_pool->add_job([first_index, last_index, pBlocks, pBlock_pixels, ¶ms, flags, &total_skipped, &total_modified, &total_refined, &total_smooth, &all_succeeded, &stat_mutex] { +#endif + + uint32_t job_skipped = 0, job_modified = 0, job_refined = 0, job_smooth = 0; + + bool status = uastc_rdo_blocks(first_index, last_index, pBlocks, pBlock_pixels, params, flags, job_skipped, job_refined, job_modified, job_smooth); + + { + std::lock_guard<std::mutex> lck(stat_mutex); + + all_succeeded = all_succeeded && status; + total_skipped += job_skipped; + total_modified += job_modified; + total_refined += job_refined; + total_smooth += job_smooth; + } + +#ifndef __EMSCRIPTEN__ + } + ); +#endif + + } // block_index_iter + +#ifndef __EMSCRIPTEN__ + pJob_pool->wait_for_all(); +#endif + + status = all_succeeded; + } + + debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks); + + return status; + } +} // namespace basisu + + + + + diff --git a/thirdparty/basis_universal/encoder/basisu_uastc_enc.h b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h new file mode 100644 index 0000000000..ba39a558b3 --- /dev/null +++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.h @@ -0,0 +1,140 @@ +// basisu_uastc_enc.h +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include "basisu_etc.h" + +#include "../transcoder/basisu_transcoder_uastc.h" + +namespace basisu +{ + const uint32_t TOTAL_PACK_UASTC_LEVELS = 5; + + enum + { + // Fastest is the lowest quality, although it's stil substantially higher quality vs. BC1/ETC1. It supports 5 modes. + // The output may be somewhat blocky because this setting doesn't support 2/3-subset UASTC modes, but it should be less blocky vs. BC1/ETC1. + // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. + // Transcoded ETC1 quality will be lower because it only considers 2 hints out of 32. + // Avg. 43.45 dB + cPackUASTCLevelFastest = 0, + + // Faster is ~3x slower than fastest. It supports 9 modes. + // Avg. 46.49 dB + cPackUASTCLevelFaster = 1, + + // Default is ~5.5x slower than fastest. It supports 14 modes. + // Avg. 47.47 dB + cPackUASTCLevelDefault = 2, + + // Slower is ~14.5x slower than fastest. It supports all 18 modes. + // Avg. 48.01 dB + cPackUASTCLevelSlower = 3, + + // VerySlow is ~200x slower than fastest. + // The best quality the codec is capable of, but you'll need to be patient or have a lot of cores. + // Avg. 48.24 dB + cPackUASTCLevelVerySlow = 4, + + cPackUASTCLevelMask = 0xF, + + // By default the encoder tries to strike a balance between UASTC and transcoded BC7 quality. + // These flags allow you to favor only optimizing for lowest UASTC error, or lowest BC7 error. + cPackUASTCFavorUASTCError = 8, + cPackUASTCFavorBC7Error = 16, + + cPackUASTCETC1FasterHints = 64, + cPackUASTCETC1FastestHints = 128, + cPackUASTCETC1DisableFlipAndIndividual = 256, + + // Favor UASTC modes 0 and 10 more than the others (this is experimental, it's useful for RDO compression) + cPackUASTCFavorSimplerModes = 512, + }; + + // pRGBAPixels: Pointer to source 4x4 block of RGBA pixels (R first in memory). + // block: Reference to destination UASTC block. + // level: Controls compression speed vs. performance tradeoff. + void encode_uastc(const uint8_t* pRGBAPixels, basist::uastc_block& output_block, uint32_t flags = cPackUASTCLevelDefault); + + struct uastc_encode_results + { + uint32_t m_uastc_mode; + uint32_t m_common_pattern; + basist::astc_block_desc m_astc; + color_rgba m_solid_color; + uint64_t m_astc_err; + }; + + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1); + + const uint32_t UASCT_RDO_DEFAULT_LZ_DICT_SIZE = 4096; + + const float UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO = 10.0f; + const float UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH = 8.0f; + + // The RDO encoder computes a smoothness factor, from [0,1], for each block. To do this it computes each block's maximum component variance, then it divides this by this factor and clamps the result. + // Larger values will result in more blocks being protected from too much distortion. + const float UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV = 18.0f; + + // The RDO encoder can artifically boost the error of smooth blocks, in order to suppress distortions on smooth areas of the texture. + // The encoder will use this value as the maximum error scale to use on smooth blocks. The larger this value, the better smooth bocks will look. Set to 1.0 to disable this completely. + const float UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE = 10.0f; + + struct uastc_rdo_params + { + uastc_rdo_params() + { + clear(); + } + + void clear() + { + m_lz_dict_size = UASCT_RDO_DEFAULT_LZ_DICT_SIZE; + m_lambda = 0.5f; + m_max_allowed_rms_increase_ratio = UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO; + m_skip_block_rms_thresh = UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH; + m_endpoint_refinement = true; + m_lz_literal_cost = 100; + + m_max_smooth_block_std_dev = UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV; + m_smooth_block_max_error_scale = UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE; + } + + // m_lz_dict_size: Size of LZ dictionary to simulate in bytes. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. + uint32_t m_lz_dict_size; + + // m_lambda: The post-processor tries to reduce distortion+rate*lambda (rate is approximate LZ bits and distortion is scaled MS error). + // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. + float m_lambda; + + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. + float m_max_allowed_rms_increase_ratio; + + // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. + float m_skip_block_rms_thresh; + + // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. + bool m_endpoint_refinement; + + float m_max_smooth_block_std_dev; + float m_smooth_block_max_error_scale; + + uint32_t m_lz_literal_cost; + }; + + // num_blocks, pBlocks: Number of blocks and pointer to UASTC blocks to process. + // pBlock_pixels: Pointer to an array of 4x4 blocks containing the original texture pixels. This is NOT a raster image, but a pointer to individual 4x4 blocks. + // flags: Pass in the same flags used to encode the UASTC blocks. The flags are used to reencode the transcode hints in the same way. + bool uastc_rdo(uint32_t num_blocks, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params ¶ms, uint32_t flags = cPackUASTCLevelDefault, job_pool* pJob_pool = nullptr, uint32_t total_jobs = 0); +} // namespace basisu diff --git a/thirdparty/basis_universal/encoder/cppspmd_flow.h b/thirdparty/basis_universal/encoder/cppspmd_flow.h new file mode 100644 index 0000000000..f6930476aa --- /dev/null +++ b/thirdparty/basis_universal/encoder/cppspmd_flow.h @@ -0,0 +1,590 @@ +// Do not include this header directly. +// Control flow functionality in common between all the headers. +// +// Copyright 2020-2021 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef _DEBUG +CPPSPMD_FORCE_INLINE void spmd_kernel::check_masks() +{ + assert(!any(andnot(m_kernel_exec, m_exec))); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_break() +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + m_exec = exec_mask::all_off(); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_continue() +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + // Kill any active lanes, and remember which lanes were active so we can re-enable them at the end of the loop body. + m_continue_mask = m_continue_mask | m_exec; + m_exec = exec_mask::all_off(); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return() +{ + // Permenantly kill all active lanes + m_kernel_exec = andnot(m_exec, m_kernel_exec); + m_exec = exec_mask::all_off(); +} + +template<typename UnmaskedBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody) +{ + exec_mask orig_exec = m_exec, orig_kernel_exec = m_kernel_exec; + + m_kernel_exec = exec_mask::all_on(); + m_exec = exec_mask::all_on(); + + unmaskedBody(); + + m_kernel_exec = m_kernel_exec & orig_kernel_exec; + m_exec = m_exec & orig_exec; + + check_masks(); +} + +struct scoped_unmasked_restorer +{ + spmd_kernel *m_pKernel; + exec_mask m_orig_exec, m_orig_kernel_exec; + + CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec), + m_orig_kernel_exec(pKernel->m_kernel_exec) + { + pKernel->m_kernel_exec = exec_mask::all_on(); + pKernel->m_exec = exec_mask::all_on(); + } + + CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() + { + m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec; + m_pKernel->m_exec = m_pKernel->m_exec & m_orig_exec; + m_pKernel->check_masks(); + } +}; + +#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); +#define SPMD_UNMASKED_END } + +#if 0 +template<typename SPMDKernel, typename... Args> +CPPSPMD_FORCE_INLINE decltype(auto) spmd_kernel::spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(m_exec); + return kernel._call(std::forward<Args>(args)...); +} +#else +template<typename SPMDKernel, typename... Args> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(m_exec); + kernel._call(std::forward<Args>(args)...); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond) +{ +#ifdef _DEBUG + assert(m_in_loop); +#endif + + exec_mask cond_exec(cond); + + m_exec = andnot(m_exec & cond_exec, m_exec); + + check_masks(); +} + +// No SPMD breaks, continues, etc. allowed +template<typename IfBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sif(const vbool& cond, const IfBody& ifBody) +{ + exec_mask im = m_exec & exec_mask(cond); + + if (any(im)) + { + const exec_mask orig_exec = m_exec; + m_exec = im; + ifBody(); + m_exec = orig_exec; + } +} + +// No SPMD breaks, continues, etc. allowed +template<typename IfBody, typename ElseBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody) +{ + const exec_mask orig_exec = m_exec; + + exec_mask im = m_exec & exec_mask(cond); + + if (any(im)) + { + m_exec = im; + ifBody(); + } + + exec_mask em = orig_exec & exec_mask(!cond); + + if (any(em)) + { + m_exec = em; + elseBody(); + } + + m_exec = orig_exec; +} + +template<typename IfBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody) +{ + exec_mask cond_exec(cond); + + exec_mask pre_if_exec = cond_exec & m_exec; + + if (any(pre_if_exec)) + { + exec_mask unexecuted_lanes = andnot(cond_exec, m_exec); + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } +} + +template<typename IfBody, typename ElseBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody) +{ + bool all_flag = false; + + exec_mask cond_exec(cond); + + { + exec_mask pre_if_exec = cond_exec & m_exec; + + int mask = pre_if_exec.get_movemask(); + if (mask != 0) + { + all_flag = ((uint32_t)mask == m_exec.get_movemask()); + + exec_mask unexecuted_lanes = andnot(cond_exec, m_exec); + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } + } + + if (!all_flag) + { + exec_mask pre_if_exec = andnot(cond_exec, m_exec); + + if (any(pre_if_exec)) + { + exec_mask unexecuted_lanes = cond_exec & m_exec; + m_exec = pre_if_exec; + + ifBody(); + + // Propagate any lanes that got disabled inside the if body into the exec mask outside the if body, but turn on any lanes that didn't execute inside the if body. + m_exec = m_exec | unexecuted_lanes; + + check_masks(); + } + } +} + +struct scoped_exec_restorer +{ + exec_mask *m_pMask; + exec_mask m_prev_mask; + CPPSPMD_FORCE_INLINE scoped_exec_restorer(exec_mask *pExec_mask) : m_pMask(pExec_mask), m_prev_mask(*pExec_mask) { } + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer() { *m_pMask = m_prev_mask; } +}; + +// Cannot use SPMD break, continue, or return inside "simple" if/else +#define SPMD_SIF(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SELSE(cond) } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SENDIF } + +// Same as SPMD_SIF, except doesn't use a scoped object +#define SPMD_SIF2(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { exec_mask _orig_exec = m_exec; m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SELSE2(cond) m_exec = _orig_exec; } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { exec_mask _orig_exec = m_exec; m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SEND_IF2 m_exec = _orig_exec; } + +// Same as SPMD_SIF(), except the if/else blocks are always executed +#define SPMD_SAIF(cond) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(vbool(cond))); { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \ + m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SAELSE(cond) } exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(m_exec & exec_mask(!vbool(cond))); { CPPSPMD::scoped_exec_restorer CPPSPMD_GLUER2(_exec_restore_, __LINE__)(&m_exec); \ + m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); + +#define SPMD_SAENDIF } + +// Cannot use SPMD break, continue, or return inside sselect +#define SPMD_SSELECT(var) do { vint_t _select_var = var; scoped_exec_restorer _orig_exec(&m_exec); exec_mask _select_executed(exec_mask::all_off()); +#define SPMD_SCASE(value) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); if (any(CPPSPMD_GLUER2(_exec_temp, __LINE__))) \ + { m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); _select_executed = _select_executed | m_exec; + +//#define SPMD_SCASE_END if (_select_executed.get_movemask() == _orig_exec.m_prev_mask.get_movemask()) break; } +#define SPMD_SCASE_END if (!any(_select_executed ^ _orig_exec.m_prev_mask)) break; } +#define SPMD_SDEFAULT exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); if (any(_all_other_lanes)) { m_exec = _all_other_lanes; +#define SPMD_SDEFAULT_END } +#define SPMD_SSELECT_END } while(0); + +// Same as SPMD_SSELECT, except all cases are executed. +// Cannot use SPMD break, continue, or return inside sselect +#define SPMD_SASELECT(var) do { vint_t _select_var = var; scoped_exec_restorer _orig_exec(&m_exec); exec_mask _select_executed(exec_mask::all_off()); + +#define SPMD_SACASE(value) exec_mask CPPSPMD_GLUER2(_exec_temp, __LINE__)(_orig_exec.m_prev_mask & exec_mask(vbool(_select_var == (value)))); { m_exec = CPPSPMD_GLUER2(_exec_temp, __LINE__); \ + _select_executed = _select_executed | m_exec; + +#define SPMD_SACASE_END } +#define SPMD_SADEFAULT exec_mask _all_other_lanes(andnot(_select_executed, _orig_exec.m_prev_mask)); { m_exec = _all_other_lanes; +#define SPMD_SADEFAULT_END } +#define SPMD_SASELECT_END } while(0); + +struct scoped_exec_restorer2 +{ + spmd_kernel *m_pKernel; + exec_mask m_unexecuted_lanes; + + CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : + m_pKernel(pKernel) + { + exec_mask cond_exec(cond); + m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec); + pKernel->m_exec = cond_exec & pKernel->m_exec; + } + + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() + { + m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes; + m_pKernel->check_masks(); + } +}; + +#define SPMD_IF(cond) { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); if (any(m_exec)) { +#define SPMD_ELSE(cond) } } { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); if (any(m_exec)) { +#define SPMD_END_IF } } + +// Same as SPMD_IF, except the conditional block is always executed. +#define SPMD_AIF(cond) { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, vbool(cond)); { +#define SPMD_AELSE(cond) } } { CPPSPMD::scoped_exec_restorer2 CPPSPMD_GLUER2(_exec_restore2_, __LINE__)(this, !vbool(cond)); { +#define SPMD_AEND_IF } } + +class scoped_exec_saver +{ + exec_mask m_exec, m_kernel_exec, m_continue_mask; + spmd_kernel *m_pKernel; +#ifdef _DEBUG + bool m_in_loop; +#endif + +public: + inline scoped_exec_saver(spmd_kernel *pKernel) : + m_exec(pKernel->m_exec), m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask), + m_pKernel(pKernel) + { +#ifdef _DEBUG + m_in_loop = pKernel->m_in_loop; +#endif + } + + inline ~scoped_exec_saver() + { + m_pKernel->m_exec = m_exec; + m_pKernel->m_continue_mask = m_continue_mask; + m_pKernel->m_kernel_exec = m_kernel_exec; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +#define SPMD_BEGIN_CALL scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); m_continue_mask = exec_mask::all_off(); +#define SPMD_BEGIN_CALL_ALL_LANES scoped_exec_saver CPPSPMD_GLUER2(_begin_call_scoped_exec_saver, __LINE__)(this); m_exec = exec_mask::all_on(); m_continue_mask = exec_mask::all_off(); + +template<typename ForeachBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const ForeachBody& foreachBody) +{ + if (begin == end) + return; + + if (!any(m_exec)) + return; + + // We don't support iterating backwards. + if (begin > end) + std::swap(begin, end); + + exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec; + + int total_full = (end - begin) / PROGRAM_COUNT; + int total_partial = (end - begin) % PROGRAM_COUNT; + + lint_t loop_index = begin + program_index; + + const int total_loops = total_full + (total_partial ? 1 : 0); + + m_continue_mask = exec_mask::all_off(); + + for (int i = 0; i < total_loops; i++) + { + int n = PROGRAM_COUNT; + if ((i == (total_loops - 1)) && (total_partial)) + { + exec_mask partial_mask = exec_mask(vint_t(total_partial) > vint_t(program_index)); + m_exec = m_exec & partial_mask; + n = total_partial; + } + + foreachBody(loop_index, n); + + m_exec = m_exec | m_continue_mask; + if (!any(m_exec)) + break; + + m_continue_mask = exec_mask::all_off(); + check_masks(); + + store_all(loop_index, loop_index + PROGRAM_COUNT); + } + + m_exec = prev_exec & m_kernel_exec; + m_continue_mask = prev_continue_mask; + check_masks(); +} + +template<typename WhileCondBody, typename WhileBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody) +{ + exec_mask orig_exec = m_exec; + + exec_mask orig_continue_mask = m_continue_mask; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + const bool prev_in_loop = m_in_loop; + m_in_loop = true; +#endif + + while(true) + { + exec_mask cond_exec = exec_mask(whileCondBody()); + m_exec = m_exec & cond_exec; + + if (!any(m_exec)) + break; + + whileBody(); + + m_exec = m_exec | m_continue_mask; + m_continue_mask = exec_mask::all_off(); + check_masks(); + } + +#ifdef _DEBUG + m_in_loop = prev_in_loop; +#endif + + m_exec = orig_exec & m_kernel_exec; + m_continue_mask = orig_continue_mask; + check_masks(); +} + +struct scoped_while_restorer +{ + spmd_kernel *m_pKernel; + exec_mask m_orig_exec, m_orig_continue_mask; +#ifdef _DEBUG + bool m_prev_in_loop; +#endif + + CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec), + m_orig_continue_mask(pKernel->m_continue_mask) + { + pKernel->m_continue_mask.all_off(); + +#ifdef _DEBUG + m_prev_in_loop = pKernel->m_in_loop; + pKernel->m_in_loop = true; +#endif + } + + CPPSPMD_FORCE_INLINE ~scoped_while_restorer() + { + m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec; + m_pKernel->m_continue_mask = m_orig_continue_mask; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_prev_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +#undef SPMD_WHILE +#undef SPMD_WEND +#define SPMD_WHILE(cond) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); \ + m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; + +#define SPMD_WEND m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); } } + +// Nesting is not supported (although it will compile, but the results won't make much sense). +#define SPMD_FOREACH(loop_var, bi, ei) if (((bi) != (ei)) && (any(m_exec))) { \ + scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + uint32_t b = (uint32_t)(bi), e = (uint32_t)(ei); if ((b) > (e)) { std::swap(b, e); } const uint32_t total_full = ((e) - (b)) >> PROGRAM_COUNT_SHIFT, total_partial = ((e) - (b)) & (PROGRAM_COUNT - 1); \ + lint_t loop_var = program_index + (int)b; const uint32_t total_loops = total_full + (total_partial ? 1U : 0U); \ + for (uint32_t CPPSPMD_GLUER2(_foreach_counter, __LINE__) = 0; CPPSPMD_GLUER2(_foreach_counter, __LINE__) < total_loops; ++CPPSPMD_GLUER2(_foreach_counter, __LINE__)) { \ + if ((CPPSPMD_GLUER2(_foreach_counter, __LINE__) == (total_loops - 1)) && (total_partial)) { exec_mask partial_mask = exec_mask(vint_t((int)total_partial) > vint_t(program_index)); m_exec = m_exec & partial_mask; } + +#define SPMD_FOREACH_END(loop_var) m_exec = m_exec | m_continue_mask; if (!any(m_exec)) break; m_continue_mask = exec_mask::all_off(); check_masks(); store_all(loop_var, loop_var + PROGRAM_COUNT); } } + +// Okay to use spmd_continue or spmd_return, but not spmd_break +#define SPMD_FOREACH_ACTIVE(index_var) int64_t index_var; { uint64_t _movemask = m_exec.get_movemask(); if (_movemask) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + for (uint32_t _i = 0; _i < PROGRAM_COUNT; ++_i) { \ + if (_movemask & (1U << _i)) { \ + m_exec.enable_lane(_i); m_exec = m_exec & m_kernel_exec; \ + (index_var) = _i; \ + +#define SPMD_FOREACH_ACTIVE_END } } } } + +// Okay to use spmd_continue, but not spmd_break/spmd_continue +#define SPMD_FOREACH_UNIQUE_INT(index_var, var) { scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + CPPSPMD_DECL(int_t, _vals[PROGRAM_COUNT]); store_linear_all(_vals, var); std::sort(_vals, _vals + PROGRAM_COUNT); \ + const int _n = (int)(std::unique(_vals, _vals + PROGRAM_COUNT) - _vals); \ + for (int _i = 0; _i < _n; ++_i) { int index_var = _vals[_i]; vbool cond = (vint_t(var) == vint_t(index_var)); m_exec = exec_mask(cond); + +#define SPMD_FOREACH_UNIQUE_INT_END } } + +struct scoped_simple_while_restorer +{ + spmd_kernel* m_pKernel; + exec_mask m_orig_exec; +#ifdef _DEBUG + bool m_prev_in_loop; +#endif + + CPPSPMD_FORCE_INLINE scoped_simple_while_restorer(spmd_kernel* pKernel) : + m_pKernel(pKernel), + m_orig_exec(pKernel->m_exec) + { + +#ifdef _DEBUG + m_prev_in_loop = pKernel->m_in_loop; + pKernel->m_in_loop = true; +#endif + } + + CPPSPMD_FORCE_INLINE ~scoped_simple_while_restorer() + { + m_pKernel->m_exec = m_orig_exec; +#ifdef _DEBUG + m_pKernel->m_in_loop = m_prev_in_loop; + m_pKernel->check_masks(); +#endif + } +}; + +// Cannot use SPMD break, continue, or return inside simple while + +#define SPMD_SWHILE(cond) { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ + while(true) { \ + exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; +#define SPMD_SWEND } } + +// Cannot use SPMD break, continue, or return inside simple do +#define SPMD_SDO { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { +#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; } } + +#undef SPMD_FOR +#undef SPMD_END_FOR +#define SPMD_FOR(for_init, for_cond) { for_init; scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \ + m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; +#define SPMD_END_FOR(for_inc) m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); for_inc; } } + +template<typename ForInitBody, typename ForCondBody, typename ForIncrBody, typename ForBody> +CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody) +{ + exec_mask orig_exec = m_exec; + + forInitBody(); + + exec_mask orig_continue_mask = m_continue_mask; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + const bool prev_in_loop = m_in_loop; + m_in_loop = true; +#endif + + while(true) + { + exec_mask cond_exec = exec_mask(forCondBody()); + m_exec = m_exec & cond_exec; + + if (!any(m_exec)) + break; + + forBody(); + + m_exec = m_exec | m_continue_mask; + m_continue_mask = exec_mask::all_off(); + check_masks(); + + forIncrBody(); + } + + m_exec = orig_exec & m_kernel_exec; + m_continue_mask = orig_continue_mask; + +#ifdef _DEBUG + m_in_loop = prev_in_loop; + check_masks(); +#endif +} diff --git a/thirdparty/basis_universal/encoder/cppspmd_math.h b/thirdparty/basis_universal/encoder/cppspmd_math.h new file mode 100644 index 0000000000..e7b3202b8e --- /dev/null +++ b/thirdparty/basis_universal/encoder/cppspmd_math.h @@ -0,0 +1,725 @@ +// Do not include this header directly. +// +// Copyright 2020-2021 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The general goal of these vectorized estimated math functions is scalability/performance. +// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. +// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper +// engineering analysis before relying on them. +// I have chosen functions written by others, ported them to CppSPMD, then measured their abs/rel errors. +// I compared each to the ones in DirectXMath and stdlib's for accuracy/performance. + +CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + vfloat c = frac(abs(a * b_inv)) * abs(b); + return spmd_ternaryf(a < 0, -c, c); +} + +CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + return frac(a * b_inv) * b; +} + +// Avoids dividing by zero or very small values. +CPPSPMD_FORCE_INLINE vfloat safe_div(vfloat a, vfloat b, float fDivThresh = 1e-7f) +{ + return a / spmd_ternaryf( abs(b) > fDivThresh, b, spmd_ternaryf(b < 0.0f, -fDivThresh, fDivThresh) ); +} + +/* + clang 9.0.0 for win /fp:precise release + f range: 0.0000000000001250 10000000000.0000000000000000, vals: 1073741824 + + log2_est(): + max abs err: 0.0000023076808731 + max rel err: 0.0000000756678881 + avg abs err: 0.0000007535452724 + avg rel err: 0.0000000235117843 + + XMVectorLog2(): + max abs err: 0.0000023329709933 + max rel err: 0.0000000826961046 + avg abs err: 0.0000007564889684 + avg rel err: 0.0000000236051899 + + std::log2f(): + max abs err: 0.0000020265979401 + max rel err: 0.0000000626647654 + avg abs err: 0.0000007494445227 + avg rel err: 0.0000000233800985 +*/ + +// See https://tech.ebayinc.com/engineering/fast-approximate-logarithms-part-iii-the-formulas/ +inline vfloat spmd_kernel::log2_est(vfloat v) +{ + vfloat signif, fexp; + + // Just clamp to a very small value, instead of checking for invalid inputs. + vfloat x = max(v, 2.2e-38f); + + /* + * Assume IEEE representation, which is sgn(1):exp(8):frac(23) + * representing (1+frac)*2^(exp-127). Call 1+frac the significand + */ + + // get exponent + vint ux1_i = cast_vfloat_to_vint(x); + + vint exp = VUINT_SHIFT_RIGHT(ux1_i & 0x7F800000, 23); + + // actual exponent is exp-127, will subtract 127 later + + vint ux2_i; + vfloat ux2_f; + + vint greater = ux1_i & 0x00400000; // true if signif > 1.5 + SPMD_SIF(greater != 0) + { + // signif >= 1.5 so need to divide by 2. Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 + store_all(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f000000); + + store_all(ux2_f, cast_vint_to_vfloat(ux2_i)); + + // 126 instead of 127 compensates for division by 2 + store_all(fexp, vfloat(exp - 126)); + } + SPMD_SELSE(greater != 0) + { + // get signif by stuffing exp = 127 which corresponds to an exponent of 0 + store(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f800000); + + store(ux2_f, cast_vint_to_vfloat(ux2_i)); + + store(fexp, vfloat(exp - 127)); + } + SPMD_SENDIF + + store_all(signif, ux2_f); + store_all(signif, signif - 1.0f); + + const float a = 0.1501692f, b = 3.4226132f, c = 5.0225057f, d = 4.1130283f, e = 3.4813372f; + + vfloat xm1 = signif; + vfloat xm1sqr = xm1 * xm1; + + return fexp + ((a * (xm1sqr * xm1) + b * xm1sqr + c * xm1) / (xm1sqr + d * xm1 + e)); + + // fma lowers accuracy for SSE4.1 - no idea why (compiler reordering?) + //return fexp + ((vfma(a, (xm1sqr * xm1), vfma(b, xm1sqr, c * xm1))) / (xm1sqr + vfma(d, xm1, e))); +} + +// Uses log2_est(), so this function must be <= the precision of that. +inline vfloat spmd_kernel::log_est(vfloat v) +{ + return log2_est(v) * 0.693147181f; +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_a, vint& adjustment) +{ + // Assume we're using equation (2) + store_all(adjustment, 0); + + // integer part of the input argument + vint int_arg = (vint)arg; + + // if frac(arg) is in [0.5, 1.0]... + SPMD_SIF((arg - int_arg) > 0.5f) + { + store(adjustment, 1); + + // then change it to [0.0, 0.5] + store(arg, arg - 0.5f); + } + SPMD_SENDIF + + // arg == just the fractional part + store_all(arg, arg - (vfloat)int_arg); + + // Now compute 2** (int) arg. + store_all(int_arg, min(int_arg + 127, 254)); + + store_all(two_int_a, cast_vint_to_vfloat(VINT_SHIFT_LEFT(int_arg, 23))); +} + +/* + clang 9.0.0 for win /fp:precise release + f range : -50.0000000000000000 49.9999940395355225, vals : 16777216 + + exp2_est(): + Total passed near - zero check : 16777216 + Total sign diffs : 0 + max abs err: 1668910609.7500000000000000 + max rel err: 0.0000015642030031 + avg abs err: 10793794.4007573910057545 + avg rel err: 0.0000003890893282 + + XMVectorExp2(): + Total passed near-zero check: 16777216 + Total sign diffs: 0 + max abs err: 1665552836.8750000000000000 + max rel err: 0.0000114674862370 + avg abs err: 10771868.2627860084176064 + avg rel err: 0.0000011218880770 + + std::exp2f(): + Total passed near-zero check: 16777216 + Total sign diffs: 0 + max abs err: 1591636585.6250000000000000 + max rel err: 0.0000014849731018 + avg abs err: 10775800.3204844966530800 + avg rel err: 0.0000003851496422 +*/ + +// http://www.ganssle.com/item/approximations-c-code-exponentiation-log.htm +inline vfloat spmd_kernel::exp2_est(vfloat arg) +{ + SPMD_BEGIN_CALL + + const vfloat P00 = +7.2152891521493f; + const vfloat P01 = +0.0576900723731f; + const vfloat Q00 = +20.8189237930062f; + const vfloat Q01 = +1.0f; + const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling + + vfloat result = 0.0f; + + // Return 0 if arg is too large. + // We're not introducing inf/nan's into calculations, or risk doing so by returning huge default values. + SPMD_IF(abs(arg) > 126.0f) + { + spmd_return(); + } + SPMD_END_IF + + // 2**(int(a)) + vfloat two_int_a; + + // set to 1 by reduce_expb + vint adjustment; + + // 0 if arg is +; 1 if negative + vint negative = 0; + + // If the input is negative, invert it. At the end we'll take the reciprocal, since n**(-1) = 1/(n**x). + SPMD_SIF(arg < 0.0f) + { + store(arg, -arg); + store(negative, 1); + } + SPMD_SENDIF + + store_all(arg, min(arg, 126.0f)); + + // reduce to [0.0, 0.5] + reduce_expb(arg, two_int_a, adjustment); + + // The format of the polynomial is: + // answer=(Q(x**2) + x*P(x**2))/(Q(x**2) - x*P(x**2)) + // + // The following computes the polynomial in several steps: + + // Q(x**2) + vfloat Q = vfma(Q01, (arg * arg), Q00); + + // x*P(x**2) + vfloat x_P = arg * (vfma(P01, arg * arg, P00)); + + vfloat answer = (Q + x_P) / (Q - x_P); + + // Now correct for the scaling factor of 2**(int(a)) + store_all(answer, answer * two_int_a); + + // If the result had a fractional part > 0.5, correct for that + store_all(answer, spmd_ternaryf(adjustment != 0, answer * sqrt2, answer)); + + // Correct for a negative input + SPMD_SIF(negative != 0) + { + store(answer, 1.0f / answer); + } + SPMD_SENDIF + + store(result, answer); + + return result; +} + +inline vfloat spmd_kernel::exp_est(vfloat arg) +{ + // e^x = exp2(x / log_base_e(2)) + // constant is 1.0/(log(2)/log(e)) or 1/log(2) + return exp2_est(arg * 1.44269504f); +} + +inline vfloat spmd_kernel::pow_est(vfloat arg1, vfloat arg2) +{ + return exp_est(log_est(arg1) * arg2); +} + +/* + clang 9.0.0 for win /fp:precise release + Total near-zero: 144, output above near-zero tresh: 30 + Total near-zero avg: 0.0000067941016621 max: 0.0000134706497192 + Total near-zero sign diffs: 5 + Total passed near-zero check: 16777072 + Total sign diffs: 5 + max abs err: 0.0000031375306036 + max rel err: 0.1140846017075028 + avg abs err: 0.0000003026226621 + avg rel err: 0.0000033564977623 +*/ + +// Math from this web page: http://developer.download.nvidia.com/cg/sin.html +// This is ~2x slower than sin_est() or cos_est(), and less accurate, but I'm keeping it here for comparison purposes to help validate/sanity check sin_est() and cos_est(). +inline vfloat spmd_kernel::sincos_est_a(vfloat a, bool sin_flag) +{ + const float c0_x = 0.0f, c0_y = 0.5f, c0_z = 1.0f; + const float c1_x = 0.25f, c1_y = -9.0f, c1_z = 0.75f, c1_w = 0.159154943091f; + const float c2_x = 24.9808039603f, c2_y = -24.9808039603f, c2_z = -60.1458091736f, c2_w = 60.1458091736f; + const float c3_x = 85.4537887573f, c3_y = -85.4537887573f, c3_z = -64.9393539429f, c3_w = 64.9393539429f; + const float c4_x = 19.7392082214f, c4_y = -19.7392082214f, c4_z = -1.0f, c4_w = 1.0f; + + vfloat r0_x, r0_y, r0_z, r1_x, r1_y, r1_z, r2_x, r2_y, r2_z; + + store_all(r1_x, sin_flag ? vfms(c1_w, a, c1_x) : c1_w * a); + + store_all(r1_y, frac(r1_x)); + + store_all(r2_x, (vfloat)(r1_y < c1_x)); + + store_all(r2_y, (vfloat)(r1_y >= c1_y)); + store_all(r2_z, (vfloat)(r1_y >= c1_z)); + + store_all(r2_y, vfma(r2_x, c4_z, vfma(r2_y, c4_w, r2_z * c4_z))); + + store_all(r0_x, c0_x - r1_y); + store_all(r0_y, c0_y - r1_y); + store_all(r0_z, c0_z - r1_y); + + store_all(r0_x, r0_x * r0_x); + store_all(r0_y, r0_y * r0_y); + store_all(r0_z, r0_z * r0_z); + + store_all(r1_x, vfma(c2_x, r0_x, c2_z)); + store_all(r1_y, vfma(c2_y, r0_y, c2_w)); + store_all(r1_z, vfma(c2_x, r0_z, c2_z)); + + store_all(r1_x, vfma(r1_x, r0_x, c3_x)); + store_all(r1_y, vfma(r1_y, r0_y, c3_y)); + store_all(r1_z, vfma(r1_z, r0_z, c3_x)); + + store_all(r1_x, vfma(r1_x, r0_x, c3_z)); + store_all(r1_y, vfma(r1_y, r0_y, c3_w)); + store_all(r1_z, vfma(r1_z, r0_z, c3_z)); + + store_all(r1_x, vfma(r1_x, r0_x, c4_x)); + store_all(r1_y, vfma(r1_y, r0_y, c4_y)); + store_all(r1_z, vfma(r1_z, r0_z, c4_x)); + + store_all(r1_x, vfma(r1_x, r0_x, c4_z)); + store_all(r1_y, vfma(r1_y, r0_y, c4_w)); + store_all(r1_z, vfma(r1_z, r0_z, c4_z)); + + store_all(r0_x, vfnma(r1_x, r2_x, vfnma(r1_y, r2_y, r1_z * -r2_z))); + + return r0_x; +} + +// positive values only +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::recip_est1(const vfloat& q) +{ + //const int mag = 0x7EF312AC; // 2 NR iters, 3 is 0x7EEEEBB3 + const int mag = 0x7EF311C3; + const float fMinThresh = .0000125f; + + vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag))); + + vint x_l = vint(mag) - cast_vfloat_to_vint(l); + + vfloat rcp_l = cast_vint_to_vfloat(x_l); + + return rcp_l * vfnma(rcp_l, q, 2.0f); +} + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::recip_est1_pn(const vfloat& t) +{ + //const int mag = 0x7EF312AC; // 2 NR iters, 3 is 0x7EEEEBB3 + const int mag = 0x7EF311C3; + const float fMinThresh = .0000125f; + + vfloat s = sign(t); + vfloat q = abs(t); + + vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag))); + + vint x_l = vint(mag) - cast_vfloat_to_vint(l); + + vfloat rcp_l = cast_vint_to_vfloat(x_l); + + return rcp_l * vfnma(rcp_l, q, 2.0f) * s; +} + +// https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf +// https://github.com/hcs0/Hackers-Delight/blob/master/rsqrt.c.txt +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::rsqrt_est1(vfloat x0) +{ + vfloat xhalf = 0.5f * x0; + vfloat x = cast_vint_to_vfloat(vint(0x5F375A82) - (VINT_SHIFT_RIGHT(cast_vfloat_to_vint(x0), 1))); + return x * vfnma(xhalf * x, x, 1.5008909f); +} + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::rsqrt_est2(vfloat x0) +{ + vfloat xhalf = 0.5f * x0; + vfloat x = cast_vint_to_vfloat(vint(0x5F37599E) - (VINT_SHIFT_RIGHT(cast_vfloat_to_vint(x0), 1))); + vfloat x1 = x * vfnma(xhalf * x, x, 1.5); + vfloat x2 = x1 * vfnma(xhalf * x1, x1, 1.5); + return x2; +} + +// Math from: http://developer.download.nvidia.com/cg/atan2.html +// TODO: Needs more validation, parameter checking. +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) +{ + vfloat t1 = abs(y); + vfloat t3 = abs(x); + + vfloat t0 = max(t3, t1); + store_all(t1, min(t3, t1)); + + store_all(t3, t1 / t0); + + vfloat t4 = t3 * t3; + store_all(t0, vfma(-0.013480470f, t4, 0.057477314f)); + store_all(t0, vfms(t0, t4, 0.121239071f)); + store_all(t0, vfma(t0, t4, 0.195635925f)); + store_all(t0, vfms(t0, t4, 0.332994597f)); + store_all(t0, vfma(t0, t4, 0.999995630f)); + store_all(t3, t0 * t3); + + store_all(t3, spmd_ternaryf(abs(y) > abs(x), vfloat(1.570796327f) - t3, t3)); + + store_all(t3, spmd_ternaryf(x < 0.0f, vfloat(3.141592654f) - t3, t3)); + store_all(t3, spmd_ternaryf(y < 0.0f, -t3, t3)); + + return t3; +} + +/* + clang 9.0.0 for win /fp:precise release + Tested range: -25.1327412287183449 25.1327382326621169, vals : 16777216 + Skipped angles near 90/270 within +- .001 radians. + Near-zero threshold: .0000125f + Near-zero output above check threshold: 1e-6f + + Total near-zero: 144, output above near-zero tresh: 20 + Total near-zero avg: 0.0000067510751968 max: 0.0000133514404297 + Total near-zero sign diffs: 5 + Total passed near-zero check: 16766400 + Total sign diffs: 5 + max abs err: 1.4982600811139264 + max rel err: 0.1459155900188041 + avg rel err: 0.0000054659502568 + + XMVectorTan() precise: + Total near-zero: 144, output above near-zero tresh: 18 + Total near-zero avg: 0.0000067641216186 max: 0.0000133524126795 + Total near-zero sign diffs: 0 + Total passed near-zero check: 16766400 + Total sign diffs: 0 + max abs err: 1.9883573246424930 + max rel err: 0.1459724171926864 + avg rel err: 0.0000054965766843 + + std::tanf(): + Total near-zero: 144, output above near-zero tresh: 0 + Total near-zero avg: 0.0000067116930779 max: 0.0000127713074107 + Total near-zero sign diffs: 11 + Total passed near-zero check: 16766400 + Total sign diffs: 11 + max abs err: 0.8989131818294709 + max rel err: 0.0573181403173166 + avg rel err: 0.0000030791301203 + + Originally from: + http://www.ganssle.com/approx.htm +*/ + +CPPSPMD_FORCE_INLINE vfloat spmd_kernel::tan82(vfloat x) +{ + // Original double version was 8.2 digits + //double c1 = 211.849369664121f, c2 = -12.5288887278448f, c3 = 269.7350131214121f, c4 = -71.4145309347748f; + // Tuned float constants for lower avg rel error (without using FMA3): + const float c1 = 211.849350f, c2 = -12.5288887f, c3 = 269.734985f, c4 = -71.4145203f; + vfloat x2 = x * x; + return (x * (vfma(c2, x2, c1)) / (vfma(x2, (c4 + x2), c3))); +} + +// Don't call this for angles close to 90/270!. +inline vfloat spmd_kernel::tan_est(vfloat x) +{ + const float fPi = 3.141592653589793f, fOneOverPi = 0.3183098861837907f; + CPPSPMD_DECL(const uint8_t, s_table0[16]) = { 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4, 128 + 0, 128 + 2, 128 + -2, 128 + 4 }; + + vint table = init_lookup4(s_table0); // a load + vint sgn = cast_vfloat_to_vint(x) & 0x80000000; + + store_all(x, abs(x)); + vfloat orig_x = x; + + vfloat q = x * fOneOverPi; + store_all(x, q - floor(q)); + + vfloat x4 = x * 4.0f; + vint octant = (vint)(x4); + + vfloat x0 = spmd_ternaryf((octant & 1) != 0, -x4, x4); + + vint k = table_lookup4_8(octant, table) & 0xFF; // a shuffle + + vfloat bias = (vfloat)k + -128.0f; + vfloat y = x0 + bias; + + vfloat z = tan82(y); + + vfloat r; + + vbool octant_one_or_two = (octant == 1) || (octant == 2); + + // SPMD optimization - skip costly divide if we can + if (spmd_any(octant_one_or_two)) + { + const float fDivThresh = .4371e-7f; + vfloat one_over_z = 1.0f / spmd_ternaryf(abs(z) > fDivThresh, z, spmd_ternaryf(z < 0.0f, -fDivThresh, fDivThresh)); + + vfloat b = spmd_ternaryf(octant_one_or_two, one_over_z, z); + store_all(r, spmd_ternaryf((octant & 2) != 0, -b, b)); + } + else + { + store_all(r, spmd_ternaryf(octant == 0, z, -z)); + } + + // Small angle approximation, to decrease the max rel error near Pi. + SPMD_SIF(x >= (1.0f - .0003125f*4.0f)) + { + store(r, vfnma(floor(q) + 1.0f, fPi, orig_x)); + } + SPMD_SENDIF + + return cast_vint_to_vfloat(cast_vfloat_to_vint(r) ^ sgn); +} + +inline void spmd_kernel::seed_rand(rand_context& x, vint seed) +{ + store(x.a, 0xf1ea5eed); + store(x.b, seed ^ 0xd8487b1f); + store(x.c, seed ^ 0xdbadef9a); + store(x.d, seed); + for (int i = 0; i < 20; ++i) + (void)get_randu(x); +} + +// https://burtleburtle.net/bob/rand/smallprng.html +// Returns 32-bit unsigned random numbers. +inline vint spmd_kernel::get_randu(rand_context& x) +{ + vint e = x.a - VINT_ROT(x.b, 27); + store(x.a, x.b ^ VINT_ROT(x.c, 17)); + store(x.b, x.c + x.d); + store(x.c, x.d + e); + store(x.d, e + x.a); + return x.d; +} + +// Returns random numbers between [low, high), or low if low >= high +inline vint spmd_kernel::get_randi(rand_context& x, vint low, vint high) +{ + vint rnd = get_randu(x); + + vint range = high - low; + + vint rnd_range = mulhiu(rnd, range); + + return spmd_ternaryi(low < high, low + rnd_range, low); +} + +// Returns random numbers between [low, high), or low if low >= high +inline vfloat spmd_kernel::get_randf(rand_context& x, vfloat low, vfloat high) +{ + vint rndi = get_randu(x) & 0x7fffff; + + vfloat rnd = (vfloat)(rndi) * (1.0f / 8388608.0f); + + return spmd_ternaryf(low < high, vfma(high - low, rnd, low), low); +} + +CPPSPMD_FORCE_INLINE void spmd_kernel::init_reverse_bits(vint& tab1, vint& tab2) +{ + const uint8_t tab1_bytes[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const uint8_t tab2_bytes[16] = { 0, 8 << 4, 4 << 4, 12 << 4, 2 << 4, 10 << 4, 6 << 4, 14 << 4, 1 << 4, 9 << 4, 5 << 4, 13 << 4, 3 << 4, 11 << 4, 7 << 4, 15 << 4 }; + store_all(tab1, init_lookup4(tab1_bytes)); + store_all(tab2, init_lookup4(tab2_bytes)); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::reverse_bits(vint k, vint tab1, vint tab2) +{ + vint r0 = table_lookup4_8(k & 0x7F7F7F7F, tab2); + vint r1 = table_lookup4_8(VUINT_SHIFT_RIGHT(k, 4) & 0x7F7F7F7F, tab1); + vint r3 = r0 | r1; + return byteswap(r3); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_leading_zeros(vint x) +{ + CPPSPMD_DECL(const uint8_t, s_tab[16]) = { 0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + vint tab = init_lookup4(s_tab); + + //x <= 0x0000ffff + vbool c0 = (x & 0xFFFF0000) == 0; + vint n0 = spmd_ternaryi(c0, 16, 0); + vint x0 = spmd_ternaryi(c0, VINT_SHIFT_LEFT(x, 16), x); + + //x <= 0x00ffffff + vbool c1 = (x0 & 0xFF000000) == 0; + vint n1 = spmd_ternaryi(c1, n0 + 8, n0); + vint x1 = spmd_ternaryi(c1, VINT_SHIFT_LEFT(x0, 8), x0); + + //x <= 0x0fffffff + vbool c2 = (x1 & 0xF0000000) == 0; + vint n2 = spmd_ternaryi(c2, n1 + 4, n1); + vint x2 = spmd_ternaryi(c2, VINT_SHIFT_LEFT(x1, 4), x1); + + return table_lookup4_8(VUINT_SHIFT_RIGHT(x2, 28), tab) + n2; +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_leading_zeros_alt(vint x) +{ + //x <= 0x0000ffff + vbool c0 = (x & 0xFFFF0000) == 0; + vint n0 = spmd_ternaryi(c0, 16, 0); + vint x0 = spmd_ternaryi(c0, VINT_SHIFT_LEFT(x, 16), x); + + //x <= 0x00ffffff + vbool c1 = (x0 & 0xFF000000) == 0; + vint n1 = spmd_ternaryi(c1, n0 + 8, n0); + vint x1 = spmd_ternaryi(c1, VINT_SHIFT_LEFT(x0, 8), x0); + + //x <= 0x0fffffff + vbool c2 = (x1 & 0xF0000000) == 0; + vint n2 = spmd_ternaryi(c2, n1 + 4, n1); + vint x2 = spmd_ternaryi(c2, VINT_SHIFT_LEFT(x1, 4), x1); + + // x <= 0x3fffffff + vbool c3 = (x2 & 0xC0000000) == 0; + vint n3 = spmd_ternaryi(c3, n2 + 2, n2); + vint x3 = spmd_ternaryi(c3, VINT_SHIFT_LEFT(x2, 2), x2); + + // x <= 0x7fffffff + vbool c4 = (x3 & 0x80000000) == 0; + return spmd_ternaryi(c4, n3 + 1, n3); +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_trailing_zeros(vint x) +{ + // cast the least significant bit in v to a float + vfloat f = (vfloat)(x & -x); + + // extract exponent and adjust + return VUINT_SHIFT_RIGHT(cast_vfloat_to_vint(f), 23) - 0x7F; +} + +CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x) +{ + vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555); + vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); + return VUINT_SHIFT_RIGHT(((v1 + VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F) * 0x1010101), 24); +} + +CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) +{ + return cmpeq_epi16(subs_epu16(a, b), vint(0)); +} + +CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) +{ + return cmple_epu16(b, a); +} + +CPPSPMD_FORCE_INLINE vint cmpgt_epu16(const vint &a, const vint &b) +{ + return andnot(cmpeq_epi16(a, b), cmple_epu16(b, a)); +} + +CPPSPMD_FORCE_INLINE vint cmplt_epu16(const vint &a, const vint &b) +{ + return cmpgt_epu16(b, a); +} + +CPPSPMD_FORCE_INLINE vint cmpge_epi16(const vint &a, const vint &b) +{ + return cmpeq_epi16(a, b) | cmpgt_epi16(a, b); +} + +CPPSPMD_FORCE_INLINE vint cmple_epi16(const vint &a, const vint &b) +{ + return cmpge_epi16(b, a); +} + +void spmd_kernel::print_vint(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i)); + printf("\n"); +} + +void spmd_kernel::print_vbool(vbool v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i) ? 1 : 0); + printf("\n"); +} + +void spmd_kernel::print_vint_hex(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("0x%X ", extract(v, i)); + printf("\n"); +} + +void spmd_kernel::print_active_lanes(const char *pPrefix) +{ + CPPSPMD_DECL(int, flags[PROGRAM_COUNT]); + memset(flags, 0, sizeof(flags)); + storeu_linear(flags, vint(1)); + + if (pPrefix) + printf("%s", pPrefix); + + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + { + if (flags[i]) + printf("%u ", i); + } + printf("\n"); +} + +void spmd_kernel::print_vfloat(vfloat v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%f ", extract(v, i)); + printf("\n"); +} diff --git a/thirdparty/basis_universal/encoder/cppspmd_math_declares.h b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h new file mode 100644 index 0000000000..cdb6447b62 --- /dev/null +++ b/thirdparty/basis_universal/encoder/cppspmd_math_declares.h @@ -0,0 +1,89 @@ +// Do not include this header directly. +// This header defines shared struct spmd_kernel helpers. +// +// Copyright 2020-2021 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// See cppspmd_math.h for detailed error statistics. + +CPPSPMD_FORCE_INLINE void reduce_expb(vfloat& arg, vfloat& two_int_a, vint& adjustment); +CPPSPMD_FORCE_INLINE vfloat tan56(vfloat x); +CPPSPMD_FORCE_INLINE vfloat tan82(vfloat x); + +inline vfloat log2_est(vfloat v); + +inline vfloat log_est(vfloat v); + +inline vfloat exp2_est(vfloat arg); + +inline vfloat exp_est(vfloat arg); + +inline vfloat pow_est(vfloat arg1, vfloat arg2); + +CPPSPMD_FORCE_INLINE vfloat recip_est1(const vfloat& q); +CPPSPMD_FORCE_INLINE vfloat recip_est1_pn(const vfloat& q); + +inline vfloat mod_angles(vfloat a); + +inline vfloat sincos_est_a(vfloat a, bool sin_flag); +CPPSPMD_FORCE_INLINE vfloat sin_est_a(vfloat a) { return sincos_est_a(a, true); } +CPPSPMD_FORCE_INLINE vfloat cos_est_a(vfloat a) { return sincos_est_a(a, false); } + +inline vfloat sin_est(vfloat a); + +inline vfloat cos_est(vfloat a); + +// Don't call with values <= 0. +CPPSPMD_FORCE_INLINE vfloat rsqrt_est1(vfloat x0); + +// Don't call with values <= 0. +CPPSPMD_FORCE_INLINE vfloat rsqrt_est2(vfloat x0); + +CPPSPMD_FORCE_INLINE vfloat atan2_est(vfloat y, vfloat x); + +CPPSPMD_FORCE_INLINE vfloat atan_est(vfloat x) { return atan2_est(x, vfloat(1.0f)); } + +// Don't call this for angles close to 90/270! +inline vfloat tan_est(vfloat x); + +// https://burtleburtle.net/bob/rand/smallprng.html +struct rand_context { vint a, b, c, d; }; + +inline void seed_rand(rand_context& x, vint seed); + +// Returns 32-bit unsigned random numbers. +inline vint get_randu(rand_context& x); + +// Returns random numbers between [low, high), or low if low >= high +inline vint get_randi(rand_context& x, vint low, vint high); + +// Returns random numbers between [low, high), or low if low >= high +inline vfloat get_randf(rand_context& x, vfloat low, vfloat high); + +CPPSPMD_FORCE_INLINE void init_reverse_bits(vint& tab1, vint& tab2); +CPPSPMD_FORCE_INLINE vint reverse_bits(vint k, vint tab1, vint tab2); + +CPPSPMD_FORCE_INLINE vint count_leading_zeros(vint x); +CPPSPMD_FORCE_INLINE vint count_leading_zeros_alt(vint x); + +CPPSPMD_FORCE_INLINE vint count_trailing_zeros(vint x); + +CPPSPMD_FORCE_INLINE vint count_set_bits(vint x); + +void print_vint(vint v); +void print_vbool(vbool v); +void print_vint_hex(vint v); +void print_active_lanes(const char *pPrefix); +void print_vfloat(vfloat v); + diff --git a/thirdparty/basis_universal/encoder/cppspmd_sse.h b/thirdparty/basis_universal/encoder/cppspmd_sse.h new file mode 100644 index 0000000000..b39cb82a5f --- /dev/null +++ b/thirdparty/basis_universal/encoder/cppspmd_sse.h @@ -0,0 +1,2118 @@ +// cppspmd_sse.h +// Note for Basis Universal: All of the "cppspmd" code and headers are OPTIONAL to Basis Universal. if BASISU_SUPPORT_SSE is 0, it will never be included and does not impact compilation. +// SSE 2 or 4.1 +// Originally written by Nicolas Guillemot, Jefferson Amstutz in the "CppSPMD" project. +// 4/20: Richard Geldreich: Macro control flow, more SIMD instruction sets, optimizations, supports using multiple SIMD instruction sets in same executable. Still a work in progress! +// +// Originally Copyright 2016 Nicolas Guillemot +// Changed from the MIT license to Apache 2.0 with permission from the author. +// +// Modifications/enhancements Copyright 2020-2021 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdlib.h> +#include <stdint.h> +#include <assert.h> +#include <math.h> +#include <utility> +#include <algorithm> + +#if CPPSPMD_SSE2 +#include <xmmintrin.h> // SSE +#include <emmintrin.h> // SSE2 +#else +#include <xmmintrin.h> // SSE +#include <emmintrin.h> // SSE2 +#include <pmmintrin.h> // SSE3 +#include <tmmintrin.h> // SSSE3 +#include <smmintrin.h> // SSE4.1 +//#include <nmmintrin.h> // SSE4.2 +#endif + +#undef CPPSPMD_SSE +#undef CPPSPMD_AVX1 +#undef CPPSPMD_AVX2 +#undef CPPSPMD_AVX +#undef CPPSPMD_FLOAT4 +#undef CPPSPMD_INT16 + +#define CPPSPMD_SSE 1 +#define CPPSPMD_AVX 0 +#define CPPSPMD_AVX1 0 +#define CPPSPMD_AVX2 0 +#define CPPSPMD_FLOAT4 0 +#define CPPSPMD_INT16 0 + +#ifdef _MSC_VER + #ifndef CPPSPMD_DECL + #define CPPSPMD_DECL(type, name) __declspec(align(16)) type name + #endif + + #ifndef CPPSPMD_ALIGN + #define CPPSPMD_ALIGN(v) __declspec(align(v)) + #endif + + #define _mm_undefined_si128 _mm_setzero_si128 + #define _mm_undefined_ps _mm_setzero_ps +#else + #ifndef CPPSPMD_DECL + #define CPPSPMD_DECL(type, name) type name __attribute__((aligned(32))) + #endif + + #ifndef CPPSPMD_ALIGN + #define CPPSPMD_ALIGN(v) __attribute__((aligned(v))) + #endif +#endif + +#ifndef CPPSPMD_FORCE_INLINE +#ifdef _DEBUG +#define CPPSPMD_FORCE_INLINE inline +#else + #ifdef _MSC_VER + #define CPPSPMD_FORCE_INLINE __forceinline + #else + #define CPPSPMD_FORCE_INLINE inline + #endif +#endif +#endif + +#undef CPPSPMD +#undef CPPSPMD_ARCH + +#if CPPSPMD_SSE2 + #define CPPSPMD_SSE41 0 + #define CPPSPMD cppspmd_sse2 + #define CPPSPMD_ARCH _sse2 +#else + #define CPPSPMD_SSE41 1 + #define CPPSPMD cppspmd_sse41 + #define CPPSPMD_ARCH _sse41 +#endif + +#ifndef CPPSPMD_GLUER + #define CPPSPMD_GLUER(a, b) a##b +#endif + +#ifndef CPPSPMD_GLUER2 + #define CPPSPMD_GLUER2(a, b) CPPSPMD_GLUER(a, b) +#endif + +#ifndef CPPSPMD_NAME +#define CPPSPMD_NAME(a) CPPSPMD_GLUER2(a, CPPSPMD_ARCH) +#endif + +#undef VASSERT +#define VCOND(cond) ((exec_mask(vbool(cond)) & m_exec).get_movemask() == m_exec.get_movemask()) +#define VASSERT(cond) assert( VCOND(cond) ) + +#define CPPSPMD_ALIGNMENT (16) + +#define storeu_si32(p, a) (void)(*(int*)(p) = _mm_cvtsi128_si32((a))) + +namespace CPPSPMD +{ + +const int PROGRAM_COUNT_SHIFT = 2; +const int PROGRAM_COUNT = 1 << PROGRAM_COUNT_SHIFT; + +template <typename N> inline N* aligned_new() { void* p = _mm_malloc(sizeof(N), 64); new (p) N; return static_cast<N*>(p); } +template <typename N> void aligned_delete(N* p) { if (p) { p->~N(); _mm_free(p); } } + +CPPSPMD_DECL(const uint32_t, g_allones_128[4]) = { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX }; +CPPSPMD_DECL(const uint32_t, g_x_128[4]) = { UINT32_MAX, 0, 0, 0 }; +CPPSPMD_DECL(const float, g_onef_128[4]) = { 1.0f, 1.0f, 1.0f, 1.0f }; +CPPSPMD_DECL(const uint32_t, g_oneu_128[4]) = { 1, 1, 1, 1 }; + +CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) = +{ + { UINT32_MAX, 0, 0, 0 }, + { 0, UINT32_MAX, 0, 0 }, + { 0, 0, UINT32_MAX, 0 }, + { 0, 0, 0, UINT32_MAX }, +}; + +#if CPPSPMD_SSE41 +CPPSPMD_FORCE_INLINE __m128i _mm_blendv_epi32(__m128i a, __m128i b, __m128i c) { return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(c))); } +#endif + +CPPSPMD_FORCE_INLINE __m128i blendv_epi8(__m128i a, __m128i b, __m128i mask) +{ +#if CPPSPMD_SSE2 + return _mm_castps_si128(_mm_or_ps(_mm_and_ps(_mm_castsi128_ps(mask), _mm_castsi128_ps(b)), _mm_andnot_ps(_mm_castsi128_ps(mask), _mm_castsi128_ps(a)))); +#else + return _mm_blendv_epi8(a, b, mask); +#endif +} + +CPPSPMD_FORCE_INLINE __m128 blendv_mask_ps(__m128 a, __m128 b, __m128 mask) +{ +#if CPPSPMD_SSE2 + // We know it's a mask, so we can just emulate the blend. + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +#else + return _mm_blendv_ps(a, b, mask); +#endif +} + +CPPSPMD_FORCE_INLINE __m128 blendv_ps(__m128 a, __m128 b, __m128 mask) +{ +#if CPPSPMD_SSE2 + // Input is not a mask, but MSB bits - so emulate _mm_blendv_ps() by replicating bit 31. + mask = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(mask), 31)); + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +#else + return _mm_blendv_ps(a, b, mask); +#endif +} + +CPPSPMD_FORCE_INLINE __m128i blendv_mask_epi32(__m128i a, __m128i b, __m128i mask) +{ + return _mm_castps_si128(blendv_mask_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); +} + +CPPSPMD_FORCE_INLINE __m128i blendv_epi32(__m128i a, __m128i b, __m128i mask) +{ + return _mm_castps_si128(blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); +} + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE int extract_x(const __m128i& vec) { return _mm_cvtsi128_si32(vec); } +CPPSPMD_FORCE_INLINE int extract_y(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0x55)); } +CPPSPMD_FORCE_INLINE int extract_z(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0xAA)); } +CPPSPMD_FORCE_INLINE int extract_w(const __m128i& vec) { return _mm_cvtsi128_si32(_mm_shuffle_epi32(vec, 0xFF)); } + +// Returns float bits as int, to emulate _mm_extract_ps() +CPPSPMD_FORCE_INLINE int extract_ps_x(const __m128& vec) { float f = _mm_cvtss_f32(vec); return *(const int*)&f; } +CPPSPMD_FORCE_INLINE int extract_ps_y(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0x55)); return *(const int*)&f; } +CPPSPMD_FORCE_INLINE int extract_ps_z(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xAA)); return *(const int*)&f; } +CPPSPMD_FORCE_INLINE int extract_ps_w(const __m128& vec) { float f = _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xFF)); return *(const int*)&f; } + +// Returns floats +CPPSPMD_FORCE_INLINE float extractf_ps_x(const __m128& vec) { return _mm_cvtss_f32(vec); } +CPPSPMD_FORCE_INLINE float extractf_ps_y(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0x55)); } +CPPSPMD_FORCE_INLINE float extractf_ps_z(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xAA)); } +CPPSPMD_FORCE_INLINE float extractf_ps_w(const __m128& vec) { return _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, 0xFF)); } +#else +CPPSPMD_FORCE_INLINE int extract_x(const __m128i& vec) { return _mm_extract_epi32(vec, 0); } +CPPSPMD_FORCE_INLINE int extract_y(const __m128i& vec) { return _mm_extract_epi32(vec, 1); } +CPPSPMD_FORCE_INLINE int extract_z(const __m128i& vec) { return _mm_extract_epi32(vec, 2); } +CPPSPMD_FORCE_INLINE int extract_w(const __m128i& vec) { return _mm_extract_epi32(vec, 3); } + +// Returns float bits as int +CPPSPMD_FORCE_INLINE int extract_ps_x(const __m128& vec) { return _mm_extract_ps(vec, 0); } +CPPSPMD_FORCE_INLINE int extract_ps_y(const __m128& vec) { return _mm_extract_ps(vec, 1); } +CPPSPMD_FORCE_INLINE int extract_ps_z(const __m128& vec) { return _mm_extract_ps(vec, 2); } +CPPSPMD_FORCE_INLINE int extract_ps_w(const __m128& vec) { return _mm_extract_ps(vec, 3); } + +// Returns floats +CPPSPMD_FORCE_INLINE float extractf_ps_x(const __m128& vec) { int v = extract_ps_x(vec); return *(const float*)&v; } +CPPSPMD_FORCE_INLINE float extractf_ps_y(const __m128& vec) { int v = extract_ps_y(vec); return *(const float*)&v; } +CPPSPMD_FORCE_INLINE float extractf_ps_z(const __m128& vec) { int v = extract_ps_z(vec); return *(const float*)&v; } +CPPSPMD_FORCE_INLINE float extractf_ps_w(const __m128& vec) { int v = extract_ps_w(vec); return *(const float*)&v; } +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i insert_x(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 0), (uint32_t)v >> 16U, 1); } +CPPSPMD_FORCE_INLINE __m128i insert_y(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 2), (uint32_t)v >> 16U, 3); } +CPPSPMD_FORCE_INLINE __m128i insert_z(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 4), (uint32_t)v >> 16U, 5); } +CPPSPMD_FORCE_INLINE __m128i insert_w(const __m128i& vec, int v) { return _mm_insert_epi16(_mm_insert_epi16(vec, v, 6), (uint32_t)v >> 16U, 7); } +#else +CPPSPMD_FORCE_INLINE __m128i insert_x(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 0); } +CPPSPMD_FORCE_INLINE __m128i insert_y(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 1); } +CPPSPMD_FORCE_INLINE __m128i insert_z(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 2); } +CPPSPMD_FORCE_INLINE __m128i insert_w(const __m128i& vec, int v) { return _mm_insert_epi32(vec, v, 3); } +#endif + +#if CPPSPMD_SSE2 +inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) +{ + // Just emulate _mm_shuffle_epi8. This is very slow, but what else can we do? + CPPSPMD_ALIGN(16) uint8_t av[16]; + _mm_store_si128((__m128i*)av, a); + + CPPSPMD_ALIGN(16) uint8_t bvi[16]; + _mm_store_ps((float*)bvi, _mm_and_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(_mm_set1_epi32(0x0F0F0F0F)))); + + CPPSPMD_ALIGN(16) uint8_t result[16]; + + result[0] = av[bvi[0]]; + result[1] = av[bvi[1]]; + result[2] = av[bvi[2]]; + result[3] = av[bvi[3]]; + + result[4] = av[bvi[4]]; + result[5] = av[bvi[5]]; + result[6] = av[bvi[6]]; + result[7] = av[bvi[7]]; + + result[8] = av[bvi[8]]; + result[9] = av[bvi[9]]; + result[10] = av[bvi[10]]; + result[11] = av[bvi[11]]; + + result[12] = av[bvi[12]]; + result[13] = av[bvi[13]]; + result[14] = av[bvi[14]]; + result[15] = av[bvi[15]]; + + return _mm_andnot_si128(_mm_cmplt_epi8(b, _mm_setzero_si128()), _mm_load_si128((__m128i*)result)); +} +#else +CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b) +{ + return _mm_shuffle_epi8(a, b); +} +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i min_epi32(__m128i a, __m128i b) +{ + return blendv_mask_epi32(b, a, _mm_cmplt_epi32(a, b)); +} +CPPSPMD_FORCE_INLINE __m128i max_epi32(__m128i a, __m128i b) +{ + return blendv_mask_epi32(b, a, _mm_cmpgt_epi32(a, b)); +} +CPPSPMD_FORCE_INLINE __m128i min_epu32(__m128i a, __m128i b) +{ + __m128i n = _mm_set1_epi32(0x80000000); + __m128i ac = _mm_add_epi32(a, n); + __m128i bc = _mm_add_epi32(b, n); + return blendv_mask_epi32(b, a, _mm_cmplt_epi32(ac, bc)); +} +CPPSPMD_FORCE_INLINE __m128i max_epu32(__m128i a, __m128i b) +{ + __m128i n = _mm_set1_epi32(0x80000000); + __m128i ac = _mm_add_epi32(a, n); + __m128i bc = _mm_add_epi32(b, n); + return blendv_mask_epi32(b, a, _mm_cmpgt_epi32(ac, bc)); +} +#else +CPPSPMD_FORCE_INLINE __m128i min_epi32(__m128i a, __m128i b) +{ + return _mm_min_epi32(a, b); +} +CPPSPMD_FORCE_INLINE __m128i max_epi32(__m128i a, __m128i b) +{ + return _mm_max_epi32(a, b); +} +CPPSPMD_FORCE_INLINE __m128i min_epu32(__m128i a, __m128i b) +{ + return _mm_min_epu32(a, b); +} +CPPSPMD_FORCE_INLINE __m128i max_epu32(__m128i a, __m128i b) +{ + return _mm_max_epu32(a, b); +} +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i abs_epi32(__m128i a) +{ + __m128i sign_mask = _mm_srai_epi32(a, 31); + return _mm_sub_epi32(_mm_castps_si128(_mm_xor_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(sign_mask))), sign_mask); +} +#else +CPPSPMD_FORCE_INLINE __m128i abs_epi32(__m128i a) +{ + return _mm_abs_epi32(a); +} +#endif + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE __m128i mullo_epi32(__m128i a, __m128i b) +{ + __m128i tmp1 = _mm_mul_epu32(a, b); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); + return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); +} +#else +CPPSPMD_FORCE_INLINE __m128i mullo_epi32(__m128i a, __m128i b) +{ + return _mm_mullo_epi32(a, b); +} +#endif + +CPPSPMD_FORCE_INLINE __m128i mulhi_epu32(__m128i a, __m128i b) +{ + __m128i tmp1 = _mm_mul_epu32(a, b); + __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); + return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 3, 1)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 3, 1))); +} + +#if CPPSPMD_SSE2 +inline __m128i load_rgba32(const void* p) +{ + __m128i xmm = _mm_cvtsi32_si128(*(const int*)p); + xmm = _mm_unpacklo_epi8(xmm, _mm_setzero_si128()); + xmm = _mm_unpacklo_epi16(xmm, _mm_setzero_si128()); + return xmm; +} +#else +inline __m128i load_rgba32(const void* p) +{ + return _mm_cvtepu8_epi32(_mm_castps_si128(_mm_load_ss((const float*)p))); +} +#endif + +inline void transpose4x4(__m128i& x, __m128i& y, __m128i& z, __m128i& w, const __m128i& r0, const __m128i& r1, const __m128i& r2, const __m128i& r3) +{ + __m128i t0 = _mm_unpacklo_epi32(r0, r1); + __m128i t1 = _mm_unpacklo_epi32(r2, r3); + __m128i t2 = _mm_unpackhi_epi32(r0, r1); + __m128i t3 = _mm_unpackhi_epi32(r2, r3); + x = _mm_unpacklo_epi64(t0, t1); + y = _mm_unpackhi_epi64(t0, t1); + z = _mm_unpacklo_epi64(t2, t3); + w = _mm_unpackhi_epi64(t2, t3); +} + +const uint32_t ALL_ON_MOVEMASK = 0xF; + +struct spmd_kernel +{ + struct vint; + struct lint; + struct vbool; + struct vfloat; + + typedef int int_t; + typedef vint vint_t; + typedef lint lint_t; + + // Exec mask + struct exec_mask + { + __m128i m_mask; + + exec_mask() = default; + + CPPSPMD_FORCE_INLINE explicit exec_mask(const vbool& b); + CPPSPMD_FORCE_INLINE explicit exec_mask(const __m128i& mask) : m_mask(mask) { } + + CPPSPMD_FORCE_INLINE void enable_lane(uint32_t lane) { m_mask = _mm_load_si128((const __m128i *)&g_lane_masks_128[lane][0]); } + + static CPPSPMD_FORCE_INLINE exec_mask all_on() { return exec_mask{ _mm_load_si128((const __m128i*)g_allones_128) }; } + static CPPSPMD_FORCE_INLINE exec_mask all_off() { return exec_mask{ _mm_setzero_si128() }; } + + CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return _mm_movemask_ps(_mm_castsi128_ps(m_mask)); } + }; + + friend CPPSPMD_FORCE_INLINE bool all(const exec_mask& e); + friend CPPSPMD_FORCE_INLINE bool any(const exec_mask& e); + + CPPSPMD_FORCE_INLINE bool spmd_all() const { return all(m_exec); } + CPPSPMD_FORCE_INLINE bool spmd_any() const { return any(m_exec); } + CPPSPMD_FORCE_INLINE bool spmd_none() { return !any(m_exec); } + + // true if cond is true for all active lanes - false if no active lanes + CPPSPMD_FORCE_INLINE bool spmd_all(const vbool& e) { uint32_t m = m_exec.get_movemask(); return (m != 0) && ((exec_mask(e) & m_exec).get_movemask() == m); } + // true if cond is true for any active lanes + CPPSPMD_FORCE_INLINE bool spmd_any(const vbool& e) { return (exec_mask(e) & m_exec).get_movemask() != 0; } + CPPSPMD_FORCE_INLINE bool spmd_none(const vbool& e) { return !spmd_any(e); } + + friend CPPSPMD_FORCE_INLINE exec_mask operator^ (const exec_mask& a, const exec_mask& b); + friend CPPSPMD_FORCE_INLINE exec_mask operator& (const exec_mask& a, const exec_mask& b); + friend CPPSPMD_FORCE_INLINE exec_mask operator| (const exec_mask& a, const exec_mask& b); + + exec_mask m_exec; + exec_mask m_kernel_exec; + exec_mask m_continue_mask; +#ifdef _DEBUG + bool m_in_loop; +#endif + + CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return m_exec.get_movemask(); } + + void init(const exec_mask& kernel_exec); + + // Varying bool + + struct vbool + { + __m128i m_value; + + vbool() = default; + + CPPSPMD_FORCE_INLINE vbool(bool value) : m_value(_mm_set1_epi32(value ? UINT32_MAX : 0)) { } + + CPPSPMD_FORCE_INLINE explicit vbool(const __m128i& value) : m_value(value) { } + + CPPSPMD_FORCE_INLINE explicit operator vfloat() const; + CPPSPMD_FORCE_INLINE explicit operator vint() const; + + private: + vbool& operator=(const vbool&); + }; + + friend vbool operator!(const vbool& v); + + CPPSPMD_FORCE_INLINE vbool& store(vbool& dst, const vbool& src) + { + dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); + return dst; + } + + CPPSPMD_FORCE_INLINE vbool& store_all(vbool& dst, const vbool& src) + { + dst.m_value = src.m_value; + return dst; + } + + // Varying float + struct vfloat + { + __m128 m_value; + + vfloat() = default; + + CPPSPMD_FORCE_INLINE explicit vfloat(const __m128& v) : m_value(v) { } + + CPPSPMD_FORCE_INLINE vfloat(float value) : m_value(_mm_set1_ps(value)) { } + + CPPSPMD_FORCE_INLINE explicit vfloat(int value) : m_value(_mm_set1_ps((float)value)) { } + + private: + vfloat& operator=(const vfloat&); + }; + + CPPSPMD_FORCE_INLINE vfloat& store(vfloat& dst, const vfloat& src) + { + dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask)); + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat& store(vfloat&& dst, const vfloat& src) + { + dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask)); + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat& dst, const vfloat& src) + { + dst.m_value = src.m_value; + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat&& dst, const vfloat& src) + { + dst.m_value = src.m_value; + return dst; + } + + // Linear ref to floats + struct float_lref + { + float* m_pValue; + + private: + float_lref& operator=(const float_lref&); + }; + + CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref& dst, const vfloat& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_ps(dst.m_pValue, src.m_value); + else + _mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask))); + return dst; + } + + CPPSPMD_FORCE_INLINE const float_lref& store(const float_lref&& dst, const vfloat& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_ps(dst.m_pValue, src.m_value); + else + _mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask))); + return dst; + } + + CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref& dst, const vfloat& src) + { + _mm_storeu_ps(dst.m_pValue, src.m_value); + return dst; + } + + CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref&& dst, const vfloat& src) + { + _mm_storeu_ps(dst.m_pValue, src.m_value); + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat load(const float_lref& src) + { + return vfloat{ _mm_and_ps(_mm_loadu_ps(src.m_pValue), _mm_castsi128_ps(m_exec.m_mask)) }; + } + + // Varying ref to floats + struct float_vref + { + __m128i m_vindex; + float* m_pValue; + + private: + float_vref& operator=(const float_vref&); + }; + + // Varying ref to varying float + struct vfloat_vref + { + __m128i m_vindex; + vfloat* m_pValue; + + private: + vfloat_vref& operator=(const vfloat_vref&); + }; + + // Varying ref to varying int + struct vint_vref + { + __m128i m_vindex; + vint* m_pValue; + + private: + vint_vref& operator=(const vint_vref&); + }; + + CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src); + CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref&& dst, const vfloat& src); + + CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref& dst, const vfloat& src); + CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref&& dst, const vfloat& src); + + CPPSPMD_FORCE_INLINE vfloat load(const float_vref& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i *)vindex, src.m_vindex); + + CPPSPMD_ALIGN(16) float loaded[4]; + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + loaded[i] = src.m_pValue[vindex[i]]; + } + return vfloat{ _mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)loaded)) }; + } + + CPPSPMD_FORCE_INLINE vfloat load_all(const float_vref& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i *)vindex, src.m_vindex); + + CPPSPMD_ALIGN(16) float loaded[4]; + + for (int i = 0; i < 4; i++) + loaded[i] = src.m_pValue[vindex[i]]; + return vfloat{ _mm_load_ps((const float*)loaded) }; + } + + // Linear ref to ints + struct int_lref + { + int* m_pValue; + + private: + int_lref& operator=(const int_lref&); + }; + + CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + { + _mm_storeu_si128((__m128i *)dst.m_pValue, src.m_value); + } + else + { + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i *)stored, src.m_value); + + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[i] = stored[i]; + } + } + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const int_lref& src) + { + __m128i v = _mm_loadu_si128((const __m128i*)src.m_pValue); + + v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask))); + + return vint{ v }; + } + + // Linear ref to int16's + struct int16_lref + { + int16_t* m_pValue; + + private: + int16_lref& operator=(const int16_lref&); + }; + + CPPSPMD_FORCE_INLINE const int16_lref& store(const int16_lref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i *)stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[i] = static_cast<int16_t>(stored[i]); + } + return dst; + } + + CPPSPMD_FORCE_INLINE const int16_lref& store_all(const int16_lref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i *)stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[i] = static_cast<int16_t>(stored[i]); + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const int16_lref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + for (int i = 0; i < 4; i++) + values[i] = static_cast<int16_t>(src.m_pValue[i]); + + __m128i t = _mm_load_si128( (const __m128i *)values ); + + return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps( t ), _mm_castsi128_ps(m_exec.m_mask))) }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const int16_lref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + for (int i = 0; i < 4; i++) + values[i] = static_cast<int16_t>(src.m_pValue[i]); + + __m128i t = _mm_load_si128( (const __m128i *)values ); + + return vint{ t }; + } + + // Linear ref to constant ints + struct cint_lref + { + const int* m_pValue; + + private: + cint_lref& operator=(const cint_lref&); + }; + + CPPSPMD_FORCE_INLINE vint load(const cint_lref& src) + { + __m128i v = _mm_loadu_si128((const __m128i *)src.m_pValue); + v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask))); + return vint{ v }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const cint_lref& src) + { + return vint{ _mm_loadu_si128((const __m128i *)src.m_pValue) }; + } + + // Varying ref to ints + struct int_vref + { + __m128i m_vindex; + int* m_pValue; + + private: + int_vref& operator=(const int_vref&); + }; + + // Varying ref to constant ints + struct cint_vref + { + __m128i m_vindex; + const int* m_pValue; + + private: + cint_vref& operator=(const cint_vref&); + }; + + // Varying int + struct vint + { + __m128i m_value; + + vint() = default; + + CPPSPMD_FORCE_INLINE explicit vint(const __m128i& value) : m_value(value) { } + + CPPSPMD_FORCE_INLINE explicit vint(const lint &other) : m_value(other.m_value) { } + + CPPSPMD_FORCE_INLINE vint& operator=(const lint& other) { m_value = other.m_value; return *this; } + + CPPSPMD_FORCE_INLINE vint(int value) : m_value(_mm_set1_epi32(value)) { } + + CPPSPMD_FORCE_INLINE explicit vint(float value) : m_value(_mm_set1_epi32((int)value)) { } + + CPPSPMD_FORCE_INLINE explicit vint(const vfloat& other) : m_value(_mm_cvttps_epi32(other.m_value)) { } + + CPPSPMD_FORCE_INLINE explicit operator vbool() const + { + return vbool{ _mm_xor_si128( _mm_load_si128((const __m128i*)g_allones_128), _mm_cmpeq_epi32(m_value, _mm_setzero_si128())) }; + } + + CPPSPMD_FORCE_INLINE explicit operator vfloat() const + { + return vfloat{ _mm_cvtepi32_ps(m_value) }; + } + + CPPSPMD_FORCE_INLINE int_vref operator[](int* ptr) const + { + return int_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE cint_vref operator[](const int* ptr) const + { + return cint_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE float_vref operator[](float* ptr) const + { + return float_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE vfloat_vref operator[](vfloat* ptr) const + { + return vfloat_vref{ m_value, ptr }; + } + + CPPSPMD_FORCE_INLINE vint_vref operator[](vint* ptr) const + { + return vint_vref{ m_value, ptr }; + } + + private: + vint& operator=(const vint&); + }; + + // Load/store linear int + CPPSPMD_FORCE_INLINE void storeu_linear(int *pDst, const vint& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_si128((__m128i *)pDst, src.m_value); + else + { + if (mask & 1) pDst[0] = extract_x(src.m_value); + if (mask & 2) pDst[1] = extract_y(src.m_value); + if (mask & 4) pDst[2] = extract_z(src.m_value); + if (mask & 8) pDst[3] = extract_w(src.m_value); + } + } + + CPPSPMD_FORCE_INLINE void storeu_linear_all(int *pDst, const vint& src) + { + _mm_storeu_si128((__m128i*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE void store_linear_all(int *pDst, const vint& src) + { + _mm_store_si128((__m128i*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE vint loadu_linear(const int *pSrc) + { + __m128i v = _mm_loadu_si128((const __m128i*)pSrc); + + v = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(v), _mm_castsi128_ps(m_exec.m_mask))); + + return vint{ v }; + } + + CPPSPMD_FORCE_INLINE vint loadu_linear_all(const int *pSrc) + { + return vint{ _mm_loadu_si128((__m128i*)pSrc) }; + } + + CPPSPMD_FORCE_INLINE vint load_linear_all(const int *pSrc) + { + return vint{ _mm_load_si128((__m128i*)pSrc) }; + } + + // Load/store linear float + CPPSPMD_FORCE_INLINE void storeu_linear(float *pDst, const vfloat& src) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + if (mask == ALL_ON_MOVEMASK) + _mm_storeu_ps((float*)pDst, src.m_value); + else + { + int *pDstI = (int *)pDst; + if (mask & 1) pDstI[0] = extract_ps_x(src.m_value); + if (mask & 2) pDstI[1] = extract_ps_y(src.m_value); + if (mask & 4) pDstI[2] = extract_ps_z(src.m_value); + if (mask & 8) pDstI[3] = extract_ps_w(src.m_value); + } + } + + CPPSPMD_FORCE_INLINE void storeu_linear_all(float *pDst, const vfloat& src) + { + _mm_storeu_ps((float*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE void store_linear_all(float *pDst, const vfloat& src) + { + _mm_store_ps((float*)pDst, src.m_value); + } + + CPPSPMD_FORCE_INLINE vfloat loadu_linear(const float *pSrc) + { + __m128 v = _mm_loadu_ps((const float*)pSrc); + + v = _mm_and_ps(v, _mm_castsi128_ps(m_exec.m_mask)); + + return vfloat{ v }; + } + + CPPSPMD_FORCE_INLINE vfloat loadu_linear_all(const float *pSrc) + { + return vfloat{ _mm_loadu_ps((float*)pSrc) }; + } + + CPPSPMD_FORCE_INLINE vfloat load_linear_all(const float *pSrc) + { + return vfloat{ _mm_load_ps((float*)pSrc) }; + } + + CPPSPMD_FORCE_INLINE vint& store(vint& dst, const vint& src) + { + dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); + return dst; + } + + CPPSPMD_FORCE_INLINE const int_vref& store(const int_vref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i*)stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; + } + + CPPSPMD_FORCE_INLINE vint& store_all(vint& dst, const vint& src) + { + dst.m_value = src.m_value; + return dst; + } + + CPPSPMD_FORCE_INLINE const int_vref& store_all(const int_vref& dst, const vint& src) + { + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) int stored[4]; + _mm_store_si128((__m128i*)stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const int_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + values[i] = src.m_pValue[indices[i]]; + } + + return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const int_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + for (int i = 0; i < 4; i++) + values[i] = src.m_pValue[indices[i]]; + + return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) }; + } + + CPPSPMD_FORCE_INLINE vint load(const cint_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + values[i] = src.m_pValue[indices[i]]; + } + + return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const cint_vref& src) + { + CPPSPMD_ALIGN(16) int values[4]; + + CPPSPMD_ALIGN(16) int indices[4]; + _mm_store_si128((__m128i *)indices, src.m_vindex); + + for (int i = 0; i < 4; i++) + values[i] = src.m_pValue[indices[i]]; + + return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) }; + } + + CPPSPMD_FORCE_INLINE vint load_bytes_all(const cint_vref& src) + { + __m128i v0_l; + + const uint8_t* pSrc = (const uint8_t*)src.m_pValue; + v0_l = insert_x(_mm_undefined_si128(), ((int*)(pSrc + extract_x(src.m_vindex)))[0]); + v0_l = insert_y(v0_l, ((int*)(pSrc + extract_y(src.m_vindex)))[0]); + v0_l = insert_z(v0_l, ((int*)(pSrc + extract_z(src.m_vindex)))[0]); + v0_l = insert_w(v0_l, ((int*)(pSrc + extract_w(src.m_vindex)))[0]); + + return vint{ v0_l }; + } + + CPPSPMD_FORCE_INLINE vint load_words_all(const cint_vref& src) + { + __m128i v0_l; + + const uint8_t* pSrc = (const uint8_t*)src.m_pValue; + v0_l = insert_x(_mm_undefined_si128(), ((int16_t*)(pSrc + 2 * extract_x(src.m_vindex)))[0]); + v0_l = insert_y(v0_l, ((int16_t*)(pSrc + 2 * extract_y(src.m_vindex)))[0]); + v0_l = insert_z(v0_l, ((int16_t*)(pSrc + 2 * extract_z(src.m_vindex)))[0]); + v0_l = insert_w(v0_l, ((int16_t*)(pSrc + 2 * extract_w(src.m_vindex)))[0]); + + return vint{ v0_l }; + } + + CPPSPMD_FORCE_INLINE void store_strided(int *pDst, uint32_t stride, const vint &v) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) pDst[0] = extract_x(v.m_value); + if (mask & 2) pDst[stride] = extract_y(v.m_value); + if (mask & 4) pDst[stride*2] = extract_z(v.m_value); + if (mask & 8) pDst[stride*3] = extract_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE void store_strided(float *pDstF, uint32_t stride, const vfloat &v) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) ((int *)pDstF)[0] = extract_ps_x(v.m_value); + if (mask & 2) ((int *)pDstF)[stride] = extract_ps_y(v.m_value); + if (mask & 4) ((int *)pDstF)[stride*2] = extract_ps_z(v.m_value); + if (mask & 8) ((int *)pDstF)[stride*3] = extract_ps_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE void store_all_strided(int *pDst, uint32_t stride, const vint &v) + { + pDst[0] = extract_x(v.m_value); + pDst[stride] = extract_y(v.m_value); + pDst[stride*2] = extract_z(v.m_value); + pDst[stride*3] = extract_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE void store_all_strided(float *pDstF, uint32_t stride, const vfloat &v) + { + ((int *)pDstF)[0] = extract_ps_x(v.m_value); + ((int *)pDstF)[stride] = extract_ps_y(v.m_value); + ((int *)pDstF)[stride*2] = extract_ps_z(v.m_value); + ((int *)pDstF)[stride*3] = extract_ps_w(v.m_value); + } + + CPPSPMD_FORCE_INLINE vint load_strided(const int *pSrc, uint32_t stride) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) int vals[4] = { 0, 0, 0, 0 }; + if (mask & 1) vals[0] = pSrc[0]; + if (mask & 2) vals[1] = pSrc[stride]; + if (mask & 4) vals[2] = pSrc[stride * 2]; + if (mask & 8) vals[3] = pSrc[stride * 3]; + return vint{ _mm_load_si128((__m128i*)vals) }; +#else + const float* pSrcF = (const float*)pSrc; + __m128 v = _mm_setzero_ps(); + if (mask & 1) v = _mm_load_ss(pSrcF); + if (mask & 2) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10); + if (mask & 4) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 2 * stride), 0x20); + if (mask & 8) v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 3 * stride), 0x30); + return vint{ _mm_castps_si128(v) }; +#endif + } + + CPPSPMD_FORCE_INLINE vfloat load_strided(const float *pSrc, uint32_t stride) + { + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) float vals[4] = { 0, 0, 0, 0 }; + if (mask & 1) vals[0] = pSrc[0]; + if (mask & 2) vals[1] = pSrc[stride]; + if (mask & 4) vals[2] = pSrc[stride * 2]; + if (mask & 8) vals[3] = pSrc[stride * 3]; + return vfloat{ _mm_load_ps(vals) }; +#else + __m128 v = _mm_setzero_ps(); + if (mask & 1) v = _mm_load_ss(pSrc); + if (mask & 2) v = _mm_insert_ps(v, _mm_load_ss(pSrc + stride), 0x10); + if (mask & 4) v = _mm_insert_ps(v, _mm_load_ss(pSrc + 2 * stride), 0x20); + if (mask & 8) v = _mm_insert_ps(v, _mm_load_ss(pSrc + 3 * stride), 0x30); + return vfloat{ v }; +#endif + } + + CPPSPMD_FORCE_INLINE vint load_all_strided(const int *pSrc, uint32_t stride) + { +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) int vals[4]; + vals[0] = pSrc[0]; + vals[1] = pSrc[stride]; + vals[2] = pSrc[stride * 2]; + vals[3] = pSrc[stride * 3]; + return vint{ _mm_load_si128((__m128i*)vals) }; +#else + const float* pSrcF = (const float*)pSrc; + __m128 v = _mm_load_ss(pSrcF); + v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10); + v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 2 * stride), 0x20); + v = _mm_insert_ps(v, _mm_load_ss(pSrcF + 3 * stride), 0x30); + return vint{ _mm_castps_si128(v) }; +#endif + } + + CPPSPMD_FORCE_INLINE vfloat load_all_strided(const float *pSrc, uint32_t stride) + { +#if CPPSPMD_SSE2 + CPPSPMD_ALIGN(16) float vals[4]; + vals[0] = pSrc[0]; + vals[1] = pSrc[stride]; + vals[2] = pSrc[stride * 2]; + vals[3] = pSrc[stride * 3]; + return vfloat{ _mm_load_ps(vals) }; +#else + __m128 v = _mm_load_ss(pSrc); + v = _mm_insert_ps(v, _mm_load_ss(pSrc + stride), 0x10); + v = _mm_insert_ps(v, _mm_load_ss(pSrc + 2 * stride), 0x20); + v = _mm_insert_ps(v, _mm_load_ss(pSrc + 3 * stride), 0x30); + return vfloat{ v }; +#endif + } + + CPPSPMD_FORCE_INLINE const vfloat_vref& store(const vfloat_vref& dst, const vfloat& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(_mm_castps_si128(src.m_value)); + if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(_mm_castps_si128(src.m_value)); + if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(_mm_castps_si128(src.m_value)); + if (mask & 8) ((int *)(&dst.m_pValue[extract_w(dst.m_vindex)]))[3] = extract_w(_mm_castps_si128(src.m_value)); + + return dst; + } + + CPPSPMD_FORCE_INLINE vfloat load(const vfloat_vref& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + __m128i k = _mm_setzero_si128(); + + if (mask & 1) k = insert_x(k, ((int *)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); + if (mask & 2) k = insert_y(k, ((int *)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); + if (mask & 4) k = insert_z(k, ((int *)(&src.m_pValue[extract_z(src.m_vindex)]))[2]); + if (mask & 8) k = insert_w(k, ((int *)(&src.m_pValue[extract_w(src.m_vindex)]))[3]); + + return vfloat{ _mm_castsi128_ps(k) }; + } + + CPPSPMD_FORCE_INLINE const vint_vref& store(const vint_vref& dst, const vint& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(src.m_value); + if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(src.m_value); + if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(src.m_value); + if (mask & 8) ((int *)(&dst.m_pValue[extract_w(dst.m_vindex)]))[3] = extract_w(src.m_value); + + return dst; + } + + CPPSPMD_FORCE_INLINE vint load(const vint_vref& src) + { + // TODO: There's surely a better way + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + + __m128i k = _mm_setzero_si128(); + + if (mask & 1) k = insert_x(k, ((int *)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); + if (mask & 2) k = insert_y(k, ((int *)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); + if (mask & 4) k = insert_z(k, ((int *)(&src.m_pValue[extract_z(src.m_vindex)]))[2]); + if (mask & 8) k = insert_w(k, ((int *)(&src.m_pValue[extract_w(src.m_vindex)]))[3]); + + return vint{ k }; + } + + CPPSPMD_FORCE_INLINE vint load_all(const vint_vref& src) + { + // TODO: There's surely a better way + __m128i k; + + k = insert_x(k, ((int*)(&src.m_pValue[extract_x(src.m_vindex)]))[0]); + k = insert_y(k, ((int*)(&src.m_pValue[extract_y(src.m_vindex)]))[1]); + k = insert_z(k, ((int*)(&src.m_pValue[extract_z(src.m_vindex)]))[2]); + k = insert_w(k, ((int*)(&src.m_pValue[extract_w(src.m_vindex)]))[3]); + + return vint{ k }; + } + + // Linear integer + struct lint + { + __m128i m_value; + + CPPSPMD_FORCE_INLINE explicit lint(__m128i value) + : m_value(value) + { } + + CPPSPMD_FORCE_INLINE explicit operator vfloat() const + { + return vfloat{ _mm_cvtepi32_ps(m_value) }; + } + + CPPSPMD_FORCE_INLINE explicit operator vint() const + { + return vint{ m_value }; + } + + CPPSPMD_FORCE_INLINE int get_first_value() const + { + return _mm_cvtsi128_si32(m_value); + } + + CPPSPMD_FORCE_INLINE float_lref operator[](float* ptr) const + { + return float_lref{ ptr + get_first_value() }; + } + + CPPSPMD_FORCE_INLINE int_lref operator[](int* ptr) const + { + return int_lref{ ptr + get_first_value() }; + } + + CPPSPMD_FORCE_INLINE int16_lref operator[](int16_t* ptr) const + { + return int16_lref{ ptr + get_first_value() }; + } + + CPPSPMD_FORCE_INLINE cint_lref operator[](const int* ptr) const + { + return cint_lref{ ptr + get_first_value() }; + } + + private: + lint& operator=(const lint&); + }; + + CPPSPMD_FORCE_INLINE lint& store_all(lint& dst, const lint& src) + { + dst.m_value = src.m_value; + return dst; + } + + const lint program_index = lint{ _mm_set_epi32( 3, 2, 1, 0 ) }; + + // SPMD condition helpers + + template<typename IfBody> + CPPSPMD_FORCE_INLINE void spmd_if(const vbool& cond, const IfBody& ifBody); + + CPPSPMD_FORCE_INLINE void spmd_if_break(const vbool& cond); + + // No breaks, continues, etc. allowed + template<typename IfBody> + CPPSPMD_FORCE_INLINE void spmd_sif(const vbool& cond, const IfBody& ifBody); + + // No breaks, continues, etc. allowed + template<typename IfBody, typename ElseBody> + CPPSPMD_FORCE_INLINE void spmd_sifelse(const vbool& cond, const IfBody& ifBody, const ElseBody &elseBody); + + template<typename IfBody, typename ElseBody> + CPPSPMD_FORCE_INLINE void spmd_ifelse(const vbool& cond, const IfBody& ifBody, const ElseBody& elseBody); + + template<typename WhileCondBody, typename WhileBody> + CPPSPMD_FORCE_INLINE void spmd_while(const WhileCondBody& whileCondBody, const WhileBody& whileBody); + + template<typename ForInitBody, typename ForCondBody, typename ForIncrBody, typename ForBody> + CPPSPMD_FORCE_INLINE void spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody); + + template<typename ForeachBody> + CPPSPMD_FORCE_INLINE void spmd_foreach(int begin, int end, const ForeachBody& foreachBody); + +#ifdef _DEBUG + CPPSPMD_FORCE_INLINE void check_masks(); +#else + CPPSPMD_FORCE_INLINE void check_masks() { } +#endif + + CPPSPMD_FORCE_INLINE void spmd_break(); + CPPSPMD_FORCE_INLINE void spmd_continue(); + + CPPSPMD_FORCE_INLINE void spmd_return(); + + template<typename UnmaskedBody> + CPPSPMD_FORCE_INLINE void spmd_unmasked(const UnmaskedBody& unmaskedBody); + + template<typename SPMDKernel, typename... Args> + //CPPSPMD_FORCE_INLINE decltype(auto) spmd_call(Args&&... args); + CPPSPMD_FORCE_INLINE void spmd_call(Args&&... args); + + CPPSPMD_FORCE_INLINE void swap(vint &a, vint &b) { vint temp = a; store(a, b); store(b, temp); } + CPPSPMD_FORCE_INLINE void swap(vfloat &a, vfloat &b) { vfloat temp = a; store(a, b); store(b, temp); } + CPPSPMD_FORCE_INLINE void swap(vbool &a, vbool &b) { vbool temp = a; store(a, b); store(b, temp); } + + CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) + { + __m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask)); + +//#if CPPSPMD_SSE2 +#if 1 + // See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026 + __m128 shuf = _mm_shuffle_ps(k3210, k3210, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 sums = _mm_add_ps(k3210, shuf); + shuf = _mm_movehl_ps(shuf, sums); + sums = _mm_add_ss(sums, shuf); + return _mm_cvtss_f32(sums); +#else + // This is pretty slow. + __m128 a = _mm_hadd_ps(k3210, k3210); + __m128 b = _mm_hadd_ps(a, a); + return extractf_ps_x(b); +#endif + } + + CPPSPMD_FORCE_INLINE int reduce_add(vint v) + { + __m128i k3210 = blendv_mask_epi32(_mm_setzero_si128(), v.m_value, m_exec.m_mask); + + // See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-sse-vector-sum-or-other-reduction/35270026#35270026 + __m128i shuf = _mm_shuffle_epi32(k3210, _MM_SHUFFLE(2, 3, 0, 1)); + __m128i sums = _mm_add_epi32(k3210, shuf); + shuf = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(shuf), _mm_castsi128_ps(sums))); + sums = _mm_add_epi32(sums, shuf); + return extract_x(sums); + } + + #include "cppspmd_math_declares.h" + +}; // struct spmd_kernel + +using exec_mask = spmd_kernel::exec_mask; +using vint = spmd_kernel::vint; +using int_lref = spmd_kernel::int_lref; +using cint_vref = spmd_kernel::cint_vref; +using cint_lref = spmd_kernel::cint_lref; +using int_vref = spmd_kernel::int_vref; +using lint = spmd_kernel::lint; +using vbool = spmd_kernel::vbool; +using vfloat = spmd_kernel::vfloat; +using float_lref = spmd_kernel::float_lref; +using float_vref = spmd_kernel::float_vref; +using vfloat_vref = spmd_kernel::vfloat_vref; +using vint_vref = spmd_kernel::vint_vref; + +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const +{ + return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) }; +} + +// Returns UINT32_MAX's for true, 0 for false. (Should it return 1's?) +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const +{ + return vint { m_value }; +} + +CPPSPMD_FORCE_INLINE vbool operator!(const vbool& v) +{ + return vbool{ _mm_castps_si128(_mm_xor_ps(_mm_load_ps((const float*)g_allones_128), _mm_castsi128_ps(v.m_value))) }; +} + +CPPSPMD_FORCE_INLINE exec_mask::exec_mask(const vbool& b) { m_mask = b.m_value; } + +CPPSPMD_FORCE_INLINE exec_mask operator^(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_xor_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE exec_mask operator&(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_and_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE exec_mask operator|(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_or_si128(a.m_mask, b.m_mask) }; } + +CPPSPMD_FORCE_INLINE bool all(const exec_mask& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_mask)) == ALL_ON_MOVEMASK; } +CPPSPMD_FORCE_INLINE bool any(const exec_mask& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_mask)) != 0; } + +// Bad pattern - doesn't factor in the current exec mask. Prefer spmd_any() instead. +CPPSPMD_FORCE_INLINE bool all(const vbool& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_value)) == ALL_ON_MOVEMASK; } +CPPSPMD_FORCE_INLINE bool any(const vbool& e) { return _mm_movemask_ps(_mm_castsi128_ps(e.m_value)) != 0; } + +CPPSPMD_FORCE_INLINE exec_mask andnot(const exec_mask& a, const exec_mask& b) { return exec_mask{ _mm_andnot_si128(a.m_mask, b.m_mask) }; } +CPPSPMD_FORCE_INLINE vbool operator||(const vbool& a, const vbool& b) { return vbool{ _mm_or_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator&&(const vbool& a, const vbool& b) { return vbool{ _mm_and_si128(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat operator+(const vfloat& a, const vfloat& b) { return vfloat{ _mm_add_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, const vfloat& b) { return vfloat{ _mm_sub_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator+(float a, const vfloat& b) { return vfloat(a) + b; } +CPPSPMD_FORCE_INLINE vfloat operator+(const vfloat& a, float b) { return a + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, const vint& b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(const vint& a, const vfloat& b) { return vfloat(a) - b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, int b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(int a, const vfloat& b) { return vfloat(a) - b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& a, float b) { return a - vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator-(float a, const vfloat& b) { return vfloat(a) - b; } + +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, const vfloat& b) { return vfloat{ _mm_mul_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, float b) { return a * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(float a, const vfloat& b) { return vfloat(a) * b; } +CPPSPMD_FORCE_INLINE vfloat operator*(const vfloat& a, int b) { return a * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(int a, const vfloat& b) { return vfloat(a) * b; } + +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, const vfloat& b) { return vfloat{ _mm_div_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, int b) { return a / vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator/(int a, const vfloat& b) { return vfloat(a) / b; } +CPPSPMD_FORCE_INLINE vfloat operator/(const vfloat& a, float b) { return a / vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator/(float a, const vfloat& b) { return vfloat(a) / b; } +CPPSPMD_FORCE_INLINE vfloat operator-(const vfloat& v) { return vfloat{ _mm_sub_ps(_mm_xor_ps(v.m_value, v.m_value), v.m_value) }; } + +CPPSPMD_FORCE_INLINE vbool operator==(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpeq_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator==(const vfloat& a, float b) { return a == vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator!=(const vfloat& a, const vfloat& b) { return !vbool{ _mm_castps_si128(_mm_cmpeq_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator!=(const vfloat& a, float b) { return a != vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator<(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmplt_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator<(const vfloat& a, float b) { return a < vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator>(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpgt_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const vfloat& a, float b) { return a > vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator<=(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmple_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const vfloat& a, float b) { return a <= vfloat(b); } + +CPPSPMD_FORCE_INLINE vbool operator>=(const vfloat& a, const vfloat& b) { return vbool{ _mm_castps_si128(_mm_cmpge_ps(a.m_value, b.m_value)) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const vfloat& a, float b) { return a >= vfloat(b); } + +CPPSPMD_FORCE_INLINE vfloat spmd_ternaryf(const vbool& cond, const vfloat& a, const vfloat& b) { return vfloat{ blendv_mask_ps(b.m_value, a.m_value, _mm_castsi128_ps(cond.m_value)) }; } +CPPSPMD_FORCE_INLINE vint spmd_ternaryi(const vbool& cond, const vint& a, const vint& b) { return vint{ blendv_mask_epi32(b.m_value, a.m_value, cond.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat sqrt(const vfloat& v) { return vfloat{ _mm_sqrt_ps(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat abs(const vfloat& v) { return vfloat{ _mm_andnot_ps(_mm_set1_ps(-0.0f), v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat max(const vfloat& a, const vfloat& b) { return vfloat{ _mm_max_ps(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat min(const vfloat& a, const vfloat& b) { return vfloat{ _mm_min_ps(a.m_value, b.m_value) }; } + +#if CPPSPMD_SSE2 +CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU) ); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvttps_epi32(a.m_value); + + __m128 af = _mm_cvtepi32_ps(ai); + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +CPPSPMD_FORCE_INLINE vfloat floor(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvtps_epi32(a.m_value); + __m128 af = _mm_cvtepi32_ps(ai); + __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmpgt_ps(af, a.m_value))); + + af = _mm_add_ps(af, changed); + + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +CPPSPMD_FORCE_INLINE vfloat ceil(const vfloat& a) +{ + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); + + __m128i ai = _mm_cvtps_epi32(a.m_value); + __m128 af = _mm_cvtepi32_ps(ai); + __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmplt_ps(af, a.m_value))); + + af = _mm_sub_ps(af, changed); + + return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; +} + +// We need to disable unsafe math optimizations for the key operations used for rounding to nearest. +// I wish there was a better way. +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__) +inline __m128 add_sub(__m128 a, __m128 b) __attribute__((optimize("-fno-unsafe-math-optimizations"))) +#elif defined(__clang__) +inline __m128 add_sub(__m128 a, __m128 b) __attribute__((optnone)) +#elif defined (_MSC_VER) +#pragma float_control(push) +#pragma float_control(precise, on) +inline __m128 add_sub(__m128 a, __m128 b) +#else +inline __m128 add_sub(__m128 a, __m128 b) +#endif +{ + return _mm_sub_ps(_mm_add_ps(a, b), b); +} + +#if defined (_MSC_VER) +#pragma float_control(pop) +#endif + +CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat& a) +{ + __m128i no_fract_fp_bits = _mm_castps_si128(_mm_set1_ps(8388608.0f)); + + __m128i sign_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x80000000U)); + __m128 force_int = _mm_castsi128_ps(_mm_or_si128(no_fract_fp_bits, sign_a)); + + // Can't use individual _mm_add_ps/_mm_sub_ps - this will be optimized out with /fp:fast by clang and probably other compilers. + //__m128 temp1 = _mm_add_ps(a.m_value, force_int); + //__m128 temp2 = _mm_sub_ps(temp1, force_int); + __m128 temp2 = add_sub(a.m_value, force_int); + + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); + __m128i has_fractional = _mm_cmplt_epi32(abs_a, no_fract_fp_bits); + return vfloat{ blendv_mask_ps(a.m_value, temp2, _mm_castsi128_ps(has_fractional)) }; +} + +#else +CPPSPMD_FORCE_INLINE vfloat floor(const vfloat& v) { return vfloat{ _mm_floor_ps(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat ceil(const vfloat& a) { return vfloat{ _mm_ceil_ps(a.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat &a) { return vfloat{ _mm_round_ps(a.m_value, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ) }; } +CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat &a) { return vfloat{ _mm_round_ps(a.m_value, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC ) }; } +#endif + +CPPSPMD_FORCE_INLINE vfloat frac(const vfloat& a) { return a - floor(a); } +CPPSPMD_FORCE_INLINE vfloat fmod(vfloat a, vfloat b) { vfloat c = frac(abs(a / b)) * abs(b); return spmd_ternaryf(a < 0, -c, c); } +CPPSPMD_FORCE_INLINE vfloat sign(const vfloat& a) { return spmd_ternaryf(a < 0.0f, 1.0f, 1.0f); } + +CPPSPMD_FORCE_INLINE vint max(const vint& a, const vint& b) { return vint{ max_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min(const vint& a, const vint& b) { return vint{ min_epi32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint maxu(const vint& a, const vint& b) { return vint{ max_epu32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint minu(const vint& a, const vint& b) { return vint{ min_epu32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint abs(const vint& v) { return vint{ abs_epi32(v.m_value) }; } + +CPPSPMD_FORCE_INLINE vint byteswap(const vint& v) { return vint{ shuffle_epi8(v.m_value, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)) }; } + +CPPSPMD_FORCE_INLINE vint cast_vfloat_to_vint(const vfloat& v) { return vint{ _mm_castps_si128(v.m_value) }; } +CPPSPMD_FORCE_INLINE vfloat cast_vint_to_vfloat(const vint& v) { return vfloat{ _mm_castsi128_ps(v.m_value) }; } + +CPPSPMD_FORCE_INLINE vfloat clamp(const vfloat& v, const vfloat& a, const vfloat& b) +{ + return vfloat{ _mm_min_ps(b.m_value, _mm_max_ps(v.m_value, a.m_value) ) }; +} + +CPPSPMD_FORCE_INLINE vint clamp(const vint& v, const vint& a, const vint& b) +{ + return vint{ min_epi32(b.m_value, max_epi32(v.m_value, a.m_value) ) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfma(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_add_ps(_mm_mul_ps(a.m_value, b.m_value), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfms(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(_mm_mul_ps(a.m_value, b.m_value), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfnma(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(c.m_value, _mm_mul_ps(a.m_value, b.m_value)) }; +} + +CPPSPMD_FORCE_INLINE vfloat vfnms(const vfloat& a, const vfloat& b, const vfloat& c) +{ + return vfloat{ _mm_sub_ps(_mm_sub_ps(_mm_xor_ps(a.m_value, a.m_value), _mm_mul_ps(a.m_value, b.m_value)), c.m_value) }; +} + +CPPSPMD_FORCE_INLINE vfloat lerp(const vfloat &x, const vfloat &y, const vfloat &s) { return vfma(y - x, s, x); } + +CPPSPMD_FORCE_INLINE lint operator+(int a, const lint& b) { return lint{ _mm_add_epi32(_mm_set1_epi32(a), b.m_value) }; } +CPPSPMD_FORCE_INLINE lint operator+(const lint& a, int b) { return lint{ _mm_add_epi32(a.m_value, _mm_set1_epi32(b)) }; } +CPPSPMD_FORCE_INLINE vfloat operator+(float a, const lint& b) { return vfloat(a) + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator+(const lint& a, float b) { return vfloat(a) + vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(const lint& a, float b) { return vfloat(a) * vfloat(b); } +CPPSPMD_FORCE_INLINE vfloat operator*(float b, const lint& a) { return vfloat(a) * vfloat(b); } + +CPPSPMD_FORCE_INLINE vint operator&(const vint& a, const vint& b) { return vint{ _mm_and_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator&(const vint& a, int b) { return a & vint(b); } +CPPSPMD_FORCE_INLINE vint andnot(const vint& a, const vint& b) { return vint{ _mm_andnot_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator|(const vint& a, const vint& b) { return vint{ _mm_or_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator|(const vint& a, int b) { return a | vint(b); } +CPPSPMD_FORCE_INLINE vint operator^(const vint& a, const vint& b) { return vint{ _mm_xor_si128(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator^(const vint& a, int b) { return a ^ vint(b); } +CPPSPMD_FORCE_INLINE vbool operator==(const vint& a, const vint& b) { return vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator!=(const vint& a, const vint& b) { return !vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<(const vint& a, const vint& b) { return vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const vint& a, const vint& b) { return !vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const vint& a, const vint& b) { return !vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const vint& a, const vint& b) { return vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator+(const vint& a, const vint& b) { return vint{ _mm_add_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator-(const vint& a, const vint& b) { return vint{ _mm_sub_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator+(const vint& a, int b) { return a + vint(b); } +CPPSPMD_FORCE_INLINE vint operator-(const vint& a, int b) { return a - vint(b); } +CPPSPMD_FORCE_INLINE vint operator+(int a, const vint& b) { return vint(a) + b; } +CPPSPMD_FORCE_INLINE vint operator-(int a, const vint& b) { return vint(a) - b; } +CPPSPMD_FORCE_INLINE vint operator*(const vint& a, const vint& b) { return vint{ mullo_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint operator*(const vint& a, int b) { return a * vint(b); } +CPPSPMD_FORCE_INLINE vint operator*(int a, const vint& b) { return vint(a) * b; } + +CPPSPMD_FORCE_INLINE vint mulhiu(const vint& a, const vint& b) { return vint{ mulhi_epu32(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint operator-(const vint& v) { return vint{ _mm_sub_epi32(_mm_setzero_si128(), v.m_value) }; } + +CPPSPMD_FORCE_INLINE vint operator~(const vint& a) { return vint{ -a - 1 }; } + +// A few of these break the lane-based abstraction model. They are supported in SSE2, so it makes sense to support them and let the user figure it out. +CPPSPMD_FORCE_INLINE vint adds_epu8(const vint& a, const vint& b) { return vint{ _mm_adds_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epu8(const vint& a, const vint& b) { return vint{ _mm_subs_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint avg_epu8(const vint & a, const vint & b) { return vint{ _mm_avg_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint max_epu8(const vint& a, const vint& b) { return vint{ _mm_max_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min_epu8(const vint& a, const vint& b) { return vint{ _mm_min_epu8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sad_epu8(const vint& a, const vint& b) { return vint{ _mm_sad_epu8(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint add_epi8(const vint& a, const vint& b) { return vint{ _mm_add_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epi8(const vint& a, const vint& b) { return vint{ _mm_adds_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sub_epi8(const vint& a, const vint& b) { return vint{ _mm_sub_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epi8(const vint& a, const vint& b) { return vint{ _mm_subs_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpeq_epi8(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epi8(const vint& a, const vint& b) { return vint{ _mm_cmpgt_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epi8(const vint& a, const vint& b) { return vint{ _mm_cmplt_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint unpacklo_epi8(const vint& a, const vint& b) { return vint{ _mm_unpacklo_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint unpackhi_epi8(const vint& a, const vint& b) { return vint{ _mm_unpackhi_epi8(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE int movemask_epi8(const vint& a) { return _mm_movemask_epi8(a.m_value); } +CPPSPMD_FORCE_INLINE int movemask_epi32(const vint& a) { return _mm_movemask_ps(_mm_castsi128_ps(a.m_value)); } + +CPPSPMD_FORCE_INLINE vint cmple_epu8(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi8(_mm_min_epu8(a.m_value, b.m_value), a.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpge_epu8(const vint& a, const vint& b) { return vint{ cmple_epu8(b, a) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epu8(const vint& a, const vint& b) { return vint{ _mm_andnot_si128(_mm_cmpeq_epi8(a.m_value, b.m_value), _mm_cmpeq_epi8(_mm_max_epu8(a.m_value, b.m_value), a.m_value)) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epu8(const vint& a, const vint& b) { return vint{ cmpgt_epu8(b, a) }; } +CPPSPMD_FORCE_INLINE vint absdiff_epu8(const vint& a, const vint& b) { return vint{ _mm_or_si128(_mm_subs_epu8(a.m_value, b.m_value), _mm_subs_epu8(b.m_value, a.m_value)) }; } + +CPPSPMD_FORCE_INLINE vint blendv_epi8(const vint& a, const vint& b, const vint &mask) { return vint{ blendv_epi8(a.m_value, b.m_value, _mm_cmplt_epi8(mask.m_value, _mm_setzero_si128())) }; } +CPPSPMD_FORCE_INLINE vint blendv_epi32(const vint& a, const vint& b, const vint &mask) { return vint{ blendv_epi32(a.m_value, b.m_value, mask.m_value) }; } + +CPPSPMD_FORCE_INLINE vint add_epi16(const vint& a, const vint& b) { return vint{ _mm_add_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epi16(const vint& a, const vint& b) { return vint{ _mm_adds_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint adds_epu16(const vint& a, const vint& b) { return vint{ _mm_adds_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint avg_epu16(const vint& a, const vint& b) { return vint{ _mm_avg_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint sub_epi16(const vint& a, const vint& b) { return vint{ _mm_sub_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epi16(const vint& a, const vint& b) { return vint{ _mm_subs_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint subs_epu16(const vint& a, const vint& b) { return vint{ _mm_subs_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mullo_epi16(const vint& a, const vint& b) { return vint{ _mm_mullo_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mulhi_epi16(const vint& a, const vint& b) { return vint{ _mm_mulhi_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint mulhi_epu16(const vint& a, const vint& b) { return vint{ _mm_mulhi_epu16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint min_epi16(const vint& a, const vint& b) { return vint{ _mm_min_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint max_epi16(const vint& a, const vint& b) { return vint{ _mm_max_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint madd_epi16(const vint& a, const vint& b) { return vint{ _mm_madd_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpeq_epi16(const vint& a, const vint& b) { return vint{ _mm_cmpeq_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmpgt_epi16(const vint& a, const vint& b) { return vint{ _mm_cmpgt_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint cmplt_epi16(const vint& a, const vint& b) { return vint{ _mm_cmplt_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint packs_epi16(const vint& a, const vint& b) { return vint{ _mm_packs_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint packus_epi16(const vint& a, const vint& b) { return vint{ _mm_packus_epi16(a.m_value, b.m_value) }; } + +CPPSPMD_FORCE_INLINE vint uniform_shift_left_epi16(const vint& a, const vint& b) { return vint{ _mm_sll_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint uniform_arith_shift_right_epi16(const vint& a, const vint& b) { return vint{ _mm_sra_epi16(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vint uniform_shift_right_epi16(const vint& a, const vint& b) { return vint{ _mm_srl_epi16(a.m_value, b.m_value) }; } + +#define VINT_SHIFT_LEFT_EPI16(a, b) vint(_mm_slli_epi16((a).m_value, b)) +#define VINT_SHIFT_RIGHT_EPI16(a, b) vint(_mm_srai_epi16((a).m_value, b)) +#define VUINT_SHIFT_RIGHT_EPI16(a, b) vint(_mm_srli_epi16((a).m_value, b)) + +CPPSPMD_FORCE_INLINE vint undefined_vint() { return vint{ _mm_undefined_si128() }; } +CPPSPMD_FORCE_INLINE vfloat undefined_vfloat() { return vfloat{ _mm_undefined_ps() }; } + +// control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int32's in each 128-bit lane. +#define VINT_LANE_SHUFFLE_EPI32(a, control) vint(_mm_shuffle_epi32((a).m_value, control)) + +// control is an 8-bit immediate value containing 4 2-bit indices which shuffles the int16's in either the high or low 64-bit lane. +#define VINT_LANE_SHUFFLELO_EPI16(a, control) vint(_mm_shufflelo_epi16((a).m_value, control)) +#define VINT_LANE_SHUFFLEHI_EPI16(a, control) vint(_mm_shufflehi_epi16((a).m_value, control)) + +#define VINT_LANE_SHUFFLE_MASK(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6)) +#define VINT_LANE_SHUFFLE_MASK_R(d, c, b, a) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6)) + +#define VINT_LANE_SHIFT_LEFT_BYTES(a, l) vint(_mm_slli_si128((a).m_value, l)) +#define VINT_LANE_SHIFT_RIGHT_BYTES(a, l) vint(_mm_srli_si128((a).m_value, l)) + +// Unpack and interleave 8-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi8(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi8(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi8(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi8(a.m_value, b.m_value)); } + +// Unpack and interleave 16-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi16(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi16(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi16(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi16(a.m_value, b.m_value)); } + +// Unpack and interleave 32-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi32(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi32(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi32(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi32(a.m_value, b.m_value)); } + +// Unpack and interleave 64-bit integers from the low or high half of a and b +CPPSPMD_FORCE_INLINE vint vint_lane_unpacklo_epi64(const vint& a, const vint& b) { return vint(_mm_unpacklo_epi64(a.m_value, b.m_value)); } +CPPSPMD_FORCE_INLINE vint vint_lane_unpackhi_epi64(const vint& a, const vint& b) { return vint(_mm_unpackhi_epi64(a.m_value, b.m_value)); } + +CPPSPMD_FORCE_INLINE vint vint_set1_epi8(int8_t a) { return vint(_mm_set1_epi8(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi16(int16_t a) { return vint(_mm_set1_epi16(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi32(int32_t a) { return vint(_mm_set1_epi32(a)); } +CPPSPMD_FORCE_INLINE vint vint_set1_epi64(int64_t a) { return vint(_mm_set1_epi64x(a)); } + +CPPSPMD_FORCE_INLINE vint mul_epu32(const vint &a, const vint& b) { return vint(_mm_mul_epu32(a.m_value, b.m_value)); } + +CPPSPMD_FORCE_INLINE vint div_epi32(const vint &a, const vint& b) +{ + __m128d al = _mm_cvtepi32_pd(a.m_value); + __m128d ah = _mm_cvtepi32_pd(_mm_unpackhi_epi64(a.m_value, a.m_value)); + + __m128d bl = _mm_cvtepi32_pd(b.m_value); + __m128d bh = _mm_cvtepi32_pd(_mm_unpackhi_epi64(b.m_value, b.m_value)); + + __m128d rl = _mm_div_pd(al, bl); + __m128d rh = _mm_div_pd(ah, bh); + + __m128i rli = _mm_cvttpd_epi32(rl); + __m128i rhi = _mm_cvttpd_epi32(rh); + + return vint(_mm_unpacklo_epi64(rli, rhi)); +} + +CPPSPMD_FORCE_INLINE vint mod_epi32(const vint &a, const vint& b) +{ + vint aa = abs(a), ab = abs(b); + vint q = div_epi32(aa, ab); + vint r = aa - q * ab; + return spmd_ternaryi(a < 0, -r, r); +} + +CPPSPMD_FORCE_INLINE vint operator/ (const vint& a, const vint& b) +{ + return div_epi32(a, b); +} + +CPPSPMD_FORCE_INLINE vint operator/ (const vint& a, int b) +{ + return div_epi32(a, vint(b)); +} + +CPPSPMD_FORCE_INLINE vint operator% (const vint& a, const vint& b) +{ + return mod_epi32(a, b); +} + +CPPSPMD_FORCE_INLINE vint operator% (const vint& a, int b) +{ + return mod_epi32(a, vint(b)); +} + +CPPSPMD_FORCE_INLINE vint operator<< (const vint& a, const vint& b) +{ +#if 0 + CPPSPMD_ALIGN(32) int result[4]; + result[0] = extract_x(a.m_value) << extract_x(b.m_value); + result[1] = extract_y(a.m_value) << extract_y(b.m_value); + result[2] = extract_z(a.m_value) << extract_z(b.m_value); + result[3] = extract_w(a.m_value) << extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + int x = extract_x(a.m_value) << extract_x(b.m_value); + int y = extract_y(a.m_value) << extract_y(b.m_value); + int z = extract_z(a.m_value) << extract_z(b.m_value); + int w = extract_w(a.m_value) << extract_w(b.m_value); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + // What this does: shift left each b lane by 23 bits (to move the shift amount into the FP exponent position), then epi32 add to the integer rep of 1.0f, then cast that to float, then convert that to int to get fast 2^x. + return a * vint(cast_vint_to_vfloat(vint(_mm_slli_epi32(b.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f)))); +#endif +} + +// uniform shift left +CPPSPMD_FORCE_INLINE vint operator<< (const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_sll_epi32(a.m_value, bv) }; +} + +// uniform arithmetic shift right +CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_sra_epi32(a.m_value, bv) }; +} + +// uniform shift right +CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, int b) +{ + __m128i bv = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(_mm_set1_epi32(b)), _mm_castsi128_ps(_mm_load_si128((const __m128i *)g_x_128)))); + return vint{ _mm_srl_epi32(a.m_value, bv) }; +} + +CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, const vint& b) +{ +#if 0 + CPPSPMD_ALIGN(32) int result[4]; + result[0] = ((uint32_t)extract_x(a.m_value)) >> extract_x(b.m_value); + result[1] = ((uint32_t)extract_y(a.m_value)) >> extract_y(b.m_value); + result[2] = ((uint32_t)extract_z(a.m_value)) >> extract_z(b.m_value); + result[3] = ((uint32_t)extract_w(a.m_value)) >> extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + uint32_t x = ((uint32_t)extract_x(a.m_value)) >> ((uint32_t)extract_x(b.m_value)); + uint32_t y = ((uint32_t)extract_y(a.m_value)) >> ((uint32_t)extract_y(b.m_value)); + uint32_t z = ((uint32_t)extract_z(a.m_value)) >> ((uint32_t)extract_z(b.m_value)); + uint32_t w = ((uint32_t)extract_w(a.m_value)) >> ((uint32_t)extract_w(b.m_value)); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + //vint inv_shift = 32 - b; + //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); + + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. + vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); + + // Now convert scale factor to integer. + vint r = vint(f); + + // mulhi_epu32 (using two _mm_mul_epu32), to emulate varying shift left. + vint q(mulhi_epu32(a.m_value, r.m_value)); + + // Handle shift amounts of 0. + return spmd_ternaryi(b > 0, q, a); +#endif +} + +CPPSPMD_FORCE_INLINE vint vuint_shift_right_not_zero(const vint& a, const vint& b) +{ + //vint inv_shift = 32 - b; + //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); + + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. + vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); + + // Now convert scale factor to integer. + vint r = vint(f); + + // mulhi_epu32 (using two _mm_mul_epu32), to emulate varying shift left. + return vint(mulhi_epu32(a.m_value, r.m_value)); +} + +CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, const vint& b) +{ +#if 0 + CPPSPMD_ALIGN(32) int result[4]; + result[0] = extract_x(a.m_value) >> extract_x(b.m_value); + result[1] = extract_y(a.m_value) >> extract_y(b.m_value); + result[2] = extract_z(a.m_value) >> extract_z(b.m_value); + result[3] = extract_w(a.m_value) >> extract_w(b.m_value); + + return vint{ _mm_load_si128((__m128i*)result) }; +#elif 0 + int x = extract_x(a.m_value) >> extract_x(b.m_value); + int y = extract_y(a.m_value) >> extract_y(b.m_value); + int z = extract_z(a.m_value) >> extract_z(b.m_value); + int w = extract_w(a.m_value) >> extract_w(b.m_value); + + __m128i v = insert_x(_mm_undefined_si128(), x); + v = insert_y(v, y); + v = insert_z(v, z); + return vint{ insert_w(v, w) }; +#else + vint sign_mask(_mm_cmplt_epi32(a.m_value, _mm_setzero_si128())); + vint a_shifted = vuint_shift_right(a ^ sign_mask, b) ^ sign_mask; + return a_shifted; +#endif +} + +#undef VINT_SHIFT_LEFT +#undef VINT_SHIFT_RIGHT +#undef VUINT_SHIFT_RIGHT + +// Shift left/right by a uniform immediate constant +#define VINT_SHIFT_LEFT(a, b) vint(_mm_slli_epi32( (a).m_value, (b) ) ) +#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) +#define VUINT_SHIFT_RIGHT(a, b) vint( _mm_srli_epi32( (a).m_value, (b) ) ) +#define VINT_ROT(x, k) (VINT_SHIFT_LEFT((x), (k)) | VUINT_SHIFT_RIGHT((x), 32 - (k))) + +CPPSPMD_FORCE_INLINE vbool operator==(const lint& a, const lint& b) { return vbool{ _mm_cmpeq_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator==(const lint& a, int b) { return vint(a) == vint(b); } +CPPSPMD_FORCE_INLINE vbool operator==(int a, const lint& b) { return vint(a) == vint(b); } +CPPSPMD_FORCE_INLINE vbool operator<(const lint& a, const lint& b) { return vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>(const lint& a, const lint& b) { return vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator<=(const lint& a, const lint& b) { return !vbool{ _mm_cmpgt_epi32(a.m_value, b.m_value) }; } +CPPSPMD_FORCE_INLINE vbool operator>=(const lint& a, const lint& b) { return !vbool{ _mm_cmpgt_epi32(b.m_value, a.m_value) }; } + +CPPSPMD_FORCE_INLINE float extract(const vfloat& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) float values[4]; _mm_store_ps(values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE int extract(const vint& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE int extract(const lint& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance]; } +CPPSPMD_FORCE_INLINE bool extract(const vbool& v, int instance) { assert(instance < 4); CPPSPMD_ALIGN(16) int values[4]; _mm_store_si128((__m128i*)values, v.m_value); return values[instance] != 0; } + +#undef VINT_EXTRACT +#undef VBOOL_EXTRACT +#undef VFLOAT_EXTRACT + +#if CPPSPMD_SSE2 +// Pass in an immediate constant and the compiler will optimize these expressions. +#define VINT_EXTRACT(v, instance) ( ((instance) == 0) ? extract_x((v).m_value) : (((instance) == 1) ? extract_y((v).m_value) : (((instance) == 2) ? extract_z((v).m_value) : extract_w((v).m_value))) ) +#define VBOOL_EXTRACT(v, instance) ( ((instance) == 0) ? extract_x((v).m_value) : (((instance) == 1) ? extract_y((v).m_value) : (((instance) == 2) ? extract_z((v).m_value) : extract_w((v).m_value))) ) +#define VFLOAT_EXTRACT(v, instance) ( ((instance) == 0) ? extractf_ps_x((v).m_value) : (((instance) == 1) ? extractf_ps_y((v).m_value) : (((instance) == 2) ? extractf_ps_z((v).m_value) : extractf_ps_w((v).m_value))) ) +#else +CPPSPMD_FORCE_INLINE float cast_int_bits_as_float(int v) { return *(const float*)&v; } + +#define VINT_EXTRACT(v, instance) _mm_extract_epi32((v).m_value, instance) +#define VBOOL_EXTRACT(v, instance) _mm_extract_epi32((v).m_value, instance) +#define VFLOAT_EXTRACT(v, instance) cast_int_bits_as_float(_mm_extract_ps((v).m_value, instance)) +#endif + +CPPSPMD_FORCE_INLINE vfloat &insert(vfloat& v, int instance, float f) +{ + assert(instance < 4); + CPPSPMD_ALIGN(16) float values[4]; + _mm_store_ps(values, v.m_value); + values[instance] = f; + v.m_value = _mm_load_ps(values); + return v; +} + +CPPSPMD_FORCE_INLINE vint &insert(vint& v, int instance, int i) +{ + assert(instance < 4); + CPPSPMD_ALIGN(16) int values[4]; + _mm_store_si128((__m128i *)values, v.m_value); + values[instance] = i; + v.m_value = _mm_load_si128((__m128i *)values); + return v; +} + +CPPSPMD_FORCE_INLINE vint init_lookup4(const uint8_t pTab[16]) +{ + __m128i l = _mm_loadu_si128((const __m128i*)pTab); + return vint{ l }; +} + +CPPSPMD_FORCE_INLINE vint table_lookup4_8(const vint& a, const vint& table) +{ + return vint{ shuffle_epi8(table.m_value, a.m_value) }; +} + +CPPSPMD_FORCE_INLINE void init_lookup5(const uint8_t pTab[32], vint& table_0, vint& table_1) +{ + __m128i l = _mm_loadu_si128((const __m128i*)pTab); + __m128i h = _mm_loadu_si128((const __m128i*)(pTab + 16)); + table_0.m_value = l; + table_1.m_value = h; +} + +CPPSPMD_FORCE_INLINE vint table_lookup5_8(const vint& a, const vint& table_0, const vint& table_1) +{ + __m128i l_0 = shuffle_epi8(table_0.m_value, a.m_value); + __m128i h_0 = shuffle_epi8(table_1.m_value, a.m_value); + + __m128i m_0 = _mm_slli_epi32(a.m_value, 31 - 4); + + __m128 v_0 = blendv_ps(_mm_castsi128_ps(l_0), _mm_castsi128_ps(h_0), _mm_castsi128_ps(m_0)); + + return vint{ _mm_castps_si128(v_0) }; +} + +CPPSPMD_FORCE_INLINE void init_lookup6(const uint8_t pTab[64], vint& table_0, vint& table_1, vint& table_2, vint& table_3) +{ + __m128i a = _mm_loadu_si128((const __m128i*)pTab); + __m128i b = _mm_loadu_si128((const __m128i*)(pTab + 16)); + __m128i c = _mm_loadu_si128((const __m128i*)(pTab + 32)); + __m128i d = _mm_loadu_si128((const __m128i*)(pTab + 48)); + + table_0.m_value = a; + table_1.m_value = b; + table_2.m_value = c; + table_3.m_value = d; +} + +CPPSPMD_FORCE_INLINE vint table_lookup6_8(const vint& a, const vint& table_0, const vint& table_1, const vint& table_2, const vint& table_3) +{ + __m128i m_0 = _mm_slli_epi32(a.m_value, 31 - 4); + + __m128 av_0; + { + __m128i al_0 = shuffle_epi8(table_0.m_value, a.m_value); + __m128i ah_0 = shuffle_epi8(table_1.m_value, a.m_value); + av_0 = blendv_ps(_mm_castsi128_ps(al_0), _mm_castsi128_ps(ah_0), _mm_castsi128_ps(m_0)); + } + + __m128 bv_0; + { + __m128i bl_0 = shuffle_epi8(table_2.m_value, a.m_value); + __m128i bh_0 = shuffle_epi8(table_3.m_value, a.m_value); + bv_0 = blendv_ps(_mm_castsi128_ps(bl_0), _mm_castsi128_ps(bh_0), _mm_castsi128_ps(m_0)); + } + + __m128i m2_0 = _mm_slli_epi32(a.m_value, 31 - 5); + __m128 v2_0 = blendv_ps(av_0, bv_0, _mm_castsi128_ps(m2_0)); + + return vint{ _mm_castps_si128(v2_0) }; +} + +#if 0 +template<typename SPMDKernel, typename... Args> +CPPSPMD_FORCE_INLINE decltype(auto) spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(exec_mask::all_on()); + return kernel._call(std::forward<Args>(args)...); +} +#else +template<typename SPMDKernel, typename... Args> +CPPSPMD_FORCE_INLINE void spmd_call(Args&&... args) +{ + SPMDKernel kernel; + kernel.init(exec_mask::all_on()); + kernel._call(std::forward<Args>(args)...); +} +#endif + +CPPSPMD_FORCE_INLINE void spmd_kernel::init(const spmd_kernel::exec_mask& kernel_exec) +{ + m_exec = kernel_exec; + m_kernel_exec = kernel_exec; + m_continue_mask = exec_mask::all_off(); + +#ifdef _DEBUG + m_in_loop = false; +#endif +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store(const float_vref& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store_all(const float_vref& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store(const float_vref&& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); + for (int i = 0; i < 4; i++) + { + if (mask & (1 << i)) + dst.m_pValue[vindex[i]] = stored[i]; + } + return dst; +} + +CPPSPMD_FORCE_INLINE const float_vref& spmd_kernel::store_all(const float_vref&& dst, const vfloat& src) +{ + CPPSPMD_ALIGN(16) int vindex[4]; + _mm_store_si128((__m128i*)vindex, dst.m_vindex); + + CPPSPMD_ALIGN(16) float stored[4]; + _mm_store_ps(stored, src.m_value); + + for (int i = 0; i < 4; i++) + dst.m_pValue[vindex[i]] = stored[i]; + return dst; +} + +#include "cppspmd_flow.h" +#include "cppspmd_math.h" + +} // namespace cppspmd_sse41 + diff --git a/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h new file mode 100644 index 0000000000..0dfb28b88f --- /dev/null +++ b/thirdparty/basis_universal/encoder/cppspmd_type_aliases.h @@ -0,0 +1,47 @@ +// cppspmd_type_aliases.h +// Do not include this file directly +// +// Copyright 2020-2021 Binomial LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#ifndef CPPSPMD_TYPES +#define CPPSPMD_TYPES + +using exec_mask = CPPSPMD::exec_mask; + +#if CPPSPMD_INT16 +using vint16 = CPPSPMD::vint16; +using int16_lref = CPPSPMD::int16_lref; +using cint16_vref = CPPSPMD::cint16_vref; +using int16_vref = CPPSPMD::int16_vref; +using lint16 = CPPSPMD::lint16; +using vint16_vref = CPPSPMD::vint16_vref; +#else +using vint = CPPSPMD::vint; +using int_lref = CPPSPMD::int_lref; +using cint_vref = CPPSPMD::cint_vref; +using int_vref = CPPSPMD::int_vref; +using lint = CPPSPMD::lint; +using vint_vref = CPPSPMD::vint_vref; +#endif + +using vbool = CPPSPMD::vbool; +using vfloat = CPPSPMD::vfloat; +using float_lref = CPPSPMD::float_lref; +using float_vref = CPPSPMD::float_vref; +using vfloat_vref = CPPSPMD::vfloat_vref; + +#endif // CPPSPMD_TYPES diff --git a/thirdparty/basis_universal/encoder/jpgd.cpp b/thirdparty/basis_universal/encoder/jpgd.cpp new file mode 100644 index 0000000000..460834409d --- /dev/null +++ b/thirdparty/basis_universal/encoder/jpgd.cpp @@ -0,0 +1,3241 @@ +// jpgd.cpp - C++ class for JPEG decompression. Written by Richard Geldreich <richgel99@gmail.com> between 1994-2020. +// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. +// Supports box and linear chroma upsampling. +// +// Released under two licenses. You are free to choose which license you want: +// License 1: +// Public Domain +// +// License 2: +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Alex Evans: Linear memory allocator (taken from jpge.h). +// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings +// v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings. +// +#ifdef _MSC_VER +#ifndef BASISU_NO_ITERATOR_DEBUG_LEVEL +#if defined(_DEBUG) || defined(DEBUG) +#define _ITERATOR_DEBUG_LEVEL 1 +#define _SECURE_SCL 1 +#else +#define _SECURE_SCL 0 +#define _ITERATOR_DEBUG_LEVEL 0 +#endif +#endif +#endif + +#include "jpgd.h" +#include <string.h> +#include <algorithm> +#include <assert.h> + +#ifdef _MSC_VER +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#endif + +#define JPGD_TRUE (1) +#define JPGD_FALSE (0) + +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b)) + +namespace jpgd { + + static inline void* jpgd_malloc(size_t nSize) { return malloc(nSize); } + static inline void jpgd_free(void* p) { free(p); } + + // DCT coefficients are stored in this sequence. + static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + + enum JPEG_MARKER + { + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 + }; + + enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define SCALEDONE ((int32)1) + +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ + +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) + +#define MULTIPLY(var, cnst) ((var) * (cnst)) + +#define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) + + static inline int left_shifti(int val, uint32_t bits) + { + return static_cast<int>(static_cast<uint32_t>(val) << bits); + } + + // Compiler creates a fast path 1D IDCT for X non-zero columns + template <int NONZERO_COLS> + struct Row + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + // ACCESS_COL() will be optimized at compile time to either an array access, or 0. Good compilers will then optimize out muls against 0. +#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) + + const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = left_shifti(ACCESS_COL(0) + ACCESS_COL(4), CONST_BITS); + const int tmp1 = left_shifti(ACCESS_COL(0) - ACCESS_COL(4), CONST_BITS); + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, -FIX_0_899976223); + const int az2 = MULTIPLY(bz2, -FIX_2_562915447); + const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS - PASS1_BITS); + pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS - PASS1_BITS); + pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS - PASS1_BITS); + pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS - PASS1_BITS); + pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS - PASS1_BITS); + pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS - PASS1_BITS); + pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS - PASS1_BITS); + pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS - PASS1_BITS); + } + }; + + template <> + struct Row<0> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + (void)pTemp; + (void)pSrc; + } + }; + + template <> + struct Row<1> + { + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + const int dcval = left_shifti(pSrc[0], PASS1_BITS); + + pTemp[0] = dcval; + pTemp[1] = dcval; + pTemp[2] = dcval; + pTemp[3] = dcval; + pTemp[4] = dcval; + pTemp[5] = dcval; + pTemp[6] = dcval; + pTemp[7] = dcval; + } + }; + + // Compiler creates a fast path 1D IDCT for X non-zero rows + template <int NONZERO_ROWS> + struct Col + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. +#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0) + + const int z2 = ACCESS_ROW(2); + const int z3 = ACCESS_ROW(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = left_shifti(ACCESS_ROW(0) + ACCESS_ROW(4), CONST_BITS); + const int tmp1 = left_shifti(ACCESS_ROW(0) - ACCESS_ROW(4), CONST_BITS); + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, -FIX_0_899976223); + const int az2 = MULTIPLY(bz2, -FIX_2_562915447); + const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 0] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 7] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 1] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 6] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 2] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 5] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 3] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS + PASS1_BITS + 3); + pDst_ptr[8 * 4] = (uint8)CLAMP(i); + } + }; + + template <> + struct Col<1> + { + static void idct(uint8* pDst_ptr, const int* pTemp) + { + int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3); + const uint8 dcval_clamped = (uint8)CLAMP(dcval); + pDst_ptr[0 * 8] = dcval_clamped; + pDst_ptr[1 * 8] = dcval_clamped; + pDst_ptr[2 * 8] = dcval_clamped; + pDst_ptr[3 * 8] = dcval_clamped; + pDst_ptr[4 * 8] = dcval_clamped; + pDst_ptr[5 * 8] = dcval_clamped; + pDst_ptr[6 * 8] = dcval_clamped; + pDst_ptr[7 * 8] = dcval_clamped; + } + }; + + static const uint8 s_idct_row_table[] = + { + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, + }; + + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + }; + + // Scalar "fast pathing" IDCT. + static void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag) + { + assert(block_max_zag >= 1); + assert(block_max_zag <= 64); + + if (block_max_zag <= 1) + { + int k = ((pSrc_ptr[0] + 4) >> 3) + 128; + k = CLAMP(k); + k = k | (k << 8); + k = k | (k << 16); + + for (int i = 8; i > 0; i--) + { + *(int*)&pDst_ptr[0] = k; + *(int*)&pDst_ptr[4] = k; + pDst_ptr += 8; + } + return; + } + + int temp[64]; + + const jpgd_block_t* pSrc = pSrc_ptr; + int* pTemp = temp; + + const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; + int i; + for (i = 8; i > 0; i--, pRow_tab++) + { + switch (*pRow_tab) + { + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; + } + + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + + const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; + for (i = 8; i > 0; i--) + { + switch (nonzero_rows) + { + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; + } + + pTemp++; + pDst_ptr++; + } + } + + // Retrieve one character from the input stream. + inline uint jpeg_decoder::get_char() + { + // Any bytes remaining in buffer? + if (!m_in_buf_left) + { + // Try to get more bytes. + prep_in_buffer(); + // Still nothing to get? + if (!m_in_buf_left) + { + // Pad the end of the stream with 0xFF 0xD9 (EOI marker) + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Same as previous method, except can indicate if the character is a pad character or not. + inline uint jpeg_decoder::get_char(bool* pPadding_flag) + { + if (!m_in_buf_left) + { + prep_in_buffer(); + if (!m_in_buf_left) + { + *pPadding_flag = true; + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + *pPadding_flag = false; + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; + } + + // Inserts a previously retrieved character back into the input buffer. + inline void jpeg_decoder::stuff_char(uint8 q) + { + // This could write before the input buffer, but we've placed another array there. + *(--m_pIn_buf_ofs) = q; + m_in_buf_left++; + } + + // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. + inline uint8 jpeg_decoder::get_octet() + { + bool padding_flag; + int c = get_char(&padding_flag); + + if (c == 0xFF) + { + if (padding_flag) + return 0xFF; + + c = get_char(&padding_flag); + if (padding_flag) + { + stuff_char(0xFF); + return 0xFF; + } + + if (c == 0x00) + return 0xFF; + else + { + stuff_char(static_cast<uint8>(c)); + stuff_char(0xFF); + return 0xFF; + } + } + + return static_cast<uint8>(c); + } + + // Retrieves a variable number of bits from the input stream. Does not recognize markers. + inline uint jpeg_decoder::get_bits(int num_bits) + { + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + uint c1 = get_char(); + uint c2 = get_char(); + m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + assert(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered. + inline uint jpeg_decoder::get_bits_no_markers(int num_bits) + { + if (!num_bits) + return 0; + + assert(num_bits <= 16); + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) + { + uint c1 = get_octet(); + uint c2 = get_octet(); + m_bit_buf |= (c1 << 8) | c2; + } + else + { + m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; + m_in_buf_left -= 2; + m_pIn_buf_ofs += 2; + } + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + assert(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables* pH) + { + if (!pH) + stop_decoding(JPGD_DECODE_ERROR); + + int symbol; + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) + { + // Decode more bits, use a tree traversal to find symbol. + int ofs = 23; + do + { + unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1)); + + // This should never happen, but to be safe I'm turning these asserts into a run-time check. + if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0)) + stop_decoding(JPGD_DECODE_ERROR); + + symbol = pH->tree[idx]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + } + else + { + assert(symbol < JPGD_HUFF_CODE_SIZE_MAX_LENGTH); + get_bits_no_markers(pH->code_size[symbol]); + } + + return symbol; + } + + // Decodes a Huffman encoded symbol. + inline int jpeg_decoder::huff_decode(huff_tables* pH, int& extra_bits) + { + int symbol; + + if (!pH) + stop_decoding(JPGD_DECODE_ERROR); + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) + { + // Use a tree traversal to find symbol. + int ofs = 23; + do + { + unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1)); + + // This should never happen, but to be safe I'm turning these asserts into a run-time check. + if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0)) + stop_decoding(JPGD_DECODE_ERROR); + + symbol = pH->tree[idx]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + + extra_bits = get_bits_no_markers(symbol & 0xF); + } + else + { + if (symbol & 0x8000) + { + //get_bits_no_markers((symbol >> 8) & 31); + assert(((symbol >> 8) & 31) <= 15); + get_bits_no_markers((symbol >> 8) & 15); + extra_bits = symbol >> 16; + } + else + { + int code_size = (symbol >> 8) & 31; + int num_extra_bits = symbol & 0xF; + int bits = code_size + num_extra_bits; + + if (bits <= 16) + extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); + else + { + get_bits_no_markers(code_size); + extra_bits = get_bits_no_markers(num_extra_bits); + } + } + + symbol &= 0xFF; + } + + return symbol; + } + + // Tables and macro used to fully decode the DPCM differences. + static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; + static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 }; + //static const int s_extend_mask[] = { 0, (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 8), (1 << 9), (1 << 10), (1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16) }; + +#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x)) + + // Unconditionally frees all allocated m_blocks. + void jpeg_decoder::free_all_blocks() + { + m_pStream = nullptr; + for (mem_block* b = m_pMem_blocks; b; ) + { + mem_block* n = b->m_pNext; + jpgd_free(b); + b = n; + } + m_pMem_blocks = nullptr; + } + + // This method handles all errors. It will never return. + // It could easily be changed to use C++ exceptions. + JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status) + { + m_error_code = status; + free_all_blocks(); + longjmp(m_jmp_state, status); + } + + void* jpeg_decoder::alloc(size_t nSize, bool zero) + { + nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; + char* rv = nullptr; + for (mem_block* b = m_pMem_blocks; b; b = b->m_pNext) + { + if ((b->m_used_count + nSize) <= b->m_size) + { + rv = b->m_data + b->m_used_count; + b->m_used_count += nSize; + break; + } + } + if (!rv) + { + int capacity = JPGD_MAX(32768 - 256, ((int)nSize + 2047) & ~2047); + mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity); + if (!b) + { + stop_decoding(JPGD_NOTENOUGHMEM); + } + + b->m_pNext = m_pMem_blocks; + m_pMem_blocks = b; + b->m_used_count = nSize; + b->m_size = capacity; + rv = b->m_data; + } + if (zero) memset(rv, 0, nSize); + return rv; + } + + void jpeg_decoder::word_clear(void* p, uint16 c, uint n) + { + uint8* pD = (uint8*)p; + const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF; + while (n) + { + pD[0] = l; + pD[1] = h; + pD += 2; + n--; + } + } + + // Refill the input buffer. + // This method will sit in a loop until (A) the buffer is full or (B) + // the stream's read() method reports and end of file condition. + void jpeg_decoder::prep_in_buffer() + { + m_in_buf_left = 0; + m_pIn_buf_ofs = m_in_buf; + + if (m_eof_flag) + return; + + do + { + int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag); + if (bytes_read == -1) + stop_decoding(JPGD_STREAM_READ); + + m_in_buf_left += bytes_read; + } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag)); + + m_total_bytes_read += m_in_buf_left; + + // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid). + // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.) + word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64); + } + + // Read a Huffman code table. + void jpeg_decoder::read_dht_marker() + { + int i, index, count; + uint8 huff_num[17]; + uint8 huff_val[256]; + + uint num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= 2; + + while (num_left) + { + index = get_bits(8); + + huff_num[0] = 0; + + count = 0; + + for (i = 1; i <= 16; i++) + { + huff_num[i] = static_cast<uint8>(get_bits(8)); + count += huff_num[i]; + } + + if (count > 255) + stop_decoding(JPGD_BAD_DHT_COUNTS); + + bool symbol_present[256]; + memset(symbol_present, 0, sizeof(symbol_present)); + + for (i = 0; i < count; i++) + { + const int s = get_bits(8); + + // Check for obviously bogus tables. + if (symbol_present[s]) + stop_decoding(JPGD_BAD_DHT_COUNTS); + + huff_val[i] = static_cast<uint8_t>(s); + symbol_present[s] = true; + } + + i = 1 + 16 + count; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DHT_MARKER); + + num_left -= i; + + if ((index & 0x10) > 0x10) + stop_decoding(JPGD_BAD_DHT_INDEX); + + index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1); + + if (index >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_BAD_DHT_INDEX); + + if (!m_huff_num[index]) + m_huff_num[index] = (uint8*)alloc(17); + + if (!m_huff_val[index]) + m_huff_val[index] = (uint8*)alloc(256); + + m_huff_ac[index] = (index & 0x10) != 0; + memcpy(m_huff_num[index], huff_num, 17); + memcpy(m_huff_val[index], huff_val, 256); + } + } + + // Read a quantization table. + void jpeg_decoder::read_dqt_marker() + { + int n, i, prec; + uint num_left; + uint temp; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_DQT_MARKER); + + num_left -= 2; + + while (num_left) + { + n = get_bits(8); + prec = n >> 4; + n &= 0x0F; + + if (n >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_BAD_DQT_TABLE); + + if (!m_quant[n]) + m_quant[n] = (jpgd_quant_t*)alloc(64 * sizeof(jpgd_quant_t)); + + // read quantization entries, in zag order + for (i = 0; i < 64; i++) + { + temp = get_bits(8); + + if (prec) + temp = (temp << 8) + get_bits(8); + + m_quant[n][i] = static_cast<jpgd_quant_t>(temp); + } + + i = 64 + 1; + + if (prec) + i += 64; + + if (num_left < (uint)i) + stop_decoding(JPGD_BAD_DQT_LENGTH); + + num_left -= i; + } + } + + // Read the start of frame (SOF) marker. + void jpeg_decoder::read_sof_marker() + { + int i; + uint num_left; + + num_left = get_bits(16); + + /* precision: sorry, only 8-bit precision is supported */ + if (get_bits(8) != 8) + stop_decoding(JPGD_BAD_PRECISION); + + m_image_y_size = get_bits(16); + + if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) + stop_decoding(JPGD_BAD_HEIGHT); + + m_image_x_size = get_bits(16); + + if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) + stop_decoding(JPGD_BAD_WIDTH); + + m_comps_in_frame = get_bits(8); + + if (m_comps_in_frame > JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_TOO_MANY_COMPONENTS); + + if (num_left != (uint)(m_comps_in_frame * 3 + 8)) + stop_decoding(JPGD_BAD_SOF_LENGTH); + + for (i = 0; i < m_comps_in_frame; i++) + { + m_comp_ident[i] = get_bits(8); + m_comp_h_samp[i] = get_bits(4); + m_comp_v_samp[i] = get_bits(4); + + if (!m_comp_h_samp[i] || !m_comp_v_samp[i] || (m_comp_h_samp[i] > 2) || (m_comp_v_samp[i] > 2)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_comp_quant[i] = get_bits(8); + if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + } + } + + // Used to skip unrecognized markers. + void jpeg_decoder::skip_variable_marker() + { + uint num_left; + + num_left = get_bits(16); + + if (num_left < 2) + stop_decoding(JPGD_BAD_VARIABLE_MARKER); + + num_left -= 2; + + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Read a define restart interval (DRI) marker. + void jpeg_decoder::read_dri_marker() + { + if (get_bits(16) != 4) + stop_decoding(JPGD_BAD_DRI_LENGTH); + + m_restart_interval = get_bits(16); + } + + // Read a start of scan (SOS) marker. + void jpeg_decoder::read_sos_marker() + { + uint num_left; + int i, ci, n, c, cc; + + num_left = get_bits(16); + + n = get_bits(8); + + m_comps_in_scan = n; + + num_left -= 3; + + if ((num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN)) + stop_decoding(JPGD_BAD_SOS_LENGTH); + + for (i = 0; i < n; i++) + { + cc = get_bits(8); + c = get_bits(8); + num_left -= 2; + + for (ci = 0; ci < m_comps_in_frame; ci++) + if (cc == m_comp_ident[ci]) + break; + + if (ci >= m_comps_in_frame) + stop_decoding(JPGD_BAD_SOS_COMP_ID); + + if (ci >= JPGD_MAX_COMPONENTS) + stop_decoding(JPGD_DECODE_ERROR); + + m_comp_list[i] = ci; + + m_comp_dc_tab[ci] = (c >> 4) & 15; + m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1); + + if (m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + if (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + } + + m_spectral_start = get_bits(8); + m_spectral_end = get_bits(8); + m_successive_high = get_bits(4); + m_successive_low = get_bits(4); + + if (!m_progressive_flag) + { + m_spectral_start = 0; + m_spectral_end = 63; + } + + num_left -= 3; + + /* read past whatever is num_left */ + while (num_left) + { + get_bits(8); + num_left--; + } + } + + // Finds the next marker. + int jpeg_decoder::next_marker() + { + uint c, bytes; + + bytes = 0; + + do + { + do + { + bytes++; + c = get_bits(8); + } while (c != 0xFF); + + do + { + c = get_bits(8); + } while (c == 0xFF); + + } while (c == 0); + + // If bytes > 0 here, there where extra bytes before the marker (not good). + + return c; + } + + // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is + // encountered. + int jpeg_decoder::process_markers() + { + int c; + + for (; ; ) + { + c = next_marker(); + + switch (c) + { + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: + // case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } + } + } + } + + // Finds the start of image (SOI) marker. + void jpeg_decoder::locate_soi_marker() + { + uint lastchar, thischar; + uint bytesleft; + + lastchar = get_bits(8); + + thischar = get_bits(8); + + /* ok if it's a normal JPEG file without a special header */ + + if ((lastchar == 0xFF) && (thischar == M_SOI)) + return; + + bytesleft = 4096; + + for (; ; ) + { + if (--bytesleft == 0) + stop_decoding(JPGD_NOT_JPEG); + + lastchar = thischar; + + thischar = get_bits(8); + + if (lastchar == 0xFF) + { + if (thischar == M_SOI) + break; + else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end + stop_decoding(JPGD_NOT_JPEG); + } + } + + // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. + thischar = (m_bit_buf >> 24) & 0xFF; + + if (thischar != 0xFF) + stop_decoding(JPGD_NOT_JPEG); + } + + // Find a start of frame (SOF) marker. + void jpeg_decoder::locate_sof_marker() + { + locate_soi_marker(); + + int c = process_markers(); + + switch (c) + { + case M_SOF2: + { + m_progressive_flag = JPGD_TRUE; + read_sof_marker(); + break; + } + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } + } + } + + // Find a start of scan (SOS) marker. + int jpeg_decoder::locate_sos_marker() + { + int c; + + c = process_markers(); + + if (c == M_EOI) + return JPGD_FALSE; + else if (c != M_SOS) + stop_decoding(JPGD_UNEXPECTED_MARKER); + + read_sos_marker(); + + return JPGD_TRUE; + } + + // Reset everything to default/uninitialized state. + void jpeg_decoder::init(jpeg_decoder_stream* pStream, uint32_t flags) + { + m_flags = flags; + m_pMem_blocks = nullptr; + m_error_code = JPGD_SUCCESS; + m_ready_flag = false; + m_image_x_size = m_image_y_size = 0; + m_pStream = pStream; + m_progressive_flag = JPGD_FALSE; + + memset(m_huff_ac, 0, sizeof(m_huff_ac)); + memset(m_huff_num, 0, sizeof(m_huff_num)); + memset(m_huff_val, 0, sizeof(m_huff_val)); + memset(m_quant, 0, sizeof(m_quant)); + + m_scan_type = 0; + m_comps_in_frame = 0; + + memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); + memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); + memset(m_comp_quant, 0, sizeof(m_comp_quant)); + memset(m_comp_ident, 0, sizeof(m_comp_ident)); + memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); + memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); + + m_comps_in_scan = 0; + memset(m_comp_list, 0, sizeof(m_comp_list)); + memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); + memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); + + m_spectral_start = 0; + m_spectral_end = 0; + m_successive_low = 0; + m_successive_high = 0; + m_max_mcu_x_size = 0; + m_max_mcu_y_size = 0; + m_blocks_per_mcu = 0; + m_max_blocks_per_row = 0; + m_mcus_per_row = 0; + m_mcus_per_col = 0; + + memset(m_mcu_org, 0, sizeof(m_mcu_org)); + + m_total_lines_left = 0; + m_mcu_lines_left = 0; + m_num_buffered_scanlines = 0; + m_real_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_pixel = 0; + + memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); + + memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); + memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_eob_run = 0; + + m_pIn_buf_ofs = m_in_buf; + m_in_buf_left = 0; + m_eof_flag = false; + m_tem_flag = 0; + + memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); + memset(m_in_buf, 0, sizeof(m_in_buf)); + memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); + + m_restart_interval = 0; + m_restarts_left = 0; + m_next_restart_num = 0; + + m_max_mcus_per_row = 0; + m_max_blocks_per_mcu = 0; + m_max_mcus_per_col = 0; + + memset(m_last_dc_val, 0, sizeof(m_last_dc_val)); + m_pMCU_coefficients = nullptr; + m_pSample_buf = nullptr; + m_pSample_buf_prev = nullptr; + m_sample_buf_prev_valid = false; + + m_total_bytes_read = 0; + + m_pScan_line_0 = nullptr; + m_pScan_line_1 = nullptr; + + // Ready the input buffer. + prep_in_buffer(); + + // Prime the bit buffer. + m_bits_left = 16; + m_bit_buf = 0; + + get_bits(16); + get_bits(16); + + for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) + m_mcu_block_max_zag[i] = 64; + } + +#define SCALEBITS 16 +#define ONE_HALF ((int) 1 << (SCALEBITS-1)) +#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f)) + + // Create a few tables that allow us to quickly convert YCbCr to RGB. + void jpeg_decoder::create_look_ups() + { + for (int i = 0; i <= 255; i++) + { + int k = i - 128; + m_crr[i] = (FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS; + m_cbb[i] = (FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS; + m_crg[i] = (-FIX(0.71414f)) * k; + m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF; + } + } + + // This method throws back into the stream any bytes that where read + // into the bit buffer during initial marker scanning. + void jpeg_decoder::fix_in_buffer() + { + // In case any 0xFF's where pulled into the buffer during marker scanning. + assert((m_bits_left & 7) == 0); + + if (m_bits_left == 16) + stuff_char((uint8)(m_bit_buf & 0xFF)); + + if (m_bits_left >= 8) + stuff_char((uint8)((m_bit_buf >> 8) & 0xFF)); + + stuff_char((uint8)((m_bit_buf >> 16) & 0xFF)); + stuff_char((uint8)((m_bit_buf >> 24) & 0xFF)); + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + void jpeg_decoder::transform_mcu(int mcu_row) + { + jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; + if (mcu_row * m_blocks_per_mcu >= m_max_blocks_per_row) + stop_decoding(JPGD_DECODE_ERROR); + + uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); + pSrc_ptr += 64; + pDst_ptr += 64; + } + } + + // Loads and dequantizes the next row of (already decoded) coefficients. + // Progressive images only. + void jpeg_decoder::load_next_row() + { + int i; + jpgd_block_t* p; + jpgd_quant_t* q; + int mcu_row, mcu_block, row_block = 0; + int component_num, component_id; + int block_x_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + q = m_quant[m_comp_quant[component_id]]; + + p = m_pMCU_coefficients + 64 * mcu_block; + + jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); + p[0] = pDC[0]; + memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t)); + + for (i = 63; i > 0; i--) + if (p[g_ZAG[i]]) + break; + + m_mcu_block_max_zag[mcu_block] = i + 1; + + for (; i >= 0; i--) + if (p[g_ZAG[i]]) + p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]); + + row_block++; + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + transform_mcu(mcu_row); + } + + if (m_comps_in_scan == 1) + m_block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + + m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } + + // Restart interval processing. + void jpeg_decoder::process_restart() + { + int i; + int c = 0; + + // Align to a byte boundry + // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers! + //get_bits_no_markers(m_bits_left & 7); + + // Let's scan a little bit to find the marker, but not _too_ far. + // 1536 is a "fudge factor" that determines how much to scan. + for (i = 1536; i > 0; i--) + if (get_char() == 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + for (; i > 0; i--) + if ((c = get_char()) != 0xFF) + break; + + if (i == 0) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Is it the expected marker? If not, something bad happened. + if (c != (m_next_restart_num + M_RST0)) + stop_decoding(JPGD_BAD_RESTART_MARKER); + + // Reset each component's DC prediction values. + memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + m_restarts_left = m_restart_interval; + + m_next_restart_num = (m_next_restart_num + 1) & 7; + + // Get the bit buffer going again... + + m_bits_left = 16; + get_bits_no_markers(16); + get_bits_no_markers(16); + } + + static inline int dequantize_ac(int c, int q) { c *= q; return c; } + + // Decodes and dequantizes the next row of coefficients. + void jpeg_decoder::decode_next_row() + { + int row_block = 0; + + for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + jpgd_block_t* p = m_pMCU_coefficients; + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) + { + int component_id = m_mcu_org[mcu_block]; + if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES) + stop_decoding(JPGD_DECODE_ERROR); + + jpgd_quant_t* q = m_quant[m_comp_quant[component_id]]; + + int r, s; + s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r); + if (s >= 16) + stop_decoding(JPGD_DECODE_ERROR); + + s = JPGD_HUFF_EXTEND(r, s); + + m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); + + p[0] = static_cast<jpgd_block_t>(s * q[0]); + + int prev_num_set = m_mcu_block_max_zag[mcu_block]; + + huff_tables* pH = m_pHuff_tabs[m_comp_ac_tab[component_id]]; + + int k; + for (k = 1; k < 64; k++) + { + int extra_bits; + s = huff_decode(pH, extra_bits); + + r = s >> 4; + s &= 15; + + if (s) + { + if (r) + { + if ((k + r) > 63) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(r, prev_num_set - k); + int kt = k; + while (n--) + p[g_ZAG[kt++]] = 0; + } + + k += r; + } + + s = JPGD_HUFF_EXTEND(extra_bits, s); + + if (k >= 64) + stop_decoding(JPGD_DECODE_ERROR); + + p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k]; + } + else + { + if (r == 15) + { + if ((k + 16) > 64) + stop_decoding(JPGD_DECODE_ERROR); + + if (k < prev_num_set) + { + int n = JPGD_MIN(16, prev_num_set - k); + int kt = k; + while (n--) + { + if (kt > 63) + stop_decoding(JPGD_DECODE_ERROR); + p[g_ZAG[kt++]] = 0; + } + } + + k += 16 - 1; // - 1 because the loop counter is k + + if (p[g_ZAG[k & 63]] != 0) + stop_decoding(JPGD_DECODE_ERROR); + } + else + break; + } + } + + if (k < prev_num_set) + { + int kt = k; + while (kt < prev_num_set) + p[g_ZAG[kt++]] = 0; + } + + m_mcu_block_max_zag[mcu_block] = k; + + row_block++; + } + + transform_mcu(mcu_row); + + m_restarts_left--; + } + } + + // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB + void jpeg_decoder::H1V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d = m_pScan_line_0; + uint8* s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int y = s[j]; + int cb = s[64 + j]; + int cr = s[128 + j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + + s += 64 * 3; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* y = m_pSample_buf + row * 8; + uint8* c = m_pSample_buf + 2 * 64 + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 4; j++) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j << 1]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[(j << 1) + 1]; + d0[4] = clamp(yy + rc); + d0[5] = clamp(yy + gc); + d0[6] = clamp(yy + bc); + d0[7] = 255; + + d0 += 8; + + c++; + } + y += 64; + } + + y += 64 * 4 - 64 * 2; + c += 64 * 4 - 8; + } + } + + // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H2V1ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 4; + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + + const int half_image_x_size = (m_image_x_size >> 1) - 1; + const int row_x8 = row * 8; + + for (int x = 0; x < m_image_x_size; x++) + { + int y = m_pSample_buf[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + row_x8)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7) + row_x8 + 128; + int cb0 = m_pSample_buf[check_sample_buf_ofs(a)]; + int cr0 = m_pSample_buf[check_sample_buf_ofs(a + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7) + row_x8 + 128; + int cb1 = m_pSample_buf[check_sample_buf_ofs(b)]; + int cr1 = m_pSample_buf[check_sample_buf_ofs(b + 64)]; + + int w0 = (x & 1) ? 3 : 1; + int w1 = (x & 1) ? 1 : 3; + + int cb = (cb0 * w0 + cb1 * w1 + 2) >> 2; + int cr = (cr0 * w0 + cr1 * w1 + 2) >> 2; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y + rc); + d0[1] = clamp(y + gc); + d0[2] = clamp(y + bc); + d0[3] = 255; + + d0 += 4; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* d1 = m_pScan_line_1; + uint8* y; + uint8* c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64 * 1 + (row & 7) * 8; + + c = m_pSample_buf + 64 * 2 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int cb = c[0 + j]; + int cr = c[64 + j]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[8 + j]; + d1[0] = clamp(yy + rc); + d1[1] = clamp(yy + gc); + d1[2] = clamp(yy + bc); + d1[3] = 255; + + d0 += 4; + d1 += 4; + } + + y += 64 * 4; + c += 64 * 4; + } + } + + // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB + void jpeg_decoder::H1V2ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 4; + int y = m_image_y_size - m_total_lines_left; + int row = y & 15; + + const int half_image_y_size = (m_image_y_size >> 1) - 1; + + uint8* d0 = m_pScan_line_0; + + const int w0 = (row & 1) ? 3 : 1; + const int w1 = (row & 1) ? 1 : 3; + + int c_y0 = (y - 1) >> 1; + int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size); + + const uint8_t* p_YSamples = m_pSample_buf; + const uint8_t* p_C0Samples = m_pSample_buf; + if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1)) + { + assert(y > 0); + assert(m_sample_buf_prev_valid); + + if ((row & 15) == 15) + p_YSamples = m_pSample_buf_prev; + + p_C0Samples = m_pSample_buf_prev; + } + + const int y_sample_base_ofs = ((row & 8) ? 64 : 0) + (row & 7) * 8; + const int y0_base = (c_y0 & 7) * 8 + 128; + const int y1_base = (c_y1 & 7) * 8 + 128; + + for (int x = 0; x < m_image_x_size; x++) + { + const int base_ofs = (x >> 3) * BLOCKS_PER_MCU * 64 + (x & 7); + + int y_sample = p_YSamples[check_sample_buf_ofs(base_ofs + y_sample_base_ofs)]; + + int a = base_ofs + y0_base; + int cb0_sample = p_C0Samples[check_sample_buf_ofs(a)]; + int cr0_sample = p_C0Samples[check_sample_buf_ofs(a + 64)]; + + int b = base_ofs + y1_base; + int cb1_sample = m_pSample_buf[check_sample_buf_ofs(b)]; + int cr1_sample = m_pSample_buf[check_sample_buf_ofs(b + 64)]; + + int cb = (cb0_sample * w0 + cb1_sample * w1 + 2) >> 2; + int cr = (cr0_sample * w0 + cr1_sample * w1 + 2) >> 2; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample + rc); + d0[1] = clamp(y_sample + gc); + d0[2] = clamp(y_sample + bc); + d0[3] = 255; + + d0 += 4; + } + } + + // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB + void jpeg_decoder::H2V2Convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d0 = m_pScan_line_0; + uint8* d1 = m_pScan_line_1; + uint8* y; + uint8* c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64 * 2 + (row & 7) * 8; + + c = m_pSample_buf + 64 * 4 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 8; j += 2) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy + rc); + d0[1] = clamp(yy + gc); + d0[2] = clamp(yy + bc); + d0[3] = 255; + + yy = y[j + 1]; + d0[4] = clamp(yy + rc); + d0[5] = clamp(yy + gc); + d0[6] = clamp(yy + bc); + d0[7] = 255; + + yy = y[j + 8]; + d1[0] = clamp(yy + rc); + d1[1] = clamp(yy + gc); + d1[2] = clamp(yy + bc); + d1[3] = 255; + + yy = y[j + 8 + 1]; + d1[4] = clamp(yy + rc); + d1[5] = clamp(yy + gc); + d1[6] = clamp(yy + bc); + d1[7] = 255; + + d0 += 8; + d1 += 8; + + c++; + } + y += 64; + } + + y += 64 * 6 - 64 * 2; + c += 64 * 6 - 8; + } + } + + uint32_t jpeg_decoder::H2V2ConvertFiltered() + { + const uint BLOCKS_PER_MCU = 6; + int y = m_image_y_size - m_total_lines_left; + int row = y & 15; + + const int half_image_y_size = (m_image_y_size >> 1) - 1; + + uint8* d0 = m_pScan_line_0; + + int c_y0 = (y - 1) >> 1; + int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size); + + const uint8_t* p_YSamples = m_pSample_buf; + const uint8_t* p_C0Samples = m_pSample_buf; + if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1)) + { + assert(y > 0); + assert(m_sample_buf_prev_valid); + + if ((row & 15) == 15) + p_YSamples = m_pSample_buf_prev; + + p_C0Samples = m_pSample_buf_prev; + } + + const int y_sample_base_ofs = ((row & 8) ? 128 : 0) + (row & 7) * 8; + const int y0_base = (c_y0 & 7) * 8 + 256; + const int y1_base = (c_y1 & 7) * 8 + 256; + + const int half_image_x_size = (m_image_x_size >> 1) - 1; + + static const uint8_t s_muls[2][2][4] = + { + { { 1, 3, 3, 9 }, { 3, 9, 1, 3 }, }, + { { 3, 1, 9, 3 }, { 9, 3, 3, 1 } } + }; + + if (((row & 15) >= 1) && ((row & 15) <= 14)) + { + assert((row & 1) == 1); + assert(((y + 1 - 1) >> 1) == c_y0); + + assert(p_YSamples == m_pSample_buf); + assert(p_C0Samples == m_pSample_buf); + + uint8* d1 = m_pScan_line_1; + const int y_sample_base_ofs1 = (((row + 1) & 8) ? 128 : 0) + ((row + 1) & 7) * 8; + + for (int x = 0; x < m_image_x_size; x++) + { + int k = (x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7); + int y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)]; + int y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7); + int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)]; + int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)]; + + int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)]; + int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7); + int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)]; + int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)]; + + int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)]; + int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)]; + + { + const uint8_t* pMuls = &s_muls[row & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample0 + rc); + d0[1] = clamp(y_sample0 + gc); + d0[2] = clamp(y_sample0 + bc); + d0[3] = 255; + + d0 += 4; + } + + { + const uint8_t* pMuls = &s_muls[(row + 1) & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d1[0] = clamp(y_sample1 + rc); + d1[1] = clamp(y_sample1 + gc); + d1[2] = clamp(y_sample1 + bc); + d1[3] = 255; + + d1 += 4; + } + + if (((x & 1) == 1) && (x < m_image_x_size - 1)) + { + const int nx = x + 1; + assert(c_x0 == (nx - 1) >> 1); + + k = (nx >> 4) * BLOCKS_PER_MCU * 64 + ((nx & 8) ? 64 : 0) + (nx & 7); + y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)]; + y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)]; + + { + const uint8_t* pMuls = &s_muls[row & 1][nx & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample0 + rc); + d0[1] = clamp(y_sample0 + gc); + d0[2] = clamp(y_sample0 + bc); + d0[3] = 255; + + d0 += 4; + } + + { + const uint8_t* pMuls = &s_muls[(row + 1) & 1][nx & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d1[0] = clamp(y_sample1 + rc); + d1[1] = clamp(y_sample1 + gc); + d1[2] = clamp(y_sample1 + bc); + d1[3] = 255; + + d1 += 4; + } + + ++x; + } + } + + return 2; + } + else + { + for (int x = 0; x < m_image_x_size; x++) + { + int y_sample = p_YSamples[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + y_sample_base_ofs)]; + + int c_x0 = (x - 1) >> 1; + int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size); + c_x0 = JPGD_MAX(c_x0, 0); + + int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7); + int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)]; + int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)]; + + int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)]; + int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)]; + + int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7); + int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)]; + int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)]; + + int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)]; + int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)]; + + const uint8_t* pMuls = &s_muls[row & 1][x & 1][0]; + int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4; + int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + d0[0] = clamp(y_sample + rc); + d0[1] = clamp(y_sample + gc); + d0[2] = clamp(y_sample + bc); + d0[3] = 255; + + d0 += 4; + } + + return 1; + } + } + + // Y (1 block per MCU) to 8-bit grayscale + void jpeg_decoder::gray_convert() + { + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8* d = m_pScan_line_0; + uint8* s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + *(uint*)d = *(uint*)s; + *(uint*)(&d[4]) = *(uint*)(&s[4]); + + s += 64; + d += 8; + } + } + + // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. + void jpeg_decoder::find_eoi() + { + if (!m_progressive_flag) + { + // Attempt to read the EOI marker. + //get_bits_no_markers(m_bits_left & 7); + + // Prime the bit buffer + m_bits_left = 16; + get_bits(16); + get_bits(16); + + // The next marker _should_ be EOI + process_markers(); + } + + m_total_bytes_read -= m_in_buf_left; + } + + int jpeg_decoder::decode_next_mcu_row() + { + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)); + if (chroma_y_filtering) + { + std::swap(m_pSample_buf, m_pSample_buf_prev); + + m_sample_buf_prev_valid = true; + } + + if (m_progressive_flag) + load_next_row(); + else + decode_next_row(); + + // Find the EOI marker if that was the last row. + if (m_total_lines_left <= m_max_mcu_y_size) + find_eoi(); + + m_mcu_lines_left = m_max_mcu_y_size; + return 0; + } + + int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) + { + if ((m_error_code) || (!m_ready_flag)) + return JPGD_FAILED; + + if (m_total_lines_left == 0) + return JPGD_DONE; + + const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)); + + bool get_another_mcu_row = false; + bool got_mcu_early = false; + if (chroma_y_filtering) + { + if (m_total_lines_left == m_image_y_size) + get_another_mcu_row = true; + else if ((m_mcu_lines_left == 1) && (m_total_lines_left > 1)) + { + get_another_mcu_row = true; + got_mcu_early = true; + } + } + else + { + get_another_mcu_row = (m_mcu_lines_left == 0); + } + + if (get_another_mcu_row) + { + int status = decode_next_mcu_row(); + if (status != 0) + return status; + } + + switch (m_scan_type) + { + case JPGD_YH2V2: + { + if (m_flags & cFlagLinearChromaFiltering) + { + if (m_num_buffered_scanlines == 1) + { + *pScan_line = m_pScan_line_1; + } + else if (m_num_buffered_scanlines == 0) + { + m_num_buffered_scanlines = H2V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + + m_num_buffered_scanlines--; + } + else + { + if ((m_mcu_lines_left & 1) == 0) + { + H2V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + } + + break; + } + case JPGD_YH2V1: + { + if (m_flags & cFlagLinearChromaFiltering) + H2V1ConvertFiltered(); + else + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if (chroma_y_filtering) + { + H1V2ConvertFiltered(); + *pScan_line = m_pScan_line_0; + } + else + { + if ((m_mcu_lines_left & 1) == 0) + { + H1V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + } + + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; + + break; + } + } + + *pScan_line_len = m_real_dest_bytes_per_scan_line; + + if (!got_mcu_early) + { + m_mcu_lines_left--; + } + + m_total_lines_left--; + + return JPGD_SUCCESS; + } + + // Creates the tables needed for efficient Huffman decoding. + void jpeg_decoder::make_huff_table(int index, huff_tables* pH) + { + int p, i, l, si; + uint8 huffsize[258]; + uint huffcode[258]; + uint code; + uint subtree; + int code_size; + int lastp; + int nextfreeentry; + int currententry; + + pH->ac_table = m_huff_ac[index] != 0; + + p = 0; + + for (l = 1; l <= 16; l++) + { + for (i = 1; i <= m_huff_num[index][l]; i++) + { + if (p >= 257) + stop_decoding(JPGD_DECODE_ERROR); + huffsize[p++] = static_cast<uint8>(l); + } + } + + assert(p < 258); + huffsize[p] = 0; + + lastp = p; + + code = 0; + si = huffsize[0]; + p = 0; + + while (huffsize[p]) + { + while (huffsize[p] == si) + { + if (p >= 257) + stop_decoding(JPGD_DECODE_ERROR); + huffcode[p++] = code; + code++; + } + + code <<= 1; + si++; + } + + memset(pH->look_up, 0, sizeof(pH->look_up)); + memset(pH->look_up2, 0, sizeof(pH->look_up2)); + memset(pH->tree, 0, sizeof(pH->tree)); + memset(pH->code_size, 0, sizeof(pH->code_size)); + + nextfreeentry = -1; + + p = 0; + + while (p < lastp) + { + i = m_huff_val[index][p]; + + code = huffcode[p]; + code_size = huffsize[p]; + + assert(i < JPGD_HUFF_CODE_SIZE_MAX_LENGTH); + pH->code_size[i] = static_cast<uint8>(code_size); + + if (code_size <= 8) + { + code <<= (8 - code_size); + + for (l = 1 << (8 - code_size); l > 0; l--) + { + if (code >= 256) + stop_decoding(JPGD_DECODE_ERROR); + + pH->look_up[code] = i; + + bool has_extrabits = false; + int extra_bits = 0; + int num_extra_bits = i & 15; + + int bits_to_fetch = code_size; + if (num_extra_bits) + { + int total_codesize = code_size + num_extra_bits; + if (total_codesize <= 8) + { + has_extrabits = true; + extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize)); + + if (extra_bits > 0x7FFF) + stop_decoding(JPGD_DECODE_ERROR); + + bits_to_fetch += num_extra_bits; + } + } + + if (!has_extrabits) + pH->look_up2[code] = i | (bits_to_fetch << 8); + else + pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); + + code++; + } + } + else + { + subtree = (code >> (code_size - 8)) & 0xFF; + + currententry = pH->look_up[subtree]; + + if (currententry == 0) + { + pH->look_up[subtree] = currententry = nextfreeentry; + pH->look_up2[subtree] = currententry = nextfreeentry; + + nextfreeentry -= 2; + } + + code <<= (16 - (code_size - 8)); + + for (l = code_size; l > 9; l--) + { + if ((code & 0x8000) == 0) + currententry--; + + unsigned int idx = -currententry - 1; + + if (idx >= JPGD_HUFF_TREE_MAX_LENGTH) + stop_decoding(JPGD_DECODE_ERROR); + + if (pH->tree[idx] == 0) + { + pH->tree[idx] = nextfreeentry; + + currententry = nextfreeentry; + + nextfreeentry -= 2; + } + else + { + currententry = pH->tree[idx]; + } + + code <<= 1; + } + + if ((code & 0x8000) == 0) + currententry--; + + if ((-currententry - 1) >= JPGD_HUFF_TREE_MAX_LENGTH) + stop_decoding(JPGD_DECODE_ERROR); + + pH->tree[-currententry - 1] = i; + } + + p++; + } + } + + // Verifies the quantization tables needed for this scan are available. + void jpeg_decoder::check_quant_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr) + stop_decoding(JPGD_UNDEFINED_QUANT_TABLE); + } + + // Verifies that all the Huffman tables needed for this scan are available. + void jpeg_decoder::check_huff_tables() + { + for (int i = 0; i < m_comps_in_scan; i++) + { + if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + + if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr)) + stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); + } + + for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) + if (m_huff_num[i]) + { + if (!m_pHuff_tabs[i]) + m_pHuff_tabs[i] = (huff_tables*)alloc(sizeof(huff_tables)); + + make_huff_table(i, m_pHuff_tabs[i]); + } + } + + // Determines the component order inside each MCU. + // Also calcs how many MCU's are on each row, etc. + bool jpeg_decoder::calc_mcu_block_order() + { + int component_num, component_id; + int max_h_samp = 0, max_v_samp = 0; + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + if (m_comp_h_samp[component_id] > max_h_samp) + max_h_samp = m_comp_h_samp[component_id]; + + if (m_comp_v_samp[component_id] > max_v_samp) + max_v_samp = m_comp_v_samp[component_id]; + } + + for (component_id = 0; component_id < m_comps_in_frame; component_id++) + { + m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8; + m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8; + } + + if (m_comps_in_scan == 1) + { + m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]]; + m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]]; + } + else + { + m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp; + m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp; + } + + if (m_comps_in_scan == 1) + { + m_mcu_org[0] = m_comp_list[0]; + + m_blocks_per_mcu = 1; + } + else + { + m_blocks_per_mcu = 0; + + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + int num_blocks; + + component_id = m_comp_list[component_num]; + + num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id]; + + while (num_blocks--) + m_mcu_org[m_blocks_per_mcu++] = component_id; + } + } + + if (m_blocks_per_mcu > m_max_blocks_per_mcu) + return false; + + for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + int comp_id = m_mcu_org[mcu_block]; + if (comp_id >= JPGD_MAX_QUANT_TABLES) + return false; + } + + return true; + } + + // Starts a new scan. + int jpeg_decoder::init_scan() + { + if (!locate_sos_marker()) + return JPGD_FALSE; + + if (!calc_mcu_block_order()) + return JPGD_FALSE; + + check_huff_tables(); + + check_quant_tables(); + + memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); + + m_eob_run = 0; + + if (m_restart_interval) + { + m_restarts_left = m_restart_interval; + m_next_restart_num = 0; + } + + fix_in_buffer(); + + return JPGD_TRUE; + } + + // Starts a frame. Determines if the number of components or sampling factors + // are supported. + void jpeg_decoder::init_frame() + { + int i; + + if (m_comps_in_frame == 1) + { + if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_scan_type = JPGD_GRAYSCALE; + m_max_blocks_per_mcu = 1; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if (m_comps_in_frame == 3) + { + if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || + ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1))) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH1V1; + + m_max_blocks_per_mcu = 3; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH2V1; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH1V2; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 16; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH2V2; + m_max_blocks_per_mcu = 6; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 16; + } + else + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + } + else + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size; + m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size; + + // These values are for the *destination* pixels: after conversion. + if (m_scan_type == JPGD_GRAYSCALE) + m_dest_bytes_per_pixel = 1; + else + m_dest_bytes_per_pixel = 4; + + m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; + + m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel); + + // Initialize two scan line buffers. + m_pScan_line_0 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); + if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) + m_pScan_line_1 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); + + m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu; + + // Should never happen + if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) + stop_decoding(JPGD_DECODE_ERROR); + + // Allocate the coefficient buffer, enough for one MCU + m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t)); + + for (i = 0; i < m_max_blocks_per_mcu; i++) + m_mcu_block_max_zag[i] = 64; + + m_pSample_buf = (uint8*)alloc(m_max_blocks_per_row * 64); + m_pSample_buf_prev = (uint8*)alloc(m_max_blocks_per_row * 64); + + m_total_lines_left = m_image_y_size; + + m_mcu_lines_left = 0; + + create_look_ups(); + } + + // The coeff_buf series of methods originally stored the coefficients + // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache + // was used to make this process more efficient. Now, we can store the entire + // thing in RAM. + jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) + { + coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf)); + + cb->block_num_x = block_num_x; + cb->block_num_y = block_num_y; + cb->block_len_x = block_len_x; + cb->block_len_y = block_len_y; + cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t); + cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true); + return cb; + } + + inline jpgd_block_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y) + { + if ((block_x >= cb->block_num_x) || (block_y >= cb->block_num_y)) + stop_decoding(JPGD_DECODE_ERROR); + + return (jpgd_block_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x)); + } + + // The following methods decode the various types of m_blocks encountered + // in progressively encoded images. + void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int s, r; + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) + { + if (s >= 16) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + } + + pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); + + p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low); + } + + void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + if (pD->get_bits_no_markers(1)) + { + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); + + p[0] |= (1 << pD->m_successive_low); + } + } + + void jpeg_decoder::decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int k, s, r; + + if (pD->m_eob_run) + { + pD->m_eob_run--; + return; + } + + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + + for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) + { + unsigned int idx = pD->m_comp_ac_tab[component_id]; + if (idx >= JPGD_MAX_HUFF_TABLES) + pD->stop_decoding(JPGD_DECODE_ERROR); + + s = pD->huff_decode(pD->m_pHuff_tabs[idx]); + + r = s >> 4; + s &= 15; + + if (s) + { + if ((k += r) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + r = pD->get_bits_no_markers(s); + s = JPGD_HUFF_EXTEND(r, s); + + p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low); + } + else + { + if (r == 15) + { + if ((k += 15) > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + } + else + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + pD->m_eob_run--; + + break; + } + } + } + } + + void jpeg_decoder::decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) + { + int s, k, r; + + int p1 = 1 << pD->m_successive_low; + + //int m1 = (-1) << pD->m_successive_low; + int m1 = static_cast<int>((UINT32_MAX << pD->m_successive_low)); + + jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); + if (pD->m_spectral_end > 63) + pD->stop_decoding(JPGD_DECODE_ERROR); + + k = pD->m_spectral_start; + + if (pD->m_eob_run == 0) + { + for (; k <= pD->m_spectral_end; k++) + { + unsigned int idx = pD->m_comp_ac_tab[component_id]; + if (idx >= JPGD_MAX_HUFF_TABLES) + pD->stop_decoding(JPGD_DECODE_ERROR); + + s = pD->huff_decode(pD->m_pHuff_tabs[idx]); + + r = s >> 4; + s &= 15; + + if (s) + { + if (s != 1) + pD->stop_decoding(JPGD_DECODE_ERROR); + + if (pD->get_bits_no_markers(1)) + s = p1; + else + s = m1; + } + else + { + if (r != 15) + { + pD->m_eob_run = 1 << r; + + if (r) + pD->m_eob_run += pD->get_bits_no_markers(r); + + break; + } + } + + do + { + jpgd_block_t* this_coef = p + g_ZAG[k & 63]; + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = static_cast<jpgd_block_t>(*this_coef + p1); + else + *this_coef = static_cast<jpgd_block_t>(*this_coef + m1); + } + } + } + else + { + if (--r < 0) + break; + } + + k++; + + } while (k <= pD->m_spectral_end); + + if ((s) && (k < 64)) + { + p[g_ZAG[k]] = static_cast<jpgd_block_t>(s); + } + } + } + + if (pD->m_eob_run > 0) + { + for (; k <= pD->m_spectral_end; k++) + { + jpgd_block_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis + + if (*this_coef != 0) + { + if (pD->get_bits_no_markers(1)) + { + if ((*this_coef & p1) == 0) + { + if (*this_coef >= 0) + *this_coef = static_cast<jpgd_block_t>(*this_coef + p1); + else + *this_coef = static_cast<jpgd_block_t>(*this_coef + m1); + } + } + } + } + + pD->m_eob_run--; + } + } + + // Decode a scan in a progressively encoded image. + void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func) + { + int mcu_row, mcu_col, mcu_block; + int block_x_mcu[JPGD_MAX_COMPONENTS], block_y_mcu[JPGD_MAX_COMPONENTS]; + + memset(block_y_mcu, 0, sizeof(block_y_mcu)); + + for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) + { + int component_num, component_id; + + memset(block_x_mcu, 0, sizeof(block_x_mcu)); + + for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) + { + int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; + + if ((m_restart_interval) && (m_restarts_left == 0)) + process_restart(); + + for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) + { + component_id = m_mcu_org[mcu_block]; + + decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, block_y_mcu[component_id] + block_y_mcu_ofs); + + if (m_comps_in_scan == 1) + block_x_mcu[component_id]++; + else + { + if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) + { + block_x_mcu_ofs = 0; + + if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) + { + block_y_mcu_ofs = 0; + block_x_mcu[component_id] += m_comp_h_samp[component_id]; + } + } + } + } + + m_restarts_left--; + } + + if (m_comps_in_scan == 1) + block_y_mcu[m_comp_list[0]]++; + else + { + for (component_num = 0; component_num < m_comps_in_scan; component_num++) + { + component_id = m_comp_list[component_num]; + block_y_mcu[component_id] += m_comp_v_samp[component_id]; + } + } + } + } + + // Decode a progressively encoded image. + void jpeg_decoder::init_progressive() + { + int i; + + if (m_comps_in_frame == 4) + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + // Allocate the coefficient buffers. + for (i = 0; i < m_comps_in_frame; i++) + { + m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1); + m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8); + } + + // See https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf + uint32_t total_scans = 0; + const uint32_t MAX_SCANS_TO_PROCESS = 1000; + + for (; ; ) + { + int dc_only_scan, refinement_scan; + pDecode_block_func decode_block_func; + + if (!init_scan()) + break; + + dc_only_scan = (m_spectral_start == 0); + refinement_scan = (m_successive_high != 0); + + if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if (dc_only_scan) + { + if (m_spectral_end) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + } + else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) + stop_decoding(JPGD_BAD_SOS_SUCCESSIVE); + + if (dc_only_scan) + { + if (refinement_scan) + decode_block_func = decode_block_dc_refine; + else + decode_block_func = decode_block_dc_first; + } + else + { + if (refinement_scan) + decode_block_func = decode_block_ac_refine; + else + decode_block_func = decode_block_ac_first; + } + + decode_scan(decode_block_func); + + m_bits_left = 16; + get_bits(16); + get_bits(16); + + total_scans++; + if (total_scans > MAX_SCANS_TO_PROCESS) + stop_decoding(JPGD_TOO_MANY_SCANS); + } + + m_comps_in_scan = m_comps_in_frame; + + for (i = 0; i < m_comps_in_frame; i++) + m_comp_list[i] = i; + + if (!calc_mcu_block_order()) + stop_decoding(JPGD_DECODE_ERROR); + } + + void jpeg_decoder::init_sequential() + { + if (!init_scan()) + stop_decoding(JPGD_UNEXPECTED_MARKER); + } + + void jpeg_decoder::decode_start() + { + init_frame(); + + if (m_progressive_flag) + init_progressive(); + else + init_sequential(); + } + + void jpeg_decoder::decode_init(jpeg_decoder_stream* pStream, uint32_t flags) + { + init(pStream, flags); + locate_sof_marker(); + } + + jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags) + { + if (setjmp(m_jmp_state)) + return; + decode_init(pStream, flags); + } + + int jpeg_decoder::begin_decoding() + { + if (m_ready_flag) + return JPGD_SUCCESS; + + if (m_error_code) + return JPGD_FAILED; + + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + decode_start(); + + m_ready_flag = true; + + return JPGD_SUCCESS; + } + + jpeg_decoder::~jpeg_decoder() + { + free_all_blocks(); + } + + jpeg_decoder_file_stream::jpeg_decoder_file_stream() + { + m_pFile = nullptr; + m_eof_flag = false; + m_error_flag = false; + } + + void jpeg_decoder_file_stream::close() + { + if (m_pFile) + { + fclose(m_pFile); + m_pFile = nullptr; + } + + m_eof_flag = false; + m_error_flag = false; + } + + jpeg_decoder_file_stream::~jpeg_decoder_file_stream() + { + close(); + } + + bool jpeg_decoder_file_stream::open(const char* Pfilename) + { + close(); + + m_eof_flag = false; + m_error_flag = false; + +#if defined(_MSC_VER) + m_pFile = nullptr; + fopen_s(&m_pFile, Pfilename, "rb"); +#else + m_pFile = fopen(Pfilename, "rb"); +#endif + return m_pFile != nullptr; + } + + int jpeg_decoder_file_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) + { + if (!m_pFile) + return -1; + + if (m_eof_flag) + { + *pEOF_flag = true; + return 0; + } + + if (m_error_flag) + return -1; + + int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile)); + if (bytes_read < max_bytes_to_read) + { + if (ferror(m_pFile)) + { + m_error_flag = true; + return -1; + } + + m_eof_flag = true; + *pEOF_flag = true; + } + + return bytes_read; + } + + bool jpeg_decoder_mem_stream::open(const uint8* pSrc_data, uint size) + { + close(); + m_pSrc_data = pSrc_data; + m_ofs = 0; + m_size = size; + return true; + } + + int jpeg_decoder_mem_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) + { + *pEOF_flag = false; + + if (!m_pSrc_data) + return -1; + + uint bytes_remaining = m_size - m_ofs; + if ((uint)max_bytes_to_read > bytes_remaining) + { + max_bytes_to_read = bytes_remaining; + *pEOF_flag = true; + } + + memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read); + m_ofs += max_bytes_to_read; + + return max_bytes_to_read; + } + + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + if (!actual_comps) + return nullptr; + *actual_comps = 0; + + if ((!pStream) || (!width) || (!height) || (!req_comps)) + return nullptr; + + if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) + return nullptr; + + jpeg_decoder decoder(pStream, flags); + if (decoder.get_error_code() != JPGD_SUCCESS) + return nullptr; + + const int image_width = decoder.get_width(), image_height = decoder.get_height(); + *width = image_width; + *height = image_height; + *actual_comps = decoder.get_num_components(); + + if (decoder.begin_decoding() != JPGD_SUCCESS) + return nullptr; + + const int dst_bpl = image_width * req_comps; + + uint8* pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height); + if (!pImage_data) + return nullptr; + + for (int y = 0; y < image_height; y++) + { + const uint8* pScan_line; + uint scan_line_len; + if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) + { + jpgd_free(pImage_data); + return nullptr; + } + + uint8* pDst = pImage_data + y * dst_bpl; + + if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) + memcpy(pDst, pScan_line, dst_bpl); + else if (decoder.get_num_components() == 1) + { + if (req_comps == 3) + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst += 3; + } + } + else + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst[3] = 255; + pDst += 4; + } + } + } + else if (decoder.get_num_components() == 3) + { + if (req_comps == 1) + { + const int YR = 19595, YG = 38470, YB = 7471; + for (int x = 0; x < image_width; x++) + { + int r = pScan_line[x * 4 + 0]; + int g = pScan_line[x * 4 + 1]; + int b = pScan_line[x * 4 + 2]; + *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16); + } + } + else + { + for (int x = 0; x < image_width; x++) + { + pDst[0] = pScan_line[x * 4 + 0]; + pDst[1] = pScan_line[x * 4 + 1]; + pDst[2] = pScan_line[x * 4 + 2]; + pDst += 3; + } + } + } + } + + return pImage_data; + } + + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size); + return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps, flags); + } + + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags) + { + jpgd::jpeg_decoder_file_stream file_stream; + if (!file_stream.open(pSrc_filename)) + return nullptr; + return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps, flags); + } + +} // namespace jpgd diff --git a/thirdparty/basis_universal/encoder/jpgd.h b/thirdparty/basis_universal/encoder/jpgd.h new file mode 100644 index 0000000000..86a7814cae --- /dev/null +++ b/thirdparty/basis_universal/encoder/jpgd.h @@ -0,0 +1,347 @@ +// jpgd.h - C++ class for JPEG decompression. +// Public domain, Rich Geldreich <richgel99@gmail.com> +#ifndef JPEG_DECODER_H +#define JPEG_DECODER_H + +#include <stdlib.h> +#include <stdio.h> +#include <setjmp.h> +#include <assert.h> +#include <stdint.h> + +#ifdef _MSC_VER +#define JPGD_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define JPGD_NORETURN __attribute__ ((noreturn)) +#else +#define JPGD_NORETURN +#endif + +#define JPGD_HUFF_TREE_MAX_LENGTH 512 +#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256 + +namespace jpgd +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef signed int int32; + + // Loads a JPEG image from a memory buffer or a file. + // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). + // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). + // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. + // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. + unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + // Success/failure error codes. + enum jpgd_status + { + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS + }; + + // Input stream interface. + // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. + // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. + // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. + // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. + class jpeg_decoder_stream + { + public: + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } + + // The read() method is called when the internal input buffer is empty. + // Parameters: + // pBuf - input buffer + // max_bytes_to_read - maximum bytes that can be written to pBuf + // pEOF_flag - set this to true if at end of stream (no more bytes remaining) + // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). + // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0; + }; + + // stdio FILE stream class. + class jpeg_decoder_file_stream : public jpeg_decoder_stream + { + jpeg_decoder_file_stream(const jpeg_decoder_file_stream&); + jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&); + + FILE* m_pFile; + bool m_eof_flag, m_error_flag; + + public: + jpeg_decoder_file_stream(); + virtual ~jpeg_decoder_file_stream(); + + bool open(const char* Pfilename); + void close(); + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Memory stream class. + class jpeg_decoder_mem_stream : public jpeg_decoder_stream + { + const uint8* m_pSrc_data; + uint m_ofs, m_size; + + public: + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } + + virtual ~jpeg_decoder_mem_stream() { } + + bool open(const uint8* pSrc_data, uint size); + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } + + virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); + }; + + // Loads JPEG file from a jpeg_decoder_stream. + unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0); + + enum + { + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768 + }; + + typedef int16 jpgd_quant_t; + typedef int16 jpgd_block_t; + + class jpeg_decoder + { + public: + enum + { + cFlagLinearChromaFiltering = 1 + }; + + // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc. + // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. + jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = cFlagLinearChromaFiltering); + + ~jpeg_decoder(); + + // Call this method after constructing the object to begin decompression. + // If JPGD_SUCCESS is returned you may then call decode() on each scanline. + + int begin_decoding(); + + // Returns the next scan line. + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). + // Returns JPGD_SUCCESS if a scan line has been returned. + // Returns JPGD_DONE if all scan lines have been returned. + // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. + int decode(const void** pScan_line, uint* pScan_line_len); + + inline jpgd_status get_error_code() const { return m_error_code; } + + inline int get_width() const { return m_image_x_size; } + inline int get_height() const { return m_image_y_size; } + + inline int get_num_components() const { return m_comps_in_frame; } + + inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } + inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } + + // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). + inline int get_total_bytes_read() const { return m_total_bytes_read; } + + private: + jpeg_decoder(const jpeg_decoder&); + jpeg_decoder& operator =(const jpeg_decoder&); + + typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int); + + struct huff_tables + { + bool ac_table; + uint look_up[256]; + uint look_up2[256]; + uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH]; + uint tree[JPGD_HUFF_TREE_MAX_LENGTH]; + }; + + struct coeff_buf + { + uint8* pData; + int block_num_x, block_num_y; + int block_len_x, block_len_y; + int block_size; + }; + + struct mem_block + { + mem_block* m_pNext; + size_t m_used_count; + size_t m_size; + char m_data[1]; + }; + + jmp_buf m_jmp_state; + uint32_t m_flags; + mem_block* m_pMem_blocks; + int m_image_x_size; + int m_image_y_size; + jpeg_decoder_stream* m_pStream; + + int m_progressive_flag; + + uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; + uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size + uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size + jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables + int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) + int m_comps_in_frame; // # of components in frame + int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor + int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor + int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector + int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID + int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; + int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. Y size in pixels + int m_blocks_per_mcu; + int m_max_blocks_per_row; + int m_mcus_per_row, m_mcus_per_col; + int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU + int m_num_buffered_scanlines; + int m_real_dest_bytes_per_scan_line; + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; + coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; + coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; + int m_eob_run; + int m_block_y_mcu[JPGD_MAX_COMPONENTS]; + uint8* m_pIn_buf_ofs; + int m_in_buf_left; + int m_tem_flag; + + uint8 m_in_buf_pad_start[64]; + uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; + uint8 m_in_buf_pad_end[64]; + + int m_bits_left; + uint m_bit_buf; + int m_restart_interval; + int m_restarts_left; + int m_next_restart_num; + int m_max_mcus_per_row; + int m_max_blocks_per_mcu; + + int m_max_mcus_per_col; + uint m_last_dc_val[JPGD_MAX_COMPONENTS]; + jpgd_block_t* m_pMCU_coefficients; + int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; + uint8* m_pSample_buf; + uint8* m_pSample_buf_prev; + int m_crr[256]; + int m_cbb[256]; + int m_crg[256]; + int m_cbg[256]; + uint8* m_pScan_line_0; + uint8* m_pScan_line_1; + jpgd_status m_error_code; + int m_total_bytes_read; + + bool m_ready_flag; + bool m_eof_flag; + bool m_sample_buf_prev_valid; + + inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; } + void free_all_blocks(); + JPGD_NORETURN void stop_decoding(jpgd_status status); + void* alloc(size_t n, bool zero = false); + void word_clear(void* p, uint16 c, uint n); + void prep_in_buffer(); + void read_dht_marker(); + void read_dqt_marker(); + void read_sof_marker(); + void skip_variable_marker(); + void read_dri_marker(); + void read_sos_marker(); + int next_marker(); + int process_markers(); + void locate_soi_marker(); + void locate_sof_marker(); + int locate_sos_marker(); + void init(jpeg_decoder_stream* pStream, uint32_t flags); + void create_look_ups(); + void fix_in_buffer(); + void transform_mcu(int mcu_row); + coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); + inline jpgd_block_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y); + void load_next_row(); + void decode_next_row(); + void make_huff_table(int index, huff_tables* pH); + void check_quant_tables(); + void check_huff_tables(); + bool calc_mcu_block_order(); + int init_scan(); + void init_frame(); + void process_restart(); + void decode_scan(pDecode_block_func decode_block_func); + void init_progressive(); + void init_sequential(); + void decode_start(); + void decode_init(jpeg_decoder_stream* pStream, uint32_t flags); + void H2V2Convert(); + uint32_t H2V2ConvertFiltered(); + void H2V1Convert(); + void H2V1ConvertFiltered(); + void H1V2Convert(); + void H1V2ConvertFiltered(); + void H1V1Convert(); + void gray_convert(); + void find_eoi(); + inline uint get_char(); + inline uint get_char(bool* pPadding_flag); + inline void stuff_char(uint8 q); + inline uint8 get_octet(); + inline uint get_bits(int num_bits); + inline uint get_bits_no_markers(int numbits); + inline int huff_decode(huff_tables* pH); + inline int huff_decode(huff_tables* pH, int& extrabits); + + // Clamps a value between 0-255. + static inline uint8 clamp(int i) + { + if (static_cast<uint>(i) > 255) + i = (((~i) >> 31) & 0xFF); + return static_cast<uint8>(i); + } + int decode_next_mcu_row(); + + static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); + static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); + }; + +} // namespace jpgd + +#endif // JPEG_DECODER_H diff --git a/thirdparty/basis_universal/lodepng.cpp b/thirdparty/basis_universal/encoder/lodepng.cpp index cf964d0555..63adcf49b6 100644 --- a/thirdparty/basis_universal/lodepng.cpp +++ b/thirdparty/basis_universal/encoder/lodepng.cpp @@ -29,6 +29,7 @@ Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for */ #ifdef _MSC_VER +#define _CRT_SECURE_NO_DEPRECATE #pragma warning (disable : 4201) #ifndef BASISU_NO_ITERATOR_DEBUG_LEVEL @@ -200,6 +201,7 @@ static void uivector_init(uivector* p) { /*returns 1 if success, 0 if failure ==> nothing done*/ static unsigned uivector_push_back(uivector* p, unsigned c) { if(!uivector_resize(p, p->size + 1)) return 0; + if (!p->data) return 0; p->data[p->size - 1] = c; return 1; } diff --git a/thirdparty/basis_universal/lodepng.h b/thirdparty/basis_universal/encoder/lodepng.h index 476a2061e2..476a2061e2 100644 --- a/thirdparty/basis_universal/lodepng.h +++ b/thirdparty/basis_universal/encoder/lodepng.h diff --git a/thirdparty/basis_universal/transcoder/basisu.h b/thirdparty/basis_universal/transcoder/basisu.h index 25600a69bf..f33baf67c8 100644 --- a/thirdparty/basis_universal/transcoder/basisu.h +++ b/thirdparty/basis_universal/transcoder/basisu.h @@ -1,5 +1,5 @@ // basisu.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -41,10 +41,6 @@ #endif #endif // defined(_DEBUG) || defined(DEBUG) - #ifndef NOMINMAX - #define NOMINMAX - #endif - #endif // BASISU_NO_ITERATOR_DEBUG_LEVEL #endif // _MSC_VER @@ -63,10 +59,11 @@ #include <functional> #include <iterator> #include <type_traits> -#include <vector> #include <assert.h> #include <random> +#include "basisu_containers.h" + #ifdef max #undef max #endif @@ -79,20 +76,20 @@ #define strcasecmp _stricmp #endif -// Set to one to enable debug printf()'s when any errors occur, for development/debugging. -#ifndef BASISU_DEVEL_MESSAGES -#define BASISU_DEVEL_MESSAGES 0 +// Set to one to enable debug printf()'s when any errors occur, for development/debugging. Especially useful for WebGL development. +#ifndef BASISU_FORCE_DEVEL_MESSAGES +#define BASISU_FORCE_DEVEL_MESSAGES 0 #endif #define BASISU_NOTE_UNUSED(x) (void)(x) #define BASISU_ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(x) x(const x &) = delete; x& operator= (const x &) = delete; #define BASISU_ASSUME(x) static_assert(x, #x); -#define BASISU_OFFSETOF(s, m) (uint32_t)(intptr_t)(&((s *)(0))->m) +#define BASISU_OFFSETOF(s, m) offsetof(s, m) #define BASISU_STRINGIZE(x) #x #define BASISU_STRINGIZE2(x) BASISU_STRINGIZE(x) -#if BASISU_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES #define BASISU_DEVEL_ERROR(...) do { basisu::debug_printf(__VA_ARGS__); } while(0) #else #define BASISU_DEVEL_ERROR(...) @@ -108,26 +105,43 @@ namespace basisu const char BASISU_PATH_SEPERATOR_CHAR = '/'; #endif - typedef std::vector<uint8_t> uint8_vec; - typedef std::vector<int16_t> int16_vec; - typedef std::vector<uint16_t> uint16_vec; - typedef std::vector<uint32_t> uint_vec; - typedef std::vector<uint64_t> uint64_vec; - typedef std::vector<int> int_vec; - typedef std::vector<bool> bool_vec; + typedef basisu::vector<uint8_t> uint8_vec; + typedef basisu::vector<int16_t> int16_vec; + typedef basisu::vector<uint16_t> uint16_vec; + typedef basisu::vector<uint32_t> uint_vec; + typedef basisu::vector<uint64_t> uint64_vec; + typedef basisu::vector<int> int_vec; + typedef basisu::vector<bool> bool_vec; void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); + template <typename T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; } template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } template <typename S> inline S minimum(S a, S b) { return (a < b) ? a : b; } template <typename S> inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } + template <typename S> inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + + inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + inline float saturate(float value) { return clampf(value, 0, 1.0f); } + inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } + inline uint32_t minimumu(uint32_t a, uint32_t b) { return (a < b) ? a : b; } + inline int32_t minimumi(int32_t a, int32_t b) { return (a < b) ? a : b; } + inline float minimumf(float a, float b) { return (a < b) ? a : b; } + inline uint8_t maximumub(uint8_t a, uint8_t b) { return (a > b) ? a : b; } + inline uint32_t maximumu(uint32_t a, uint32_t b) { return (a > b) ? a : b; } + inline int32_t maximumi(int32_t a, int32_t b) { return (a > b) ? a : b; } + inline float maximumf(float a, float b) { return (a > b) ? a : b; } + inline int squarei(int i) { return i * i; } + inline float squaref(float i) { return i * i; } + template<typename T> inline T square(T a) { return a * a; } template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } @@ -137,12 +151,10 @@ namespace basisu template<typename T> inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template<typename T> inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } - template<typename S> inline S square(S val) { return val * val; } - inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } inline bool is_pow2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } - template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); return v; } + template<typename T> inline T open_range_check(T v, T minv, T maxv) { assert(v >= minv && v < maxv); BASISU_NOTE_UNUSED(minv); BASISU_NOTE_UNUSED(maxv); return v; } template<typename T> inline T open_range_check(T v, T maxv) { assert(v < maxv); BASISU_NOTE_UNUSED(maxv); return v; } inline uint32_t total_bits(uint32_t v) { uint32_t l = 0; for ( ; v > 0U; ++l) v >>= 1; return l; } @@ -244,27 +256,92 @@ namespace basisu if ((ha <= lb) || (la >= hb)) return false; return true; } + + static inline uint32_t read_le_dword(const uint8_t *pBytes) + { + return (pBytes[3] << 24U) | (pBytes[2] << 16U) | (pBytes[1] << 8U) | (pBytes[0]); + } + + static inline void write_le_dword(uint8_t* pBytes, uint32_t val) + { + pBytes[0] = (uint8_t)val; + pBytes[1] = (uint8_t)(val >> 8U); + pBytes[2] = (uint8_t)(val >> 16U); + pBytes[3] = (uint8_t)(val >> 24U); + } - // Always little endian 2-4 byte unsigned int + // Always little endian 1-8 byte unsigned int template<uint32_t NumBytes> struct packed_uint { uint8_t m_bytes[NumBytes]; - inline packed_uint() { static_assert(NumBytes <= 4, "NumBytes <= 4"); } - inline packed_uint(uint32_t v) { *this = v; } + inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } + inline packed_uint(uint64_t v) { *this = v; } inline packed_uint(const packed_uint& other) { *this = other; } + + inline packed_uint& operator= (uint64_t v) + { + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast<uint8_t>(v >> (i * 8)); + return *this; + } - inline packed_uint& operator= (uint32_t v) { for (uint32_t i = 0; i < NumBytes; i++) m_bytes[i] = static_cast<uint8_t>(v >> (i * 8)); return *this; } + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + return *this; + } inline operator uint32_t() const { switch (NumBytes) { - case 1: return m_bytes[0]; - case 2: return (m_bytes[1] << 8U) | m_bytes[0]; - case 3: return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | (m_bytes[0]); - default: return (m_bytes[3] << 24U) | (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | (m_bytes[0]); + case 1: + { + return m_bytes[0]; + } + case 2: + { + return (m_bytes[1] << 8U) | m_bytes[0]; + } + case 3: + { + return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; + } + case 4: + { + return read_le_dword(m_bytes); + } + case 5: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = m_bytes[4]; + return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U); + } + case 6: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U); + } + case 7: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; + return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U); + } + case 8: + { + uint32_t l = read_le_dword(m_bytes); + uint32_t h = read_le_dword(m_bytes + 4); + return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U); + } + default: + { + assert(0); + return 0; + } } } }; @@ -278,7 +355,7 @@ namespace basisu enum { cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, - cHuffmanFastLookupBits = 10, cHuffmanFastLookupSize = 1 << cHuffmanFastLookupBits, + cHuffmanFastLookupBits = 10, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs @@ -308,15 +385,15 @@ namespace basisu // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) - cETC2_RGB, // ETC2 color block - cETC2_RGBA, // ETC2 alpha block followed by ETC2 color block + cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) + cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 - cBC3, // DXT5 (DXT5A block followed by a DXT1 block) + cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A - cBC5, // 3DC/DXN (two DXT5A blocks) + cBC5, // 3DC/DXN (two BC4/DXT5A blocks) cBC7, - cASTC4x4, + cASTC4x4, // LDR only cPVRTC1_4_RGB, cPVRTC1_4_RGBA, cATC_RGB, @@ -325,6 +402,9 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, + cUASTC4x4, + cBC1_NV, + cBC1_AMD, // Uncompressed/raw pixels cRGBA32, @@ -343,6 +423,8 @@ namespace basisu case texture_format::cETC2_RGB: case texture_format::cETC2_ALPHA: case texture_format::cBC1: + case texture_format::cBC1_NV: + case texture_format::cBC1_AMD: case texture_format::cBC4: case texture_format::cPVRTC1_4_RGB: case texture_format::cPVRTC1_4_RGBA: diff --git a/thirdparty/basis_universal/transcoder/basisu_containers.h b/thirdparty/basis_universal/transcoder/basisu_containers.h new file mode 100644 index 0000000000..1ca4bab307 --- /dev/null +++ b/thirdparty/basis_universal/transcoder/basisu_containers.h @@ -0,0 +1,1908 @@ +// basisu_containers.h +#pragma once +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <assert.h> +#include <algorithm> + +#if defined(__linux__) && !defined(ANDROID) +// Only for malloc_usable_size() in basisu_containers_impl.h +#include <malloc.h> +#define HAS_MALLOC_USABLE_SIZE 1 +#endif + +#ifdef _MSC_VER +#define BASISU_FORCE_INLINE __forceinline +#else +#define BASISU_FORCE_INLINE inline +#endif + +namespace basisu +{ + enum { cInvalidIndex = -1 }; + + namespace helpers + { + inline bool is_power_of_2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_power_of_2(uint64_t x) { return x && ((x & (x - 1U)) == 0U); } + template<class T> const T& minimum(const T& a, const T& b) { return (b < a) ? b : a; } + template<class T> const T& maximum(const T& a, const T& b) { return (a < b) ? b : a; } + + inline uint32_t floor_log2i(uint32_t v) + { + uint32_t l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint32_t next_pow2(uint32_t val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint64_t next_pow2(uint64_t val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + } // namespace helpers + + template <typename T> + inline T* construct(T* p) + { + return new (static_cast<void*>(p)) T; + } + + template <typename T, typename U> + inline T* construct(T* p, const U& init) + { + return new (static_cast<void*>(p)) T(init); + } + + template <typename T> + inline void construct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast<void*>(p)) T; + } + + template <typename T, typename U> + inline void construct_array(T* p, size_t n, const U& init) + { + T* q = p + n; + for (; p != q; ++p) + new (static_cast<void*>(p)) T(init); + } + + template <typename T> + inline void destruct(T* p) + { + (void)p; + p->~T(); + } + + template <typename T> inline void destruct_array(T* p, size_t n) + { + T* q = p + n; + for (; p != q; ++p) + p->~T(); + } + + template<typename T> struct int_traits { enum { cMin = INT32_MIN, cMax = INT32_MAX, cSigned = true }; }; + + template<> struct int_traits<int8_t> { enum { cMin = INT8_MIN, cMax = INT8_MAX, cSigned = true }; }; + template<> struct int_traits<int16_t> { enum { cMin = INT16_MIN, cMax = INT16_MAX, cSigned = true }; }; + template<> struct int_traits<int32_t> { enum { cMin = INT32_MIN, cMax = INT32_MAX, cSigned = true }; }; + + template<> struct int_traits<uint8_t> { enum { cMin = 0, cMax = UINT8_MAX, cSigned = false }; }; + template<> struct int_traits<uint16_t> { enum { cMin = 0, cMax = UINT16_MAX, cSigned = false }; }; + template<> struct int_traits<uint32_t> { enum { cMin = 0, cMax = UINT32_MAX, cSigned = false }; }; + + template<typename T> + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { basisu::construct(p); } + static inline void construct(T* p, const T& init) { basisu::construct(p, init); } + static inline void construct_array(T* p, size_t n) { basisu::construct_array(p, n); } + static inline void destruct(T* p) { basisu::destruct(p); } + static inline void destruct_array(T* p, size_t n) { basisu::destruct_array(p, n); } + }; + + template<typename T> struct scalar_type<T*> + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, size_t n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { p; } + static inline void destruct_array(T** p, size_t n) { p, n; } + }; + +#define BASISU_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type<X> { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, size_t n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { p; } \ + static inline void destruct_array(X* p, size_t n) { p, n; } }; + + BASISU_DEFINE_BUILT_IN_TYPE(bool) + BASISU_DEFINE_BUILT_IN_TYPE(char) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned char) + BASISU_DEFINE_BUILT_IN_TYPE(short) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned short) + BASISU_DEFINE_BUILT_IN_TYPE(int) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned int) + BASISU_DEFINE_BUILT_IN_TYPE(long) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned long) +#ifdef __GNUC__ + BASISU_DEFINE_BUILT_IN_TYPE(long long) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned long long) +#else + BASISU_DEFINE_BUILT_IN_TYPE(__int64) + BASISU_DEFINE_BUILT_IN_TYPE(unsigned __int64) +#endif + BASISU_DEFINE_BUILT_IN_TYPE(float) + BASISU_DEFINE_BUILT_IN_TYPE(double) + BASISU_DEFINE_BUILT_IN_TYPE(long double) + +#undef BASISU_DEFINE_BUILT_IN_TYPE + + template<typename T> + struct bitwise_movable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable<Q> { enum { cFlag = true }; }; + + template<typename T> + struct bitwise_copyable { enum { cFlag = false }; }; + +#define BASISU_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable<Q> { enum { cFlag = true }; }; + +#define BASISU_IS_POD(T) __is_pod(T) + +#define BASISU_IS_SCALAR_TYPE(T) (scalar_type<T>::cFlag) + +#if defined(__GNUC__) && __GNUC__<5 + #define BASISU_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__) +#else + #define BASISU_IS_TRIVIALLY_COPYABLE(...) std::is_trivially_copyable<__VA_ARGS__>::value +#endif + +// TODO: clean this up +#define BASISU_IS_BITWISE_COPYABLE(T) (BASISU_IS_SCALAR_TYPE(T) || BASISU_IS_POD(T) || BASISU_IS_TRIVIALLY_COPYABLE(T) || (bitwise_copyable<T>::cFlag)) + +#define BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) (BASISU_IS_BITWISE_COPYABLE(T) || (bitwise_movable<T>::cFlag)) + +#define BASISU_HAS_DESTRUCTOR(T) ((!scalar_type<T>::cFlag) && (!__is_pod(T))) + + typedef char(&yes_t)[1]; + typedef char(&no_t)[2]; + + template <class U> yes_t class_test(int U::*); + template <class U> no_t class_test(...); + + template <class T> struct is_class + { + enum { value = (sizeof(class_test<T>(0)) == sizeof(yes_t)) }; + }; + + template <typename T> struct is_pointer + { + enum { value = false }; + }; + + template <typename T> struct is_pointer<T*> + { + enum { value = true }; + }; + + struct empty_type { }; + + BASISU_DEFINE_BITWISE_COPYABLE(empty_type); + BASISU_DEFINE_BITWISE_MOVABLE(empty_type); + + template<typename T> struct rel_ops + { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + struct elemental_vector + { + void* m_p; + uint32_t m_size; + uint32_t m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint32_t num); + + bool increase_capacity(uint32_t min_new_capacity, bool grow_hint, uint32_t element_size, object_mover pRelocate, bool nofail); + }; + + template<typename T> + class vector : public rel_ops< vector<T> > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + m_size(0), + m_capacity(0) + { + } + + inline vector(uint32_t n, const T& init) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + construct_array(m_p, n, init); + m_size = n; + } + + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = other.m_size; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32_t i = m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + } + + inline explicit vector(size_t size) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + resize(size); + } + + inline ~vector() + { + if (m_p) + { + scalar_type<T>::destruct_array(m_p, m_size); + free(m_p); + } + } + + inline vector& operator= (const vector& other) + { + if (this == &other) + return *this; + + if (m_capacity >= other.m_size) + resize(0); + else + { + clear(); + increase_capacity(other.m_size, false); + } + + if (BASISU_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32_t i = other.m_size; i > 0; i--) + construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return *this; + } + + BASISU_FORCE_INLINE const T* begin() const { return m_p; } + BASISU_FORCE_INLINE T* begin() { return m_p; } + + BASISU_FORCE_INLINE const T* end() const { return m_p + m_size; } + BASISU_FORCE_INLINE T* end() { return m_p + m_size; } + + BASISU_FORCE_INLINE bool empty() const { return !m_size; } + BASISU_FORCE_INLINE uint32_t size() const { return m_size; } + BASISU_FORCE_INLINE uint32_t size_in_bytes() const { return m_size * sizeof(T); } + BASISU_FORCE_INLINE uint32_t capacity() const { return m_capacity; } + + // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. + //BASISU_FORCE_INLINE const T& operator[] (uint32_t i) const { assert(i < m_size); return m_p[i]; } + //BASISU_FORCE_INLINE T& operator[] (uint32_t i) { assert(i < m_size); return m_p[i]; } + + BASISU_FORCE_INLINE const T& operator[] (size_t i) const { assert(i < m_size); return m_p[i]; } + BASISU_FORCE_INLINE T& operator[] (size_t i) { assert(i < m_size); return m_p[i]; } + + // at() always includes range checking, even in final builds, unlike operator []. + // The first element is returned if the index is out of range. + BASISU_FORCE_INLINE const T& at(size_t i) const { assert(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } + BASISU_FORCE_INLINE T& at(size_t i) { assert(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } + + BASISU_FORCE_INLINE const T& front() const { assert(m_size); return m_p[0]; } + BASISU_FORCE_INLINE T& front() { assert(m_size); return m_p[0]; } + + BASISU_FORCE_INLINE const T& back() const { assert(m_size); return m_p[m_size - 1]; } + BASISU_FORCE_INLINE T& back() { assert(m_size); return m_p[m_size - 1]; } + + BASISU_FORCE_INLINE const T* get_ptr() const { return m_p; } + BASISU_FORCE_INLINE T* get_ptr() { return m_p; } + + BASISU_FORCE_INLINE const T* data() const { return m_p; } + BASISU_FORCE_INLINE T* data() { return m_p; } + + // clear() sets the container to empty, then frees the allocated block. + inline void clear() + { + if (m_p) + { + scalar_type<T>::destruct_array(m_p, m_size); + free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void clear_no_destruction() + { + if (m_p) + { + free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void reserve(size_t new_capacity_size_t) + { + if (new_capacity_size_t > UINT32_MAX) + { + assert(0); + return; + } + + uint32_t new_capacity = (uint32_t)new_capacity_size_t; + + if (new_capacity > m_capacity) + increase_capacity(new_capacity, false); + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false); + tmp = *this; + swap(tmp); + } + } + + inline bool try_reserve(size_t new_capacity_size_t) + { + if (new_capacity_size_t > UINT32_MAX) + { + assert(0); + return false; + } + + uint32_t new_capacity = (uint32_t)new_capacity_size_t; + + if (new_capacity > m_capacity) + { + if (!increase_capacity(new_capacity, false)) + return false; + } + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. + vector tmp; + tmp.increase_capacity(helpers::maximum(m_size, new_capacity), false); + tmp = *this; + swap(tmp); + } + + return true; + } + + // resize(0) sets the container to empty, but does not free the allocated block. + inline void resize(size_t new_size_size_t, bool grow_hint = false) + { + if (new_size_size_t > UINT32_MAX) + { + assert(0); + return; + } + + uint32_t new_size = (uint32_t)new_size_size_t; + + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type<T>::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint); + + scalar_type<T>::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + } + + inline bool try_resize(size_t new_size_size_t, bool grow_hint = false) + { + if (new_size_size_t > UINT32_MAX) + { + assert(0); + return false; + } + + uint32_t new_size = (uint32_t)new_size_size_t; + + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type<T>::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + scalar_type<T>::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + // If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future). + // Otherwise it blows away the allocated block. See http://www.codercorner.com/blog/?p=494 + inline void reset() + { + if (m_size >= (m_capacity >> 1)) + resize(0); + else + clear(); + } + + inline T* enlarge(uint32_t i) + { + uint32_t cur_size = m_size; + resize(cur_size + i, true); + return get_ptr() + cur_size; + } + + inline T* try_enlarge(uint32_t i) + { + uint32_t cur_size = m_size; + if (!try_resize(cur_size + i, true)) + return NULL; + return get_ptr() + cur_size; + } + + BASISU_FORCE_INLINE void push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + increase_capacity(m_size + 1, true); + + scalar_type<T>::construct(m_p + m_size, obj); + m_size++; + } + + inline bool try_push_back(const T& obj) + { + assert(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type<T>::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void push_back_value(T obj) + { + if (m_size >= m_capacity) + increase_capacity(m_size + 1, true); + + scalar_type<T>::construct(m_p + m_size, obj); + m_size++; + } + + inline void pop_back() + { + assert(m_size); + + if (m_size) + { + m_size--; + scalar_type<T>::destruct(&m_p[m_size]); + } + } + + inline void insert(uint32_t index, const T* p, uint32_t n) + { + assert(index <= m_size); + if (!n) + return; + + const uint32_t orig_size = m_size; + resize(m_size + n, true); + + const uint32_t num_to_move = orig_size - index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction. + memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + } + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast<T*>(pSrc) + n; + + for (uint32_t i = 0; i < num_to_move; i++) + { + assert((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + } + + T* pDst = m_p + index; + + if (BASISU_IS_BITWISE_COPYABLE(T)) + { + // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction. + memcpy(pDst, p, sizeof(T) * n); + } + else + { + for (uint32_t i = 0; i < n; i++) + { + assert((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + } + + inline void insert(T* p, const T& obj) + { + int64_t ofs = p - begin(); + if ((ofs < 0) || (ofs > UINT32_MAX)) + { + assert(0); + return; + } + + insert((uint32_t)ofs, &obj, 1); + } + + // push_front() isn't going to be very fast - it's only here for usability. + inline void push_front(const T& obj) + { + insert(0, &obj, 1); + } + + vector& append(const vector& other) + { + if (other.m_size) + insert(m_size, &other[0], other.m_size); + return *this; + } + + vector& append(const T* p, uint32_t n) + { + if (n) + insert(m_size, p, n); + return *this; + } + + inline void erase(uint32_t start, uint32_t n) + { + assert((start + n) <= m_size); + if ((start + n) > m_size) + return; + + if (!n) + return; + + const uint32_t num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) + { + // This test is overly cautious. + if ((!BASISU_IS_BITWISE_COPYABLE(T)) || (BASISU_HAS_DESTRUCTOR(T))) + { + // Type has been marked explictly as bitwise movable, which means we can move them around but they may need to be destructed. + // First destroy the erased objects. + scalar_type<T>::destruct_array(pDst, n); + } + + // Copy "down" the objects to preserve, filling in the empty slots. + memmove(pDst, pSrc, num_to_move * sizeof(T)); + } + else + { + // Type is not bitwise copyable or movable. + // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. + T* pDst_end = pDst + num_to_move; + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + scalar_type<T>::destruct_array(pDst_end, n); + } + + m_size -= n; + } + + inline void erase(uint32_t index) + { + erase(index, 1); + } + + inline void erase(T* p) + { + assert((p >= m_p) && (p < (m_p + m_size))); + erase(static_cast<uint32_t>(p - m_p)); + } + + inline void erase(T *pFirst, T *pEnd) + { + assert(pFirst <= pEnd); + assert(pFirst >= begin() && pFirst <= end()); + assert(pEnd >= begin() && pEnd <= end()); + + int64_t ofs = pFirst - begin(); + if ((ofs < 0) || (ofs > UINT32_MAX)) + { + assert(0); + return; + } + + int64_t n = pEnd - pFirst; + if ((n < 0) || (n > UINT32_MAX)) + { + assert(0); + return; + } + + erase((uint32_t)ofs, (uint32_t)n); + } + + void erase_unordered(uint32_t index) + { + assert(index < m_size); + + if ((index + 1) < m_size) + (*this)[index] = back(); + + pop_back(); + } + + inline bool operator== (const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type<T>::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint32_t i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const uint32_t min_size = helpers::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + inline void swap(vector& other) + { + std::swap(m_p, other.m_p); + std::swap(m_size, other.m_size); + std::swap(m_capacity, other.m_capacity); + } + + inline void sort() + { + std::sort(begin(), end()); + } + + inline void unique() + { + if (!empty()) + { + sort(); + + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + uint32_t j = m_size >> 1; + for (uint32_t i = 0; i < j; i++) + std::swap(m_p[i], m_p[m_size - 1 - i]); + } + + inline int find(const T& key) const + { + const T* p = m_p; + const T* p_end = m_p + m_size; + + uint32_t index = 0; + + while (p != p_end) + { + if (key == *p) + return index; + + p++; + index++; + } + + return cInvalidIndex; + } + + inline int find_sorted(const T& key) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. + int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for (; ; ) + { + assert(i >= 0 && i < (int)m_size); + const T* pKey_i = m_p + i; + int cmp = key < *pKey_i; +#if defined(_DEBUG) || defined(DEBUG) + int cmp2 = *pKey_i < key; + assert((cmp != cmp2) || (key == *pKey_i)); +#endif + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + + assert(i >= 0 && i < (int)m_size); + pKey_i = m_p + i; + cmp = key < *pKey_i; +#if defined(_DEBUG) || defined(DEBUG) + cmp2 = *pKey_i < key; + assert((cmp != cmp2) || (key == *pKey_i)); +#endif + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + } + } + + return cInvalidIndex; + } + + template<typename Q> + inline int find_sorted(const T& key, Q less_than) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. + int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for (; ; ) + { + assert(i >= 0 && i < (int)m_size); + const T* pKey_i = m_p + i; + int cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + + assert(i >= 0 && i < (int)m_size); + pKey_i = m_p + i; + cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + if (i < 0) + break; + } + } + + return cInvalidIndex; + } + + inline uint32_t count_occurences(const T& key) const + { + uint32_t c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + c++; + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type<T>::cFlag)) + memset(m_p, *reinterpret_cast<const uint8_t*>(&o), m_size); + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + *pDst++ = o; + } + } + + // Caller assumes ownership of the heap block associated with the container. Container is cleared. + inline void* assume_ownership() + { + T* p = m_p; + m_p = NULL; + m_size = 0; + m_capacity = 0; + return p; + } + + // Caller is granting ownership of the indicated heap block. + // Block must have size constructed elements, and have enough room for capacity elements. + inline bool grant_ownership(T* p, uint32_t size, uint32_t capacity) + { + // To to prevent the caller from obviously shooting themselves in the foot. + if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) + { + // Can grant ownership of a block inside the container itself! + assert(0); + return false; + } + + if (size > capacity) + { + assert(0); + return false; + } + + if (!p) + { + if (capacity) + { + assert(0); + return false; + } + } + else if (!capacity) + { + assert(0); + return false; + } + + clear(); + m_p = p; + m_size = size; + m_capacity = capacity; + return true; + } + + private: + T* m_p; + uint32_t m_size; + uint32_t m_capacity; + + template<typename Q> struct is_vector { enum { cFlag = false }; }; + template<typename Q> struct is_vector< vector<Q> > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint32_t num) + { + T* pSrc = static_cast<T*>(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast<T*>(pDst_void); + + while (pSrc != pSrc_end) + { + // placement new + new (static_cast<void*>(pDst)) T(*pSrc); + pSrc->~T(); + ++pSrc; + ++pDst; + } + } + + inline bool increase_capacity(uint32_t min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast<elemental_vector*>(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector<T>::cFlag)) ? NULL : object_mover, nofail); + } + }; + + template<typename T> struct bitwise_movable< vector<T> > { enum { cFlag = true }; }; + + // Hash map + + template <typename T> + struct hasher + { + inline size_t operator() (const T& key) const { return static_cast<size_t>(key); } + }; + + template <typename T> + struct equal_to + { + inline bool operator()(const T& a, const T& b) const { return a == b; } + }; + + // Important: The Hasher and Equals objects must be bitwise movable! + template<typename Key, typename Value = empty_type, typename Hasher = hasher<Key>, typename Equals = equal_to<Key> > + class hash_map + { + public: + class iterator; + class const_iterator; + + private: + friend class iterator; + friend class const_iterator; + + enum state + { + cStateInvalid = 0, + cStateValid = 1 + }; + + enum + { + cMinHashSize = 4U + }; + + public: + typedef hash_map<Key, Value, Hasher, Equals> hash_map_type; + typedef std::pair<Key, Value> value_type; + typedef Key key_type; + typedef Value referent_type; + typedef Hasher hasher_type; + typedef Equals equals_type; + + hash_map() : + m_hash_shift(32), m_num_valid(0), m_grow_threshold(0) + { + } + + hash_map(const hash_map& other) : + m_values(other.m_values), + m_hash_shift(other.m_hash_shift), + m_hasher(other.m_hasher), + m_equals(other.m_equals), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold) + { + } + + hash_map& operator= (const hash_map& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = other.m_values; + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + m_grow_threshold = other.m_grow_threshold; + m_hasher = other.m_hasher; + m_equals = other.m_equals; + + return *this; + } + + inline ~hash_map() + { + clear(); + } + + const Equals& get_equals() const { return m_equals; } + Equals& get_equals() { return m_equals; } + + void set_equals(const Equals& equals) { m_equals = equals; } + + const Hasher& get_hasher() const { return m_hasher; } + Hasher& get_hasher() { return m_hasher; } + + void set_hasher(const Hasher& hasher) { m_hasher = hasher; } + + inline void clear() + { + if (!m_values.empty()) + { + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint32_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_values.clear_no_destruction(); + + m_hash_shift = 32; + m_num_valid = 0; + m_grow_threshold = 0; + } + } + + inline void reset() + { + if (!m_num_valid) + return; + + if (BASISU_HAS_DESTRUCTOR(Key) || BASISU_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint32_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + else if (sizeof(node) <= 32) + { + memset(&m_values[0], 0, m_values.size_in_bytes()); + } + else + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint32_t num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_num_valid = 0; + } + + inline uint32_t size() + { + return m_num_valid; + } + + inline uint32_t get_table_size() + { + return m_values.size(); + } + + inline bool empty() + { + return !m_num_valid; + } + + inline void reserve(uint32_t new_capacity) + { + uint64_t new_hash_size = helpers::maximum(1U, new_capacity); + + new_hash_size = new_hash_size * 2ULL; + + if (!helpers::is_power_of_2(new_hash_size)) + new_hash_size = helpers::next_pow2(new_hash_size); + + new_hash_size = helpers::maximum<uint64_t>(cMinHashSize, new_hash_size); + + new_hash_size = helpers::minimum<uint64_t>(0x80000000UL, new_hash_size); + + if (new_hash_size > m_values.size()) + rehash((uint32_t)new_hash_size); + } + + class iterator + { + friend class hash_map<Key, Value, Hasher, Equals>; + friend class hash_map<Key, Value, Hasher, Equals>::const_iterator; + + public: + inline iterator() : m_pTable(NULL), m_index(0) { } + inline iterator(hash_map_type& table, uint32_t index) : m_pTable(&table), m_index(index) { } + inline iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline iterator operator++(int) + { + iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline iterator& operator++() + { + probe(); + return *this; + } + + inline value_type& operator*() const { return *get_cur(); } + inline value_type* operator->() const { return get_cur(); } + + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + + private: + hash_map_type* m_pTable; + uint32_t m_index; + + inline value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + class const_iterator + { + friend class hash_map<Key, Value, Hasher, Equals>; + friend class hash_map<Key, Value, Hasher, Equals>::iterator; + + public: + inline const_iterator() : m_pTable(NULL), m_index(0) { } + inline const_iterator(const hash_map_type& table, uint32_t index) : m_pTable(&table), m_index(index) { } + inline const_iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + inline const_iterator(const const_iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline const_iterator& operator= (const const_iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + inline const_iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline const_iterator operator++(int) + { + const_iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline const_iterator& operator++() + { + probe(); + return *this; + } + + inline const value_type& operator*() const { return *get_cur(); } + inline const value_type* operator->() const { return get_cur(); } + + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + + private: + const hash_map_type* m_pTable; + uint32_t m_index; + + inline const value_type* get_cur() const + { + assert(m_pTable && (m_index < m_pTable->m_values.size())); + assert(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + assert(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + inline const_iterator begin() const + { + if (!m_num_valid) + return end(); + + return const_iterator(*this, find_next(UINT32_MAX)); + } + + inline const_iterator end() const + { + return const_iterator(*this, m_values.size()); + } + + inline iterator begin() + { + if (!m_num_valid) + return end(); + + return iterator(*this, find_next(UINT32_MAX)); + } + + inline iterator end() + { + return iterator(*this, m_values.size()); + } + + // insert_result.first will always point to inserted key/value (or the already existing key/value). + // insert_resutt.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). + typedef std::pair<iterator, bool> insert_result; + + inline insert_result insert(const Key& k, const Value& v = Value()) + { + insert_result result; + if (!insert_no_grow(result, k, v)) + { + grow(); + + // This must succeed. + if (!insert_no_grow(result, k, v)) + { + fprintf(stderr, "insert() failed"); + abort(); + } + } + + return result; + } + + inline insert_result insert(const value_type& v) + { + return insert(v.first, v.second); + } + + inline const_iterator find(const Key& k) const + { + return const_iterator(*this, find_index(k)); + } + + inline iterator find(const Key& k) + { + return iterator(*this, find_index(k)); + } + + inline bool erase(const Key& k) + { + uint32_t i = find_index(k); + + if (i >= m_values.size()) + return false; + + node* pDst = &get_node(i); + destruct_value_type(pDst); + pDst->state = cStateInvalid; + + m_num_valid--; + + for (; ; ) + { + uint32_t r, j = i; + + node* pSrc = pDst; + + do + { + if (!i) + { + i = m_values.size() - 1; + pSrc = &get_node(i); + } + else + { + i--; + pSrc--; + } + + if (!pSrc->state) + return true; + + r = hash_key(pSrc->first); + + } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + + move_node(pDst, pSrc); + + pDst = pSrc; + } + } + + inline void swap(hash_map_type& other) + { + m_values.swap(other.m_values); + std::swap(m_hash_shift, other.m_hash_shift); + std::swap(m_num_valid, other.m_num_valid); + std::swap(m_grow_threshold, other.m_grow_threshold); + std::swap(m_hasher, other.m_hasher); + std::swap(m_equals, other.m_equals); + } + + private: + struct node : public value_type + { + uint8_t state; + }; + + static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &k, sizeof(Key)); + else + scalar_type<Key>::construct(&pDst->first, k); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &v, sizeof(Value)); + else + scalar_type<Value>::construct(&pDst->second, v); + } + + static inline void construct_value_type(value_type* pDst, const value_type* pSrc) + { + if ((BASISU_IS_BITWISE_COPYABLE(Key)) && (BASISU_IS_BITWISE_COPYABLE(Value))) + { + memcpy(pDst, pSrc, sizeof(value_type)); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + scalar_type<Key>::construct(&pDst->first, pSrc->first); + + if (BASISU_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + scalar_type<Value>::construct(&pDst->second, pSrc->second); + } + } + + static inline void destruct_value_type(value_type* p) + { + scalar_type<Key>::destruct(&p->first); + scalar_type<Value>::destruct(&p->second); + } + + // Moves *pSrc to *pDst efficiently. + // pDst should NOT be constructed on entry. + static inline void move_node(node* pDst, node* pSrc, bool update_src_state = true) + { + assert(!pDst->state); + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + { + memcpy(pDst, pSrc, sizeof(node)); + } + else + { + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + { + scalar_type<Key>::construct(&pDst->first, pSrc->first); + scalar_type<Key>::destruct(&pSrc->first); + } + + if (BASISU_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + { + scalar_type<Value>::construct(&pDst->second, pSrc->second); + scalar_type<Value>::destruct(&pSrc->second); + } + + pDst->state = cStateValid; + } + + if (update_src_state) + pSrc->state = cStateInvalid; + } + + struct raw_node + { + inline raw_node() + { + node* p = reinterpret_cast<node*>(this); + p->state = cStateInvalid; + } + + inline ~raw_node() + { + node* p = reinterpret_cast<node*>(this); + if (p->state) + hash_map_type::destruct_value_type(p); + } + + inline raw_node(const raw_node& other) + { + node* pDst = reinterpret_cast<node*>(this); + const node* pSrc = reinterpret_cast<const node*>(&other); + + if (pSrc->state) + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + else + pDst->state = cStateInvalid; + } + + inline raw_node& operator= (const raw_node& rhs) + { + if (this == &rhs) + return *this; + + node* pDst = reinterpret_cast<node*>(this); + const node* pSrc = reinterpret_cast<const node*>(&rhs); + + if (pSrc->state) + { + if (pDst->state) + { + pDst->first = pSrc->first; + pDst->second = pSrc->second; + } + else + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + } + else if (pDst->state) + { + hash_map_type::destruct_value_type(pDst); + pDst->state = cStateInvalid; + } + + return *this; + } + + uint8_t m_bits[sizeof(node)]; + }; + + typedef basisu::vector<raw_node> node_vector; + + node_vector m_values; + uint32_t m_hash_shift; + + Hasher m_hasher; + Equals m_equals; + + uint32_t m_num_valid; + + uint32_t m_grow_threshold; + + inline uint32_t hash_key(const Key& k) const + { + assert((1U << (32U - m_hash_shift)) == m_values.size()); + + uint32_t hash = static_cast<uint32_t>(m_hasher(k)); + + // Fibonacci hashing + hash = (2654435769U * hash) >> m_hash_shift; + + assert(hash < m_values.size()); + return hash; + } + + inline const node& get_node(uint32_t index) const + { + return *reinterpret_cast<const node*>(&m_values[index]); + } + + inline node& get_node(uint32_t index) + { + return *reinterpret_cast<node*>(&m_values[index]); + } + + inline state get_node_state(uint32_t index) const + { + return static_cast<state>(get_node(index).state); + } + + inline void set_node_state(uint32_t index, bool valid) + { + get_node(index).state = valid; + } + + inline void grow() + { + uint64_t n = m_values.size() * 3ULL; // was * 2 + + if (!helpers::is_power_of_2(n)) + n = helpers::next_pow2(n); + + if (n > 0x80000000UL) + n = 0x80000000UL; + + rehash(helpers::maximum<uint32_t>(cMinHashSize, (uint32_t)n)); + } + + inline void rehash(uint32_t new_hash_size) + { + assert(new_hash_size >= m_num_valid); + assert(helpers::is_power_of_2(new_hash_size)); + + if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size())) + return; + + hash_map new_map; + new_map.m_values.resize(new_hash_size); + new_map.m_hash_shift = 32U - helpers::floor_log2i(new_hash_size); + assert(new_hash_size == (1U << (32U - new_map.m_hash_shift))); + new_map.m_grow_threshold = UINT_MAX; + + node* pNode = reinterpret_cast<node*>(m_values.begin()); + node* pNode_end = pNode + m_values.size(); + + while (pNode != pNode_end) + { + if (pNode->state) + { + new_map.move_into(pNode); + + if (new_map.m_num_valid == m_num_valid) + break; + } + + pNode++; + } + + new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U; + + m_values.clear_no_destruction(); + m_hash_shift = 32; + + swap(new_map); + } + + inline uint32_t find_next(uint32_t index) const + { + index++; + + if (index >= m_values.size()) + return index; + + const node* pNode = &get_node(index); + + for (; ; ) + { + if (pNode->state) + break; + + if (++index >= m_values.size()) + break; + + pNode++; + } + + return index; + } + + inline uint32_t find_index(const Key& k) const + { + if (m_num_valid) + { + uint32_t index = hash_key(k); + const node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + return index; + + const uint32_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (index == orig_index) + break; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + return index; + } + } + } + + return m_values.size(); + } + + inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v = Value()) + { + if (!m_values.size()) + return false; + + uint32_t index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const uint32_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + construct_value_type(pNode, k, v); + + pNode->state = cStateValid; + + m_num_valid++; + assert(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + inline void move_into(node* pNode) + { + uint32_t index = hash_key(pNode->first); + node* pDst_node = &get_node(index); + + if (pDst_node->state) + { + const uint32_t orig_index = index; + + for (; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pDst_node = &get_node(index); + } + else + { + index--; + pDst_node--; + } + + if (index == orig_index) + { + assert(false); + return; + } + + if (!pDst_node->state) + break; + } + } + + move_node(pDst_node, pNode, false); + + m_num_valid++; + } + }; + + template<typename Key, typename Value, typename Hasher, typename Equals> + struct bitwise_movable< hash_map<Key, Value, Hasher, Equals> > { enum { cFlag = true }; }; + +#if BASISU_HASHMAP_TEST + extern void hash_map_test(); +#endif + +} // namespace basisu + +namespace std +{ + template<typename T> + inline void swap(basisu::vector<T>& a, basisu::vector<T>& b) + { + a.swap(b); + } + + template<typename Key, typename Value, typename Hasher, typename Equals> + inline void swap(basisu::hash_map<Key, Value, Hasher, Equals>& a, basisu::hash_map<Key, Value, Hasher, Equals>& b) + { + a.swap(b); + } + +} // namespace std diff --git a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h new file mode 100644 index 0000000000..6555171419 --- /dev/null +++ b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h @@ -0,0 +1,311 @@ +// basisu_containers_impl.h +// Do not include directly + +#ifdef _MSC_VER +#pragma warning (disable:4127) // warning C4127: conditional expression is constant +#endif + +namespace basisu +{ + bool elemental_vector::increase_capacity(uint32_t min_new_capacity, bool grow_hint, uint32_t element_size, object_mover pMover, bool nofail) + { + assert(m_size <= m_capacity); + + if (sizeof(void *) == sizeof(uint64_t)) + assert(min_new_capacity < (0x400000000ULL / element_size)); + else + assert(min_new_capacity < (0x7FFF0000U / element_size)); + + if (m_capacity >= min_new_capacity) + return true; + + size_t new_capacity = min_new_capacity; + if ((grow_hint) && (!helpers::is_power_of_2((uint64_t)new_capacity))) + { + new_capacity = (size_t)helpers::next_pow2((uint64_t)new_capacity); + + assert(new_capacity && (new_capacity > m_capacity)); + + if (new_capacity < min_new_capacity) + { + if (nofail) + return false; + fprintf(stderr, "vector too large\n"); + abort(); + } + } + + const size_t desired_size = element_size * new_capacity; + size_t actual_size = 0; + if (!pMover) + { + void* new_p = realloc(m_p, desired_size); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; +#ifdef _MSC_VER + sprintf_s(buf, sizeof(buf), "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size); +#else + sprintf(buf, "vector: realloc() failed allocating %u bytes", (uint32_t)desired_size); +#endif + fprintf(stderr, "%s", buf); + abort(); + } + +#ifdef _MSC_VER + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + m_p = new_p; + } + else + { + void* new_p = malloc(desired_size); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; +#ifdef _MSC_VER + sprintf_s(buf, sizeof(buf), "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size); +#else + sprintf(buf, "vector: malloc() failed allocating %u bytes", (uint32_t)desired_size); +#endif + fprintf(stderr, "%s", buf); + abort(); + } + +#ifdef _MSC_VER + actual_size = _msize(new_p); +#elif HAS_MALLOC_USABLE_SIZE + actual_size = malloc_usable_size(new_p); +#else + actual_size = desired_size; +#endif + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast<uint32_t>(actual_size / element_size); + else + m_capacity = static_cast<uint32_t>(new_capacity); + + return true; + } + +#if BASISU_HASHMAP_TEST + +#define HASHMAP_TEST_VERIFY(c) do { if (!(c)) handle_hashmap_test_verify_failure(__LINE__); } while(0) + + static void handle_hashmap_test_verify_failure(int line) + { + fprintf(stderr, "HASHMAP_TEST_VERIFY() faild on line %i\n", line); + abort(); + } + + class counted_obj + { + public: + counted_obj(uint32_t v = 0) : + m_val(v) + { + m_count++; + } + + counted_obj(const counted_obj& obj) : + m_val(obj.m_val) + { + m_count++; + } + + ~counted_obj() + { + assert(m_count > 0); + m_count--; + } + + static uint32_t m_count; + + uint32_t m_val; + + operator size_t() const { return m_val; } + + bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; } + bool operator== (const uint32_t rhs) const { return m_val == rhs; } + + }; + + uint32_t counted_obj::m_count; + + static uint32_t urand32() + { + uint32_t a = rand(); + uint32_t b = rand() << 15; + uint32_t c = rand() << (32 - 15); + return a ^ b ^ c; + } + + static int irand32(int l, int h) + { + assert(l < h); + if (l >= h) + return l; + + uint32_t range = static_cast<uint32_t>(h - l); + + uint32_t rnd = urand32(); + + uint32_t rnd_range = static_cast<uint32_t>((((uint64_t)range) * ((uint64_t)rnd)) >> 32U); + + int result = l + rnd_range; + assert((result >= l) && (result < h)); + return result; + } + + void hash_map_test() + { + { + basisu::hash_map<uint64_t, uint64_t> k; + basisu::hash_map<uint64_t, uint64_t> l; + std::swap(k, l); + + k.begin(); + k.end(); + k.clear(); + k.empty(); + k.erase(0); + k.insert(0, 1); + k.find(0); + k.get_equals(); + k.get_hasher(); + k.get_table_size(); + k.reset(); + k.reserve(1); + k = l; + k.set_equals(l.get_equals()); + k.set_hasher(l.get_hasher()); + k.get_table_size(); + } + + uint32_t seed = 0; + for (; ; ) + { + seed++; + + typedef basisu::hash_map<counted_obj, counted_obj> my_hash_map; + my_hash_map m; + + const uint32_t n = irand32(0, 100000); + + printf("%u\n", n); + + srand(seed); // r1.seed(seed); + + basisu::vector<int> q; + + uint32_t count = 0; + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::insert_result res = m.insert(counted_obj(v), counted_obj(v ^ 0xdeadbeef)); + if (res.second) + { + count++; + q.push_back(v); + } + } + + HASHMAP_TEST_VERIFY(m.size() == count); + + srand(seed); + + my_hash_map cm(m); + m.clear(); + m = cm; + cm.reset(); + + for (uint32_t i = 0; i < n; i++) + { + uint32_t v = urand32() & 0x7FFFFFFF; + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == v); + HASHMAP_TEST_VERIFY(it->second == (v ^ 0xdeadbeef)); + } + + for (uint32_t t = 0; t < 2; t++) + { + const uint32_t nd = irand32(1, q.size() + 1); + for (uint32_t i = 0; i < nd; i++) + { + uint32_t p = irand32(0, q.size()); + + int k = q[p]; + if (k >= 0) + { + q[p] = -k - 1; + + bool s = m.erase(counted_obj(k)); + HASHMAP_TEST_VERIFY(s); + } + } + + typedef basisu::hash_map<uint32_t, empty_type> uint_hash_set; + uint_hash_set s; + + for (uint32_t i = 0; i < q.size(); i++) + { + int v = q[i]; + + if (v >= 0) + { + my_hash_map::const_iterator it = m.find(counted_obj(v)); + HASHMAP_TEST_VERIFY(it != m.end()); + HASHMAP_TEST_VERIFY(it->first == (uint32_t)v); + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)v ^ 0xdeadbeef)); + + s.insert(v); + } + else + { + my_hash_map::const_iterator it = m.find(counted_obj(-v - 1)); + HASHMAP_TEST_VERIFY(it == m.end()); + } + } + + uint32_t found_count = 0; + for (my_hash_map::const_iterator it = m.begin(); it != m.end(); ++it) + { + HASHMAP_TEST_VERIFY(it->second == ((uint32_t)it->first ^ 0xdeadbeef)); + + uint_hash_set::const_iterator fit(s.find((uint32_t)it->first)); + HASHMAP_TEST_VERIFY(fit != s.end()); + + HASHMAP_TEST_VERIFY(fit->first == it->first); + + found_count++; + } + + HASHMAP_TEST_VERIFY(found_count == s.size()); + } + + HASHMAP_TEST_VERIFY(counted_obj::m_count == m.size() * 2); + } + } + +#endif // BASISU_HASHMAP_TEST + +} // namespace basisu diff --git a/thirdparty/basis_universal/transcoder/basisu_file_headers.h b/thirdparty/basis_universal/transcoder/basisu_file_headers.h index c90b3f3af0..4316d738e6 100644 --- a/thirdparty/basis_universal/transcoder/basisu_file_headers.h +++ b/thirdparty/basis_universal/transcoder/basisu_file_headers.h @@ -1,5 +1,5 @@ // basis_file_headers.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,11 @@ namespace basist // Slice desc header flags enum basis_slice_desc_flags { - cSliceDescFlagsIsAlphaData = 1, - cSliceDescFlagsFrameIsIFrame = 2 // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) + cSliceDescFlagsHasAlpha = 1, + + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) + // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. + cSliceDescFlagsFrameIsIFrame = 2 }; #pragma pack(push) @@ -38,7 +41,7 @@ namespace basist basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. - basisu::packed_uint<4> m_file_ofs; // Offset from the header to the start of the slice's data + basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes basisu::packed_uint<2> m_slice_data_crc16; // The CRC16 of the compressed slice data, for extra-paranoid use cases @@ -47,9 +50,21 @@ namespace basist // File header files enum basis_header_flags { - cBASISHeaderFlagETC1S = 1, // Always set for basis universal files - cBASISHeaderFlagYFlipped = 2, // Set if the texture had to be Y flipped before encoding - cBASISHeaderFlagHasAlphaSlices = 4 // True if the odd slices contain alpha data + // Always set for ETC1S files. Not set for UASTC files. + cBASISHeaderFlagETC1S = 1, + + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. + cBASISHeaderFlagYFlipped = 2, + + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. + // In reality, we have no idea if the texture data is actually linear or sRGB. This is the m_perceptual parameter passed to the compressor. + cBASISHeaderFlagSRGB = 16, }; // The image type field attempts to describe how to interpret the image data in a Basis file. @@ -71,6 +86,12 @@ namespace basist cBASISMaxUSPerFrame = 0xFFFFFF }; + enum class basis_tex_format + { + cETC1S = 0, + cUASTC4x4 = 1 + }; + struct basis_file_header { enum @@ -82,16 +103,16 @@ namespace basist basisu::packed_uint<2> m_sig; // 2 byte file signature basisu::packed_uint<2> m_ver; // Baseline file version basisu::packed_uint<2> m_header_size; // Header size in bytes, sizeof(basis_file_header) - basisu::packed_uint<2> m_header_crc16; // crc16 of the remaining header data + basisu::packed_uint<2> m_header_crc16; // CRC16 of the remaining header data basisu::packed_uint<4> m_data_size; // The total size of all data after the header basisu::packed_uint<2> m_data_crc16; // The CRC16 of all data after the header - basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha basis files) + basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) basisu::packed_uint<3> m_total_images; // The total # of images - basisu::packed_uint<1> m_format; // enum basist::block_format + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type basisu::packed_uint<3> m_us_per_frame; // Framerate of video, in microseconds per frame @@ -101,11 +122,11 @@ namespace basist basisu::packed_uint<4> m_userdata1; // For client use basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook - basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the header + basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook - basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the header + basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes basisu::packed_uint<4> m_tables_file_ofs; // The file offset of the compressed Huffman codelength tables, for decompressing slices diff --git a/thirdparty/basis_universal/transcoder/basisu_global_selector_cb.h b/thirdparty/basis_universal/transcoder/basisu_global_selector_cb.h index 695b0b3b97..8ab5098898 100644 --- a/thirdparty/basis_universal/transcoder/basisu_global_selector_cb.h +++ b/thirdparty/basis_universal/transcoder/basisu_global_selector_cb.h @@ -1,4 +1,4 @@ -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2020 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/thirdparty/basis_universal/transcoder/basisu_global_selector_palette.h b/thirdparty/basis_universal/transcoder/basisu_global_selector_palette.h index b0260541c3..8bedf94710 100644 --- a/thirdparty/basis_universal/transcoder/basisu_global_selector_palette.h +++ b/thirdparty/basis_universal/transcoder/basisu_global_selector_palette.h @@ -1,5 +1,7 @@ // basisu_global_selector_palette.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. +// +// TODO: NONE of this is used in .basis/.ktx2 files. It will be deleted soon. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -609,7 +611,7 @@ namespace basist uint8_t m_selectors[16]; }; - typedef std::vector<etc1_selector_palette_entry> etc1_selector_palette_entry_vec; + typedef basisu::vector<etc1_selector_palette_entry> etc1_selector_palette_entry_vec; extern const uint32_t g_global_selector_cb[]; extern const uint32_t g_global_selector_cb_size; @@ -628,7 +630,7 @@ namespace basist void set(uint32_t palette_index, const etc1_global_palette_entry_modifier &modifier) { m_palette_index = palette_index; m_modifier = modifier; } }; - typedef std::vector<etc1_global_selector_codebook_entry_id> etc1_global_selector_codebook_entry_id_vec; + typedef basisu::vector<etc1_global_selector_codebook_entry_id> etc1_global_selector_codebook_entry_id_vec; class etc1_global_selector_codebook { diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp index d15b6013d9..29eb3c0d55 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp @@ -1,5 +1,5 @@ // basisu_transcoder.cpp -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,65 +15,88 @@ #include "basisu_transcoder.h" #include <limits.h> -#include <vector> +#include "basisu_containers_impl.h" + +#ifndef BASISD_IS_BIG_ENDIAN +// TODO: This doesn't work on OSX. How can this be so difficult? +//#if defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) || defined(BIG_ENDIAN) +// #define BASISD_IS_BIG_ENDIAN (1) +//#else + #define BASISD_IS_BIG_ENDIAN (0) +//#endif +#endif + +#ifndef BASISD_USE_UNALIGNED_WORD_READS + #ifdef __EMSCRIPTEN__ + // Can't use unaligned loads/stores with WebAssembly. + #define BASISD_USE_UNALIGNED_WORD_READS (0) + #elif defined(_M_AMD64) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__) + #define BASISD_USE_UNALIGNED_WORD_READS (1) + #else + #define BASISD_USE_UNALIGNED_WORD_READS (0) + #endif +#endif -// The supported .basis file header version. Keep in sync with BASIS_FILE_VERSION. #define BASISD_SUPPORTED_BASIS_VERSION (0x13) +#ifndef BASISD_SUPPORT_KTX2 + #error Must have defined BASISD_SUPPORT_KTX2 +#endif + +#ifndef BASISD_SUPPORT_KTX2_ZSTD +#error Must have defined BASISD_SUPPORT_KTX2_ZSTD +#endif + // Set to 1 for fuzz testing. This will disable all CRC16 checks on headers and compressed data. #ifndef BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS -#define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0 + #define BASISU_NO_HEADER_OR_DATA_CRC16_CHECKS 0 #endif #ifndef BASISD_SUPPORT_DXT1 -#define BASISD_SUPPORT_DXT1 1 + #define BASISD_SUPPORT_DXT1 1 #endif #ifndef BASISD_SUPPORT_DXT5A -#define BASISD_SUPPORT_DXT5A 1 + #define BASISD_SUPPORT_DXT5A 1 #endif // Disable all BC7 transcoders if necessary (useful when cross compiling to Javascript) #if defined(BASISD_SUPPORT_BC7) && !BASISD_SUPPORT_BC7 - #ifndef BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - #define BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY 0 - #endif #ifndef BASISD_SUPPORT_BC7_MODE5 - #define BASISD_SUPPORT_BC7_MODE5 0 + #define BASISD_SUPPORT_BC7_MODE5 0 #endif #endif // !BASISD_SUPPORT_BC7 -// BC7 mode 6 opaque only is the highest quality (compared to ETC1), but the tables are massive. -// For web/mobile use you probably should disable this. -#ifndef BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY -#define BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY 1 -#endif - -// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses substantially less memory than BC7 mode 6 and even BC1. +// BC7 mode 5 supports both opaque and opaque+alpha textures, and uses less memory BC1. #ifndef BASISD_SUPPORT_BC7_MODE5 -#define BASISD_SUPPORT_BC7_MODE5 1 + #define BASISD_SUPPORT_BC7_MODE5 1 #endif #ifndef BASISD_SUPPORT_PVRTC1 -#define BASISD_SUPPORT_PVRTC1 1 + #define BASISD_SUPPORT_PVRTC1 1 #endif #ifndef BASISD_SUPPORT_ETC2_EAC_A8 -#define BASISD_SUPPORT_ETC2_EAC_A8 1 + #define BASISD_SUPPORT_ETC2_EAC_A8 1 +#endif + +// Set BASISD_SUPPORT_UASTC to 0 to completely disable support for transcoding UASTC files. +#ifndef BASISD_SUPPORT_UASTC + #define BASISD_SUPPORT_UASTC 1 #endif #ifndef BASISD_SUPPORT_ASTC -#define BASISD_SUPPORT_ASTC 1 + #define BASISD_SUPPORT_ASTC 1 #endif // Note that if BASISD_SUPPORT_ATC is enabled, BASISD_SUPPORT_DXT5A should also be enabled for alpha support. #ifndef BASISD_SUPPORT_ATC -#define BASISD_SUPPORT_ATC 1 + #define BASISD_SUPPORT_ATC 1 #endif // Support for ETC2 EAC R11 and ETC2 EAC RG11 #ifndef BASISD_SUPPORT_ETC2_EAC_RG11 -#define BASISD_SUPPORT_ETC2_EAC_RG11 1 + #define BASISD_SUPPORT_ETC2_EAC_RG11 1 #endif // If BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY is 1, opaque blocks will be transcoded to ASTC at slightly higher quality (higher than BC1), but the transcoder tables will be 2x as large. @@ -89,26 +112,25 @@ #endif #ifndef BASISD_SUPPORT_FXT1 -#define BASISD_SUPPORT_FXT1 1 + #define BASISD_SUPPORT_FXT1 1 #endif #ifndef BASISD_SUPPORT_PVRTC2 -#define BASISD_SUPPORT_PVRTC2 1 + #define BASISD_SUPPORT_PVRTC2 1 #endif #if BASISD_SUPPORT_PVRTC2 -#if !BASISD_SUPPORT_ATC -#error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1 -#endif + #if !BASISD_SUPPORT_ATC + #error BASISD_SUPPORT_ATC must be 1 if BASISD_SUPPORT_PVRTC2 is 1 + #endif #endif #if BASISD_SUPPORT_ATC -#if !BASISD_SUPPORT_DXT5A -#error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1 -#endif + #if !BASISD_SUPPORT_DXT5A + #error BASISD_SUPPORT_DXT5A must be 1 if BASISD_SUPPORT_ATC is 1 + #endif #endif -#define BASISD_WRITE_NEW_BC7_TABLES 0 #define BASISD_WRITE_NEW_BC7_MODE5_TABLES 0 #define BASISD_WRITE_NEW_DXT1_TABLES 0 #define BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES 0 @@ -117,7 +139,16 @@ #define BASISD_WRITE_NEW_ETC2_EAC_R11_TABLES 0 #ifndef BASISD_ENABLE_DEBUG_FLAGS -#define BASISD_ENABLE_DEBUG_FLAGS 0 + #define BASISD_ENABLE_DEBUG_FLAGS 0 +#endif + +// If KTX2 support is enabled, we may need Zstd for decompression of supercompressed UASTC files. Include this header. +#if BASISD_SUPPORT_KTX2 + // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded. + #if BASISD_SUPPORT_KTX2_ZSTD + // We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError() + #include "../zstd/zstd.h" + #endif #endif namespace basisu @@ -131,7 +162,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) { -#if BASISU_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -146,9 +177,6 @@ namespace basisu namespace basist { -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY -#include "basisu_transcoder_tables_bc7_m6.inc" -#endif #if BASISD_ENABLE_DEBUG_FLAGS static uint32_t g_debug_flags = 0; @@ -165,16 +193,28 @@ namespace basist void set_debug_flags(uint32_t f) { - (void)f; + BASISU_NOTE_UNUSED(f); #if BASISD_ENABLE_DEBUG_FLAGS g_debug_flags = f; #endif } + + inline uint16_t byteswap_uint16(uint16_t v) + { + return static_cast<uint16_t>((v >> 8) | (v << 8)); + } + + static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } + static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + static inline float saturate(float value) { return clampf(value, 0, 1.0f); } + + static inline uint8_t mul_8(uint32_t v, uint32_t q) { v = v * q + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + uint16_t crc16(const void* r, size_t size, uint16_t crc) { crc = ~crc; - const uint8_t* p = reinterpret_cast<const uint8_t*>(r); + const uint8_t* p = static_cast<const uint8_t*>(r); for (; size; --size) { const uint16_t q = *p++ ^ (crc >> 8); @@ -279,6 +319,9 @@ namespace basist DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables, 1); DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables16, 16); DECLARE_ETC1_INTEN_TABLE(g_etc1_inten_tables48, 3 * 16); + + //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; @@ -522,11 +565,39 @@ namespace basist return static_cast<uint16_t>(b | (g << 5U) | (r << 10U)); } + inline uint16_t get_base4_color(uint32_t idx) const + { + uint32_t r, g, b; + if (idx) + { + r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); + } + else + { + r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); + g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); + b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); + } + return static_cast<uint16_t>(b | (g << 4U) | (r << 8U)); + } + inline color32 get_base5_color_unscaled() const { return color32(m_differential.m_red1, m_differential.m_green1, m_differential.m_blue1, 255); } + inline bool get_flip_bit() const + { + return (m_bytes[3] & 1) != 0; + } + + inline bool get_diff_bit() const + { + return (m_bytes[3] & 2) != 0; + } + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -534,6 +605,38 @@ namespace basist return (m_bytes[3] >> ofs) & 7; } + inline uint16_t get_delta3_color() const + { + const uint32_t r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); + const uint32_t g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); + const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); + return static_cast<uint16_t>(b | (g << 3U) | (r << 6U)); + } + + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const + { + color32 b; + + if (get_diff_bit()) + { + if (subblock_index) + unpack_color5(b, get_base5_color(), get_delta3_color(), true, 255); + else + unpack_color5(b, get_base5_color(), true); + } + else + { + b = unpack_color4(get_base4_color(subblock_index), true, 255); + } + + const int* pInten_table = g_etc1_inten_tables[get_inten_table(subblock_index)]; + + pBlock_colors[0].set_noclamp_rgba(clamp255(b.r + pInten_table[0]), clamp255(b.g + pInten_table[0]), clamp255(b.b + pInten_table[0]), 255); + pBlock_colors[1].set_noclamp_rgba(clamp255(b.r + pInten_table[1]), clamp255(b.g + pInten_table[1]), clamp255(b.b + pInten_table[1]), 255); + pBlock_colors[2].set_noclamp_rgba(clamp255(b.r + pInten_table[2]), clamp255(b.g + pInten_table[2]), clamp255(b.b + pInten_table[2]), 255); + pBlock_colors[3].set_noclamp_rgba(clamp255(b.r + pInten_table[3]), clamp255(b.g + pInten_table[3]), clamp255(b.b + pInten_table[3]), 255); + } + static uint16_t pack_color4(const color32& color, bool scaled, uint32_t bias = 127U) { return pack_color4(color.r, color.g, color.b, scaled, bias); @@ -592,7 +695,17 @@ namespace basist return static_cast<uint16_t>(b | (g << 3) | (r << 6)); } - static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha = 255) + static void unpack_delta3(int& r, int& g, int& b, uint16_t packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + static color32 unpack_color5(uint16_t packed_color5, bool scaled, uint32_t alpha) { uint32_t b = packed_color5 & 31U; uint32_t g = (packed_color5 >> 5U) & 31U; @@ -605,7 +718,9 @@ namespace basist r = (r << 3U) | (r >> 2U); } - return color32(r, g, b, alpha); + assert(alpha <= 255); + + return color32(cNoClamp, r, g, b, alpha); } static void unpack_color5(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color5, bool scaled) @@ -615,6 +730,64 @@ namespace basist g = c.g; b = c.b; } + + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) + { + result = unpack_color5(packed_color5, scaled, 255); + } + + static bool unpack_color5(color32& result, uint16_t packed_color5, uint16_t packed_delta3, bool scaled, uint32_t alpha) + { + int dr, dg, db; + unpack_delta3(dr, dg, db, packed_delta3); + + int r = ((packed_color5 >> 10U) & 31U) + dr; + int g = ((packed_color5 >> 5U) & 31U) + dg; + int b = (packed_color5 & 31U) + db; + + bool success = true; + if (static_cast<uint32_t>(r | g | b) > 31U) + { + success = false; + r = basisu::clamp<int>(r, 0, 31); + g = basisu::clamp<int>(g, 0, 31); + b = basisu::clamp<int>(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, basisu::minimum(alpha, 255U)); + return success; + } + + static color32 unpack_color4(uint16_t packed_color4, bool scaled, uint32_t alpha) + { + uint32_t b = packed_color4 & 15U; + uint32_t g = (packed_color4 >> 4U) & 15U; + uint32_t r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color32(cNoClamp, r, g, b, basisu::minimum(alpha, 255U)); + } + + static void unpack_color4(uint32_t& r, uint32_t& g, uint32_t& b, uint16_t packed_color4, bool scaled) + { + color32 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } static void get_diff_subblock_colors(color32* pDst, uint16_t packed_color5, uint32_t table_idx) { @@ -823,197 +996,6 @@ namespace basist uint32_t m_high; }; -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - static dxt_selector_range g_etc1_to_bc7_selector_ranges[] = - { - { 0, 0 }, - { 1, 1 }, - { 2, 2 }, - { 3, 3 }, - - { 0, 3 }, - - { 1, 3 }, - { 0, 2 }, - - { 1, 2 }, - - { 2, 3 }, - { 0, 1 }, - }; - const uint32_t NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_selector_ranges) / sizeof(g_etc1_to_bc7_selector_ranges[0]); - - static uint32_t g_etc1_to_bc7_m6_selector_range_index[4][4]; - - static const uint8_t g_etc1_to_bc7_selector_mappings[][4] = - { -#if 1 - { 5 * 0, 5 * 0, 5 * 0, 5 * 0 }, - { 5 * 0, 5 * 0, 5 * 0, 5 * 1 }, - { 5 * 0, 5 * 0, 5 * 0, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 0, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 3, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 2 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 3, 5 * 3 }, - { 5 * 0, 5 * 2, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 2, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 0, 5 * 3, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 1, 5 * 1, 5 * 1 }, - { 5 * 1, 5 * 1, 5 * 1, 5 * 2 }, - { 5 * 1, 5 * 1, 5 * 1, 5 * 3 }, - { 5 * 1, 5 * 1, 5 * 2, 5 * 2 }, - { 5 * 1, 5 * 1, 5 * 2, 5 * 3 }, - { 5 * 1, 5 * 1, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 2, 5 * 2, 5 * 2 }, - { 5 * 1, 5 * 2, 5 * 2, 5 * 3 }, - { 5 * 1, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 3, 5 * 3, 5 * 3 }, - { 5 * 2, 5 * 2, 5 * 2, 5 * 2 }, - { 5 * 2, 5 * 2, 5 * 2, 5 * 3 }, - { 5 * 2, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 2, 5 * 3, 5 * 3, 5 * 3 }, - { 5 * 3, 5 * 3, 5 * 3, 5 * 3 }, - - { 0, 1, 2, 3 }, - { 0, 0, 1, 1 }, - { 0, 0, 0, 1 }, - { 0, 2, 4, 6 }, - { 0, 3, 6, 9 }, - { 0, 4, 8, 12 }, - - { 0, 4, 9, 15 }, - { 0, 6, 11, 15 }, - - { 1, 2, 3, 4 }, - { 1, 3, 5, 7 }, - - { 1, 8, 8, 14 }, -#else - { 5 * 0, 5 * 0, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 2 }, - { 5 * 0, 5 * 0, 5 * 1, 5 * 3 }, - { 5 * 0, 5 * 0, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 1, 5 * 1, 5 * 1 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 2 }, - { 5 * 0, 5 * 1, 5 * 2, 5 * 3 }, - { 5 * 0, 5 * 2, 5 * 3, 5 * 3 }, - { 5 * 1, 5 * 2, 5 * 2, 5 * 2 }, -#endif - { 5 * 1, 5 * 2, 5 * 3, 5 * 3 }, - { 8, 8, 8, 8 }, - }; - const uint32_t NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS = sizeof(g_etc1_to_bc7_selector_mappings) / sizeof(g_etc1_to_bc7_selector_mappings[0]); - - static uint8_t g_etc1_to_bc7_selector_mappings_inv[NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS][4]; - - // encoding from LSB to MSB: low8, high8, error16, size is [32*8][NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES][NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS] - extern const uint32_t* g_etc1_to_bc7_m6_table[]; - - const uint16_t s_bptc_table_aWeight4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - -#if BASISD_WRITE_NEW_BC7_TABLES - static void create_etc1_to_bc7_m6_conversion_table() - { - FILE* pFile = NULL; - - pFile = fopen("basisu_decoder_tables_bc7_m6.inc", "w"); - - for (int inten = 0; inten < 8; inten++) - { - for (uint32_t g = 0; g < 32; g++) - { - color32 block_colors[4]; - decoder_etc_block::get_diff_subblock_colors(block_colors, decoder_etc_block::pack_color5(color32(g, g, g, 255), false), inten); - - fprintf(pFile, "static const uint32_t g_etc1_to_bc7_m6_table%u[] = {\n", g + inten * 32); - uint32_t n = 0; - - for (uint32_t sr = 0; sr < NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES; sr++) - { - const uint32_t low_selector = g_etc1_to_bc7_selector_ranges[sr].m_low; - const uint32_t high_selector = g_etc1_to_bc7_selector_ranges[sr].m_high; - - for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS; m++) - { - uint32_t best_lo = 0; - uint32_t best_hi = 0; - uint64_t best_err = UINT64_MAX; - - for (uint32_t hi = 0; hi <= 127; hi++) - { - for (uint32_t lo = 0; lo <= 127; lo++) - { - uint32_t bc7_block_colors[16]; - - bc7_block_colors[0] = lo << 1; - bc7_block_colors[15] = (hi << 1) | 1; - - for (uint32_t i = 1; i < 15; i++) - bc7_block_colors[i] = (bc7_block_colors[0] * (64 - s_bptc_table_aWeight4[i]) + bc7_block_colors[15] * s_bptc_table_aWeight4[i] + 32) >> 6; - - uint64_t total_err = 0; - - for (uint32_t s = low_selector; s <= high_selector; s++) - { - int err = (int)block_colors[s].g - (int)bc7_block_colors[g_etc1_to_bc7_selector_mappings[m][s]]; - - total_err += err * err; - } - - if (total_err < best_err) - { - best_err = total_err; - best_lo = lo; - best_hi = hi; - } - } // lo - - } // hi - - best_err = basisu::minimum<uint32_t>(best_err, 0xFFFF); - - const uint32_t index = (g + inten * 32) * (NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS) + (sr * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS) + m; - - uint32_t v = best_err | (best_lo << 18) | (best_hi << 25); - - fprintf(pFile, "0x%X,", v); - n++; - if ((n & 31) == 31) - fprintf(pFile, "\n"); - - } // m - } // sr - - fprintf(pFile, "};\n"); - - } // g - } // inten - - fprintf(pFile, "const uint32_t *g_etc1_to_bc7_m6_table[] = {\n"); - - for (uint32_t i = 0; i < 32 * 8; i++) - { - fprintf(pFile, "g_etc1_to_bc7_m6_table%u, ", i); - if ((i & 15) == 15) - fprintf(pFile, "\n"); - } - - fprintf(pFile, "};\n"); - fclose(pFile); - } -#endif -#endif - struct etc1_to_dxt1_56_solution { uint8_t m_lo; @@ -1064,7 +1046,9 @@ namespace basist static const etc1_to_dxt1_56_solution g_etc1_to_dxt_5[32 * 8 * NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS * NUM_ETC1_TO_DXT1_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_dxt1_5.inc" }; +#endif // BASISD_SUPPORT_DXT1 +#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC // First saw the idea for optimal BC1 single-color block encoding using lookup tables in ryg_dxt. struct bc1_match_entry { @@ -1089,14 +1073,15 @@ namespace basist if (sel == 1) { // Selector 1 - e = abs(((hi_e * 2 + lo_e) / 3) - i) + ((abs(hi_e - lo_e) >> 5)); + e = basisu::iabs(((hi_e * 2 + lo_e) / 3) - i); + e += (basisu::iabs(hi_e - lo_e) * 3) / 100; } else { assert(sel == 0); // Selector 0 - e = abs(hi_e - i); + e = basisu::iabs(hi_e - i); } if (e < lowest_e) @@ -1111,7 +1096,7 @@ namespace basist } // lo } } -#endif // BASISD_SUPPORT_DXT1 +#endif #if BASISD_WRITE_NEW_DXT1_TABLES static void create_etc1_to_dxt1_5_conversion_table() @@ -1268,7 +1253,8 @@ namespace basist } #endif -#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + +#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 static const int8_t g_eac_modifier_table[16][8] = { { -3, -6, -9, -15, 2, 5, 8, 14 }, @@ -1344,6 +1330,9 @@ namespace basist } }; +#endif // #if BASISD_SUPPORT_UASTC BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 + +#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_ETC2_EAC_RG11 static const dxt_selector_range s_etc2_eac_selector_ranges[] = { { 0, 3 }, @@ -1372,8 +1361,8 @@ namespace basist uint32_t m_base; uint32_t m_table; uint32_t m_multiplier; - std::vector<uint8_t> m_selectors; - std::vector<uint8_t> m_selectors_temp; + basisu::vector<uint8_t> m_selectors; + basisu::vector<uint8_t> m_selectors_temp; }; static uint64_t pack_eac_a8_exhaustive(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels) @@ -1769,8 +1758,8 @@ namespace basist uint32_t m_base; uint32_t m_table; uint32_t m_multiplier; - std::vector<uint8_t> m_selectors; - std::vector<uint8_t> m_selectors_temp; + basisu::vector<uint8_t> m_selectors; + basisu::vector<uint8_t> m_selectors_temp; }; static uint64_t pack_eac_r11_exhaustive(pack_eac_r11_results& results, const uint8_t* pPixels, uint32_t num_pixels) @@ -1924,14 +1913,28 @@ namespace basist #if BASISD_SUPPORT_PVRTC2 static void transcoder_init_pvrtc2(); #endif + +#if BASISD_SUPPORT_UASTC + void uastc_init(); +#endif + + static bool g_transcoder_initialized; // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. void basisu_transcoder_init() { - static bool s_initialized; - if (s_initialized) + if (g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; + } + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + +#if BASISD_SUPPORT_UASTC + uastc_init(); +#endif #if BASISD_SUPPORT_ASTC transcoder_init_astc(); @@ -1943,11 +1946,6 @@ namespace basist exit(0); #endif -#if BASISD_WRITE_NEW_BC7_TABLES - create_etc1_to_bc7_m6_conversion_table(); - exit(0); -#endif - #if BASISD_WRITE_NEW_BC7_MODE5_TABLES create_etc1_to_bc7_m5_color_conversion_table(); create_etc1_to_bc7_m5_alpha_conversion_table(); @@ -1975,7 +1973,7 @@ namespace basist exit(0); #endif -#if BASISD_SUPPORT_DXT1 +#if BASISD_SUPPORT_DXT1 || BASISD_SUPPORT_UASTC uint8_t bc1_expand5[32]; for (int i = 0; i < 32; i++) bc1_expand5[i] = static_cast<uint8_t>((i << 3) | (i >> 2)); @@ -1988,6 +1986,17 @@ namespace basist prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, 64, 1); prepare_bc1_single_color_table(g_bc1_match6_equals_0, bc1_expand6, 1, 64, 0); +#if 0 + for (uint32_t i = 0; i < 256; i++) + { + printf("%u %u %u\n", i, (i * 63 + 127) / 255, g_bc1_match6_equals_0[i].m_hi); + } + exit(0); +#endif + +#endif + +#if BASISD_SUPPORT_DXT1 for (uint32_t i = 0; i < NUM_ETC1_TO_DXT1_SELECTOR_RANGES; i++) { uint32_t l = g_etc1_to_dxt1_selector_ranges[i].m_low; @@ -2023,19 +2032,6 @@ namespace basist } #endif -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - for (uint32_t i = 0; i < NUM_ETC1_TO_BC7_M6_SELECTOR_RANGES; i++) - { - uint32_t l = g_etc1_to_bc7_selector_ranges[i].m_low; - uint32_t h = g_etc1_to_bc7_selector_ranges[i].m_high; - g_etc1_to_bc7_m6_selector_range_index[l][h] = i; - } - - for (uint32_t sm = 0; sm < NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS; sm++) - for (uint32_t j = 0; j < 4; j++) - g_etc1_to_bc7_selector_mappings_inv[sm][j] = 15 - g_etc1_to_bc7_selector_mappings[sm][j]; -#endif - #if BASISD_SUPPORT_BC7_MODE5 transcoder_init_bc7_mode5(); #endif @@ -2048,7 +2044,7 @@ namespace basist transcoder_init_pvrtc2(); #endif - s_initialized = true; + g_transcoder_initialized = true; } #if BASISD_SUPPORT_DXT1 @@ -2780,7 +2776,7 @@ namespace basist // PVRTC -#if BASISD_SUPPORT_PVRTC1 +#if BASISD_SUPPORT_PVRTC1 || BASISD_SUPPORT_UASTC static const uint16_t g_pvrtc_swizzle_table[256] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, @@ -3304,6 +3300,7 @@ namespace basist } }; +#if 0 static const uint8_t g_pvrtc_bilinear_weights[16][4] = { { 4, 4, 4, 4 }, { 2, 6, 2, 6 }, { 8, 0, 8, 0 }, { 6, 2, 6, 2 }, @@ -3311,6 +3308,7 @@ namespace basist { 8, 8, 0, 0 }, { 4, 12, 0, 0 }, { 16, 0, 0, 0 }, { 12, 4, 0, 0 }, { 6, 6, 2, 2 }, { 3, 9, 1, 3 }, { 12, 0, 4, 0 }, { 9, 3, 3, 1 }, }; +#endif struct pvrtc1_temp_block { @@ -3402,7 +3400,9 @@ namespace basist color32 c(get_endpoint_8888(endpoints, endpoint_index)); return c.r + c.g + c.b + c.a; } +#endif +#if BASISD_SUPPORT_PVRTC1 // TODO: Support decoding a non-pow2 ETC1S texture into the next larger pow2 PVRTC texture. static void fixup_pvrtc1_4_modulation_rgb(const decoder_etc_block* pETC_Blocks, const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) { @@ -3411,7 +3411,7 @@ namespace basist const uint32_t x_bits = basisu::total_bits(x_mask); const uint32_t y_bits = basisu::total_bits(y_mask); const uint32_t min_bits = basisu::minimum(x_bits, y_bits); - const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; uint32_t block_index = 0; @@ -3592,7 +3592,7 @@ namespace basist const uint32_t x_bits = basisu::total_bits(x_mask); const uint32_t y_bits = basisu::total_bits(y_mask); const uint32_t min_bits = basisu::minimum(x_bits, y_bits); - const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; uint32_t block_index = 0; @@ -3763,257 +3763,6 @@ namespace basist } #endif // BASISD_SUPPORT_PVRTC1 -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - struct bc7_mode_6 - { - struct - { - uint64_t m_mode : 7; - uint64_t m_r0 : 7; - uint64_t m_r1 : 7; - uint64_t m_g0 : 7; - uint64_t m_g1 : 7; - uint64_t m_b0 : 7; - uint64_t m_b1 : 7; - uint64_t m_a0 : 7; - uint64_t m_a1 : 7; - uint64_t m_p0 : 1; - } m_lo; - - union - { - struct - { - uint64_t m_p1 : 1; - uint64_t m_s00 : 3; - uint64_t m_s10 : 4; - uint64_t m_s20 : 4; - uint64_t m_s30 : 4; - - uint64_t m_s01 : 4; - uint64_t m_s11 : 4; - uint64_t m_s21 : 4; - uint64_t m_s31 : 4; - - uint64_t m_s02 : 4; - uint64_t m_s12 : 4; - uint64_t m_s22 : 4; - uint64_t m_s32 : 4; - - uint64_t m_s03 : 4; - uint64_t m_s13 : 4; - uint64_t m_s23 : 4; - uint64_t m_s33 : 4; - - } m_hi; - - uint64_t m_hi_bits; - }; - }; - - static void convert_etc1s_to_bc7_m6(bc7_mode_6* pDst_block, const endpoint *pEndpoint, const selector* pSelector) - { -#if !BASISD_WRITE_NEW_BC7_TABLES - const uint32_t low_selector = pSelector->m_lo_selector; - const uint32_t high_selector = pSelector->m_hi_selector; - - const uint32_t base_color_r = pEndpoint->m_color5.r; - const uint32_t base_color_g = pEndpoint->m_color5.g; - const uint32_t base_color_b = pEndpoint->m_color5.b; - const uint32_t inten_table = pEndpoint->m_inten5; - - if (pSelector->m_num_unique_selectors <= 2) - { - // Only two unique selectors so just switch to block truncation coding (BTC) to avoid quality issues on extreme blocks. - pDst_block->m_lo.m_mode = 64; - - pDst_block->m_lo.m_a0 = 127; - pDst_block->m_lo.m_a1 = 127; - - color32 block_colors[4]; - - decoder_etc_block::get_block_colors5(block_colors, color32(base_color_r, base_color_g, base_color_b, 255), inten_table); - - const uint32_t r0 = block_colors[low_selector].r; - const uint32_t g0 = block_colors[low_selector].g; - const uint32_t b0 = block_colors[low_selector].b; - const uint32_t low_bits0 = (r0 & 1) + (g0 & 1) + (b0 & 1); - uint32_t p0 = low_bits0 >= 2; - - const uint32_t r1 = block_colors[high_selector].r; - const uint32_t g1 = block_colors[high_selector].g; - const uint32_t b1 = block_colors[high_selector].b; - const uint32_t low_bits1 = (r1 & 1) + (g1 & 1) + (b1 & 1); - uint32_t p1 = low_bits1 >= 2; - - pDst_block->m_lo.m_r0 = r0 >> 1; - pDst_block->m_lo.m_g0 = g0 >> 1; - pDst_block->m_lo.m_b0 = b0 >> 1; - pDst_block->m_lo.m_p0 = p0; - - pDst_block->m_lo.m_r1 = r1 >> 1; - pDst_block->m_lo.m_g1 = g1 >> 1; - pDst_block->m_lo.m_b1 = b1 >> 1; - - uint32_t output_low_selector = 0; - uint32_t output_bit_offset = 1; - uint64_t output_hi_bits = p1; - - for (uint32_t y = 0; y < 4; y++) - { - for (uint32_t x = 0; x < 4; x++) - { - uint32_t s = pSelector->get_selector(x, y); - uint32_t os = (s == low_selector) ? output_low_selector : (15 ^ output_low_selector); - - uint32_t num_bits = 4; - - if ((x | y) == 0) - { - if (os & 8) - { - pDst_block->m_lo.m_r0 = r1 >> 1; - pDst_block->m_lo.m_g0 = g1 >> 1; - pDst_block->m_lo.m_b0 = b1 >> 1; - pDst_block->m_lo.m_p0 = p1; - - pDst_block->m_lo.m_r1 = r0 >> 1; - pDst_block->m_lo.m_g1 = g0 >> 1; - pDst_block->m_lo.m_b1 = b0 >> 1; - - output_hi_bits &= ~1ULL; - output_hi_bits |= p0; - std::swap(p0, p1); - - output_low_selector = 15; - os = 0; - } - - num_bits = 3; - } - - output_hi_bits |= (static_cast<uint64_t>(os) << output_bit_offset); - output_bit_offset += num_bits; - } - } - - pDst_block->m_hi_bits = output_hi_bits; - - assert(pDst_block->m_hi.m_p1 == p1); - - return; - } - - uint32_t selector_range_table = g_etc1_to_bc7_m6_selector_range_index[low_selector][high_selector]; - - const uint32_t* pTable_r = g_etc1_to_bc7_m6_table[base_color_r + inten_table * 32] + (selector_range_table * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS); - const uint32_t* pTable_g = g_etc1_to_bc7_m6_table[base_color_g + inten_table * 32] + (selector_range_table * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS); - const uint32_t* pTable_b = g_etc1_to_bc7_m6_table[base_color_b + inten_table * 32] + (selector_range_table * NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS); - -#if 1 - assert(NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS == 48); - - uint32_t best_err0 = UINT_MAX, best_err1 = UINT_MAX; - -#define DO_ITER2(idx) \ - { \ - uint32_t v0 = ((pTable_r[(idx)+0] + pTable_g[(idx)+0] + pTable_b[(idx)+0]) << 14) | ((idx) + 0); if (v0 < best_err0) best_err0 = v0; \ - uint32_t v1 = ((pTable_r[(idx)+1] + pTable_g[(idx)+1] + pTable_b[(idx)+1]) << 14) | ((idx) + 1); if (v1 < best_err1) best_err1 = v1; \ - } -#define DO_ITER4(idx) DO_ITER2(idx); DO_ITER2((idx) + 2); -#define DO_ITER8(idx) DO_ITER4(idx); DO_ITER4((idx) + 4); -#define DO_ITER16(idx) DO_ITER8(idx); DO_ITER8((idx) + 8); - - DO_ITER16(0); - DO_ITER16(16); - DO_ITER16(32); -#undef DO_ITER2 -#undef DO_ITER4 -#undef DO_ITER8 -#undef DO_ITER16 - - uint32_t best_err = basisu::minimum(best_err0, best_err1); - uint32_t best_mapping = best_err & 0xFF; - //best_err >>= 14; -#else - uint32_t best_err = UINT_MAX; - uint32_t best_mapping = 0; - assert((NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS % 2) == 0); - for (uint32_t m = 0; m < NUM_ETC1_TO_BC7_M6_SELECTOR_MAPPINGS; m += 2) - { -#define DO_ITER(idx) { uint32_t total_err = (pTable_r[idx] + pTable_g[idx] + pTable_b[idx]) & 0x3FFFF; if (total_err < best_err) { best_err = total_err; best_mapping = idx; } } - DO_ITER(m); - DO_ITER(m + 1); -#undef DO_ITER - } -#endif - - pDst_block->m_lo.m_mode = 64; - - pDst_block->m_lo.m_a0 = 127; - pDst_block->m_lo.m_a1 = 127; - - uint64_t v = 0; - const uint8_t* pSelectors_xlat; - - if (g_etc1_to_bc7_selector_mappings[best_mapping][pSelector->get_selector(0, 0)] & 8) - { - pDst_block->m_lo.m_r1 = (pTable_r[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_g1 = (pTable_g[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_b1 = (pTable_b[best_mapping] >> 18) & 0x7F; - - pDst_block->m_lo.m_r0 = (pTable_r[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_g0 = (pTable_g[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_b0 = (pTable_b[best_mapping] >> 25) & 0x7F; - - pDst_block->m_lo.m_p0 = 1; - pDst_block->m_hi.m_p1 = 0; - - v = 0; - pSelectors_xlat = &g_etc1_to_bc7_selector_mappings_inv[best_mapping][0]; - } - else - { - pDst_block->m_lo.m_r0 = (pTable_r[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_g0 = (pTable_g[best_mapping] >> 18) & 0x7F; - pDst_block->m_lo.m_b0 = (pTable_b[best_mapping] >> 18) & 0x7F; - - pDst_block->m_lo.m_r1 = (pTable_r[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_g1 = (pTable_g[best_mapping] >> 25) & 0x7F; - pDst_block->m_lo.m_b1 = (pTable_b[best_mapping] >> 25) & 0x7F; - - pDst_block->m_lo.m_p0 = 0; - pDst_block->m_hi.m_p1 = 1; - - v = 1; - pSelectors_xlat = &g_etc1_to_bc7_selector_mappings[best_mapping][0]; - } - - uint64_t v1 = 0, v2 = 0, v3 = 0; - -#define DO_X(x, s0, s1, s2, s3) { \ - v |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[0] >> ((x) * 2)) & 3] << (s0)); \ - v1 |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[1] >> ((x) * 2)) & 3] << (s1)); \ - v2 |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[2] >> ((x) * 2)) & 3] << (s2)); \ - v3 |= ((uint64_t)pSelectors_xlat[(pSelector->m_selectors[3] >> ((x) * 2)) & 3] << (s3)); } - - // 1 4 8 12 - // 16 20 24 28 - // 32 36 40 44 - // 48 52 56 60 - - DO_X(0, 1, 16, 32, 48); - DO_X(1, 4, 20, 36, 52); - DO_X(2, 8, 24, 40, 56); - DO_X(3, 12, 28, 44, 60); -#undef DO_X - - pDst_block->m_hi_bits = v | v1 | v2 | v3; -#endif - - } -#endif // BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - #if BASISD_SUPPORT_BC7_MODE5 static dxt_selector_range g_etc1_to_bc7_m5_selector_ranges[] = { @@ -4085,7 +3834,7 @@ namespace basist assert(num_bits < 32); assert(val < (1ULL << num_bits)); - uint32_t mask = (1 << num_bits) - 1; + uint32_t mask = static_cast<uint32_t>((1ULL << num_bits) - 1); while (num_bits) { @@ -4425,9 +4174,11 @@ namespace basist { bc7_mode_5* pDst_block = static_cast<bc7_mode_5*>(pDst); + // First ensure the block is cleared to all 0's static_cast<uint64_t*>(pDst)[0] = 0; static_cast<uint64_t*>(pDst)[1] = 0; + // Set alpha to 255 pDst_block->m_lo.m_mode = 1 << 5; pDst_block->m_lo.m_a0 = 255; pDst_block->m_lo.m_a1_0 = 63; @@ -4690,7 +4441,11 @@ namespace basist set_block_bits((uint8_t*)pDst, output_bits, 31, 97); } #endif // BASISD_SUPPORT_BC7_MODE5 - + +#if BASISD_SUPPORT_ETC2_EAC_A8 || BASISD_SUPPORT_UASTC + static const uint8_t g_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; +#endif + #if BASISD_SUPPORT_ETC2_EAC_A8 static void convert_etc1s_to_etc2_eac_a8(eac_block* pDst_block, const endpoint* pEndpoints, const selector* pSelector) { @@ -4712,8 +4467,7 @@ namespace basist pDst_block->m_multiplier = 1; // selectors are all 4's - static const uint8_t s_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; - memcpy(pDst_block->m_selectors, s_etc2_eac_a8_sel4, sizeof(s_etc2_eac_a8_sel4)); + memcpy(pDst_block->m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; } @@ -5325,31 +5079,30 @@ namespace basist #endif -#if BASISD_SUPPORT_ASTC - struct astc_block_params - { - // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; - uint8_t m_weights[32]; - }; - +#if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC // Table encodes 5 trits to 8 output bits. 3^5 entries. // Inverse of the trit bit manipulation process in https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding - static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, - 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, - 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, - 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, - 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, + static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39, + 43, 51, 55, 59, 44, 45, 46, 64, 65, 66, 68, 69, 70, 72, 73, 74, 80, 81, 82, 84, 85, 86, 88, 89, 90, 67, 71, 75, 83, 87, 91, 76, 77, 78, 128, 129, 130, 132, 133, 134, 136, 137, 138, 144, 145, 146, 148, 149, 150, 152, 153, 154, + 131, 135, 139, 147, 151, 155, 140, 141, 142, 160, 161, 162, 164, 165, 166, 168, 169, 170, 176, 177, 178, 180, 181, 182, 184, 185, 186, 163, 167, 171, 179, 183, 187, 172, 173, 174, 192, 193, 194, 196, 197, 198, 200, 201, 202, + 208, 209, 210, 212, 213, 214, 216, 217, 218, 195, 199, 203, 211, 215, 219, 204, 205, 206, 96, 97, 98, 100, 101, 102, 104, 105, 106, 112, 113, 114, 116, 117, 118, 120, 121, 122, 99, 103, 107, 115, 119, 123, 108, 109, 110, 224, + 225, 226, 228, 229, 230, 232, 233, 234, 240, 241, 242, 244, 245, 246, 248, 249, 250, 227, 231, 235, 243, 247, 251, 236, 237, 238, 28, 29, 30, 60, 61, 62, 92, 93, 94, 156, 157, 158, 188, 189, 190, 220, 221, 222, 31, 63, 95, 159, 191, 223, 124, 125, 126 }; + // Extracts bits [low,high] + static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) + { + return (bits >> low) & ((1 << (high - low + 1)) - 1); + } + // Writes bits to output in an endian safe way - static inline void astc_set_bits(uint32_t *pOutput, int &bit_pos, uint32_t value, int total_bits) + static inline void astc_set_bits(uint32_t* pOutput, int& bit_pos, uint32_t value, uint32_t total_bits) { uint8_t* pBytes = reinterpret_cast<uint8_t*>(pOutput); - + while (total_bits) { - const uint32_t bits_to_write = std::min(total_bits, 8 - (bit_pos & 7)); + const uint32_t bits_to_write = basisu::minimum<int>(total_bits, 8 - (bit_pos & 7)); pBytes[bit_pos >> 3] |= static_cast<uint8_t>(value << (bit_pos & 7)); @@ -5359,14 +5112,8 @@ namespace basist } } - // Extracts bits [low,high] - static inline uint32_t astc_extract_bits(uint32_t bits, int low, int high) - { - return (bits >> low) & ((1 << (high - low + 1)) - 1); - } - // Encodes 5 values to output, usable for any range that uses trits and bits - static void astc_encode_trits(uint32_t *pOutput, const uint8_t *pValues, int& bit_pos, int n) + static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) { // First extract the trits and the bits from the 5 input values int trits = 0, bits[5]; @@ -5374,9 +5121,9 @@ namespace basist for (int i = 0; i < 5; i++) { static const int s_muls[5] = { 1, 3, 9, 27, 81 }; - + const int t = pValues[i] >> n; - + trits += t * s_muls[i]; bits[i] = pValues[i] & bit_mask; } @@ -5386,14 +5133,23 @@ namespace basist assert(trits < 243); const int T = g_astc_trit_encode[trits]; - + // Now interleave the 8 encoded trit bits with the bits to form the encoded output. See table 94. astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 1) << n) | (bits[1] << (2 + n)), n * 2 + 2); - astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | + astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) | (bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6); } +#endif // #if BASISD_SUPPORT_UASTC || BASISD_SUPPORT_ASTC +#if BASISD_SUPPORT_ASTC + struct astc_block_params + { + // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) + uint8_t m_endpoints[10]; + uint8_t m_weights[32]; + }; + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. @@ -6255,12 +6011,15 @@ namespace basist static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_45[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_pvrtc2_45.inc" }; - + +#if 0 static const etc1s_to_atc_solution g_etc1s_to_pvrtc2_alpha_33[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_pvrtc2_alpha_33.inc" }; #endif +#endif + static const etc1s_to_atc_solution g_etc1s_to_atc_55[32 * 8 * NUM_ETC1S_TO_ATC_SELECTOR_MAPPINGS * NUM_ETC1S_TO_ATC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_atc_55.inc" }; @@ -7167,10 +6926,7 @@ namespace basist } typedef struct { float c[4]; } vec4F; - - static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } - static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } - static inline float saturate(float value) { return clampf(value, 0, 1.0f); } + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -7188,7 +6944,7 @@ namespace basist } static inline int sq(int x) { return x * x; } - + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. I can see why Apple didn't adopt it. @@ -7461,7 +7217,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -7488,7 +7244,15 @@ namespace basist minColor = vec4F_saturate(&c0); maxColor = vec4F_saturate(&c1); if (minColor.c[3] > maxColor.c[3]) - std::swap(minColor, maxColor); + { + // VS 2019 release Code Generator issue + //std::swap(minColor, maxColor); + + float a = minColor.c[0], b = minColor.c[1], c = minColor.c[2], d = minColor.c[3]; + minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3]; + minColor.c[0] = maxColor.c[0]; minColor.c[1] = maxColor.c[1]; minColor.c[2] = maxColor.c[2]; minColor.c[3] = maxColor.c[3]; + maxColor.c[0] = a; maxColor.c[1] = b; maxColor.c[2] = c; maxColor.c[3] = d; + } } else { @@ -7648,7 +7412,7 @@ namespace basist uint32_t m = (le * 5 + he * 3) / 8; - int err = labs((int)v - (int)m); + int err = (int)labs((int)v - (int)m); if (err < lowest_err) { lowest_err = err; @@ -7671,7 +7435,7 @@ namespace basist uint32_t le = (l << 1); le = (le << 4) | le; - int err = labs((int)v - (int)le); + int err = (int)labs((int)v - (int)le); if (err < lowest_err) { lowest_err = err; @@ -7693,7 +7457,7 @@ namespace basist uint32_t he = (h << 1) | 1; he = (he << 4) | he; - int err = labs((int)v - (int)he); + int err = (int)labs((int)v - (int)he); if (err < lowest_err) { lowest_err = err; @@ -7722,7 +7486,7 @@ namespace basist uint32_t m = (le * 5 + he * 3) / 8; - int err = labs((int)v - (int)m); + int err = (int)labs((int)v - (int)m); if (err < lowest_err) { lowest_err = err; @@ -7752,7 +7516,7 @@ namespace basist uint32_t m = (le * 5 + he * 3) / 8; - int err = labs((int)v - (int)m); + int err = (int)labs((int)v - (int)m); if (err < lowest_err) { lowest_err = err; @@ -7768,59 +7532,65 @@ namespace basist } #endif // BASISD_SUPPORT_PVRTC2 - basisu_lowlevel_transcoder::basisu_lowlevel_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : + basisu_lowlevel_etc1s_transcoder::basisu_lowlevel_etc1s_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : + m_pGlobal_codebook(nullptr), m_pGlobal_sel_codebook(pGlobal_sel_codebook), m_selector_history_buf_size(0) { } - bool basisu_lowlevel_transcoder::decode_palettes( + bool basisu_lowlevel_etc1s_transcoder::decode_palettes( uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size) { + if (m_pGlobal_codebook) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 11\n"); + return false; + } bitwise_decoder sym_codec; huffman_decoding_table color5_delta_model0, color5_delta_model1, color5_delta_model2, inten_delta_model; if (!sym_codec.init(pEndpoints_data, endpoints_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 0\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 0\n"); return false; } if (!sym_codec.read_huffman_table(color5_delta_model0)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 1\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1\n"); return false; } if (!sym_codec.read_huffman_table(color5_delta_model1)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 1a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 1a\n"); return false; } if (!sym_codec.read_huffman_table(color5_delta_model2)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 2a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2a\n"); return false; } if (!sym_codec.read_huffman_table(inten_delta_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 2b\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n"); return false; } if (!color5_delta_model0.is_valid() || !color5_delta_model1.is_valid() || !color5_delta_model2.is_valid() || !inten_delta_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 2b\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 2b\n"); return false; } const bool endpoints_are_grayscale = sym_codec.get_bits(1) != 0; - m_endpoints.resize(num_endpoints); + m_local_endpoints.resize(num_endpoints); color32 prev_color5(16, 16, 16, 0); uint32_t prev_inten = 0; @@ -7828,8 +7598,8 @@ namespace basist for (uint32_t i = 0; i < num_endpoints; i++) { uint32_t inten_delta = sym_codec.decode_huffman(inten_delta_model); - m_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7); - prev_inten = m_endpoints[i].m_inten5; + m_local_endpoints[i].m_inten5 = static_cast<uint8_t>((inten_delta + prev_inten) & 7); + prev_inten = m_local_endpoints[i].m_inten5; for (uint32_t c = 0; c < (endpoints_are_grayscale ? 1U : 3U); c++) { @@ -7843,25 +7613,25 @@ namespace basist int v = (prev_color5[c] + delta) & 31; - m_endpoints[i].m_color5[c] = static_cast<uint8_t>(v); + m_local_endpoints[i].m_color5[c] = static_cast<uint8_t>(v); prev_color5[c] = static_cast<uint8_t>(v); } if (endpoints_are_grayscale) { - m_endpoints[i].m_color5[1] = m_endpoints[i].m_color5[0]; - m_endpoints[i].m_color5[2] = m_endpoints[i].m_color5[0]; + m_local_endpoints[i].m_color5[1] = m_local_endpoints[i].m_color5[0]; + m_local_endpoints[i].m_color5[2] = m_local_endpoints[i].m_color5[0]; } } sym_codec.stop(); - m_selectors.resize(num_selectors); + m_local_selectors.resize(num_selectors); if (!sym_codec.init(pSelectors_data, selectors_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 5\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); return false; } @@ -7880,12 +7650,12 @@ namespace basist { if (!sym_codec.read_huffman_table(mod_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 6\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 6\n"); return false; } if (!mod_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 6a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 6a\n"); return false; } } @@ -7902,7 +7672,7 @@ namespace basist if (pal_index >= m_pGlobal_sel_codebook->size()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 7z\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 7z\n"); return false; } @@ -7911,9 +7681,9 @@ namespace basist // TODO: Optimize this for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) - m_selectors[i].set_selector(x, y, e[x + y * 4]); + m_local_selectors[i].set_selector(x, y, e[x + y * 4]); - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); } } else @@ -7928,12 +7698,12 @@ namespace basist basist::huffman_decoding_table uses_global_cb_bitflags_model; if (!sym_codec.read_huffman_table(uses_global_cb_bitflags_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 7\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 7\n"); return false; } if (!uses_global_cb_bitflags_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 7a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 7a\n"); return false; } @@ -7942,12 +7712,12 @@ namespace basist { if (!sym_codec.read_huffman_table(global_mod_indices_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 8\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 8\n"); return false; } if (!global_mod_indices_model.is_valid()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 8a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 8a\n"); return false; } } @@ -7975,7 +7745,7 @@ namespace basist if (pal_index >= m_pGlobal_sel_codebook->size()) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 8b\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 8b\n"); return false; } @@ -7983,7 +7753,7 @@ namespace basist for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) - m_selectors[q].set_selector(x, y, e[x + y * 4]); + m_local_selectors[q].set_selector(x, y, e[x + y * 4]); } else { @@ -7992,11 +7762,11 @@ namespace basist uint32_t cur_byte = sym_codec.get_bits(8); for (uint32_t k = 0; k < 4; k++) - m_selectors[q].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[q].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } } - m_selectors[q].init_flags(); + m_local_selectors[q].init_flags(); } } else @@ -8012,23 +7782,23 @@ namespace basist uint32_t cur_byte = sym_codec.get_bits(8); for (uint32_t k = 0; k < 4; k++) - m_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); } } else { if (!sym_codec.read_huffman_table(delta_selector_pal_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 10\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10\n"); return false; } if ((num_selectors > 1) && (!delta_selector_pal_model.is_valid())) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_palettes: fail 10a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 10a\n"); return false; } @@ -8044,9 +7814,9 @@ namespace basist prev_bytes[j] = static_cast<uint8_t>(cur_byte); for (uint32_t k = 0; k < 4; k++) - m_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); continue; } @@ -8058,9 +7828,9 @@ namespace basist prev_bytes[j] = static_cast<uint8_t>(cur_byte); for (uint32_t k = 0; k < 4; k++) - m_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); + m_local_selectors[i].set_selector(k, j, (cur_byte >> (k * 2)) & 3); } - m_selectors[i].init_flags(); + m_local_selectors[i].init_flags(); } } } @@ -8071,60 +7841,60 @@ namespace basist return true; } - bool basisu_lowlevel_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size) + bool basisu_lowlevel_etc1s_transcoder::decode_tables(const uint8_t* pTable_data, uint32_t table_data_size) { basist::bitwise_decoder sym_codec; if (!sym_codec.init(pTable_data, table_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 0\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 0\n"); return false; } if (!sym_codec.read_huffman_table(m_endpoint_pred_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 1\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1\n"); return false; } if (m_endpoint_pred_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 1a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 1a\n"); return false; } if (!sym_codec.read_huffman_table(m_delta_endpoint_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 2\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2\n"); return false; } if (m_delta_endpoint_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 2a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 2a\n"); return false; } if (!sym_codec.read_huffman_table(m_selector_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 3\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3\n"); return false; } if (m_selector_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 3a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 3a\n"); return false; } if (!sym_codec.read_huffman_table(m_selector_history_buf_rle_model)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 4\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4\n"); return false; } if (m_selector_history_buf_rle_model.get_code_sizes().size() == 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::decode_tables: fail 4a\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_tables: fail 4a\n"); return false; } @@ -8135,27 +7905,37 @@ namespace basist return true; } - bool basisu_lowlevel_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels, + bool basisu_lowlevel_etc1s_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, bool transcode_alpha, void *pAlpha_blocks, uint32_t output_rows_in_pixels) { - (void)transcode_alpha; - (void)pAlpha_blocks; + // 'pDst_blocks' unused when disabling *all* hardware transcode options + // (and 'bc1_allow_threecolor_blocks' when disabling DXT) + BASISU_NOTE_UNUSED(pDst_blocks); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + BASISU_NOTE_UNUSED(transcode_alpha); + BASISU_NOTE_UNUSED(pAlpha_blocks); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } if (!pState) pState = &m_def_state; - const bool is_video = (header.m_tex_type == cBASISTexTypeVideoFrames); const uint32_t total_blocks = num_blocks_x * num_blocks_y; if (!output_row_pitch_in_blocks_or_pixels) { if (basis_block_format_is_uncompressed(fmt)) - output_row_pitch_in_blocks_or_pixels = slice_desc.m_orig_width; + output_row_pitch_in_blocks_or_pixels = orig_width; else { if (fmt == block_format::cFXT1_RGB) - output_row_pitch_in_blocks_or_pixels = (slice_desc.m_orig_width + 7) / 8; + output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8; else output_row_pitch_in_blocks_or_pixels = num_blocks_x; } @@ -8164,23 +7944,23 @@ namespace basist if (basis_block_format_is_uncompressed(fmt)) { if (!output_rows_in_pixels) - output_rows_in_pixels = slice_desc.m_orig_height; + output_rows_in_pixels = orig_height; } - std::vector<uint32_t>* pPrev_frame_indices = nullptr; + basisu::vector<uint32_t>* pPrev_frame_indices = nullptr; if (is_video) { // TODO: Add check to make sure the caller hasn't tried skipping past p-frames - const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsIsAlphaData) != 0; - const uint32_t level_index = slice_desc.m_level_index; + //const bool alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + //const uint32_t level_index = slice_desc.m_level_index; if (level_index >= basisu_transcoder_state::cMaxPrevFrameLevels) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: unsupported level_index\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: unsupported level_index\n"); return false; } - pPrev_frame_indices = &pState->m_prev_frame_indices[alpha_flag][level_index]; + pPrev_frame_indices = &pState->m_prev_frame_indices[is_alpha_slice][level_index]; if (pPrev_frame_indices->size() < total_blocks) pPrev_frame_indices->resize(total_blocks); } @@ -8189,14 +7969,12 @@ namespace basist if (!sym_codec.init(pImage_data, image_data_size)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: sym_codec.init failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: sym_codec.init failed\n"); return false; } approx_move_to_front selector_history_buf(m_selector_history_buf_size); - - const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)m_selectors.size(); - const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; + uint32_t cur_selector_rle_count = 0; decoder_etc_block block; @@ -8212,7 +7990,7 @@ namespace basist pPVRTC_work_mem = malloc(num_blocks_x * num_blocks_y * (sizeof(decoder_etc_block) + sizeof(uint32_t))); if (!pPVRTC_work_mem) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: malloc failed\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: malloc failed\n"); return false; } pPVRTC_endpoints = (uint32_t*) & ((decoder_etc_block*)pPVRTC_work_mem)[num_blocks_x * num_blocks_y]; @@ -8228,6 +8006,16 @@ namespace basist int prev_endpoint_pred_sym = 0; int endpoint_pred_repeat_count = 0; uint32_t prev_endpoint_index = 0; + const endpoint_vec& endpoints = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_endpoints : m_local_endpoints; + const selector_vec& selectors = m_pGlobal_codebook ? m_pGlobal_codebook->m_local_selectors : m_local_selectors; + if (!endpoints.size() || !selectors.size()) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: global codebooks must be unpacked first\n"); + return false; + } + + const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = (uint32_t)selectors.size(); + const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = m_selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; for (uint32_t block_y = 0; block_y < num_blocks_y; block_y++) { @@ -8279,7 +8067,7 @@ namespace basist // Left if (!block_x) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (0)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (0)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8292,7 +8080,7 @@ namespace basist // Upper if (!block_y) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (1)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (1)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8314,7 +8102,7 @@ namespace basist // Upper left if ((!block_x) || (!block_y)) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (2)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (2)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8329,8 +8117,8 @@ namespace basist const uint32_t delta_sym = sym_codec.decode_huffman(m_delta_endpoint_model); endpoint_index = delta_sym + prev_endpoint_index; - if (endpoint_index >= m_endpoints.size()) - endpoint_index -= (int)m_endpoints.size(); + if (endpoint_index >= endpoints.size()) + endpoint_index -= (int)endpoints.size(); } pState->m_block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_endpoint_index = (uint16_t)endpoint_index; @@ -8345,7 +8133,7 @@ namespace basist { cur_selector_rle_count--; - selector_sym = (int)m_selectors.size(); + selector_sym = (int)selectors.size(); } else { @@ -8363,28 +8151,28 @@ namespace basist if (cur_selector_rle_count > total_blocks) { // The file is corrupted or we've got a bug. - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (3)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (3)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; } - selector_sym = (int)m_selectors.size(); + selector_sym = (int)selectors.size(); cur_selector_rle_count--; } } - if (selector_sym >= (int)m_selectors.size()) + if (selector_sym >= (int)selectors.size()) { assert(m_selector_history_buf_size > 0); - int history_buf_index = selector_sym - (int)m_selectors.size(); + int history_buf_index = selector_sym - (int)selectors.size(); if (history_buf_index >= (int)selector_history_buf.size()) { // The file is corrupted or we've got a bug. - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (4)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (4)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8404,10 +8192,10 @@ namespace basist } } - if ((endpoint_index >= m_endpoints.size()) || (selector_index >= m_selectors.size())) + if ((endpoint_index >= endpoints.size()) || (selector_index >= selectors.size())) { // The file is corrupted or we've got a bug. - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: invalid datastream (5)\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: invalid datastream (5)\n"); if (pPVRTC_work_mem) free(pPVRTC_work_mem); return false; @@ -8428,8 +8216,8 @@ namespace basist } #endif - const endpoint* pEndpoints = &m_endpoints[endpoint_index]; - const selector* pSelector = &m_selectors[selector_index]; + const endpoint* pEndpoints = &endpoints[endpoint_index]; + const selector* pSelector = &selectors[selector_index]; switch (fmt) { @@ -8448,9 +8236,8 @@ namespace basist } case block_format::cBC1: { - void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - #if BASISD_SUPPORT_DXT1 + void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; #if BASISD_ENABLE_DEBUG_FLAGS if (g_debug_flags & (cDebugFlagVisBC1Sels | cDebugFlagVisBC1Endpoints)) convert_etc1s_to_dxt1_vis(static_cast<dxt1_block*>(pDst_block), pEndpoints, pSelector, bc1_allow_threecolor_blocks); @@ -8534,8 +8321,8 @@ namespace basist const uint16_t* pAlpha_block = reinterpret_cast<uint16_t*>(static_cast<uint8_t*>(pAlpha_blocks) + (block_x + block_y * num_blocks_x) * sizeof(uint32_t)); - const endpoint* pAlpha_endpoints = &m_endpoints[pAlpha_block[0]]; - const selector* pAlpha_selector = &m_selectors[pAlpha_block[1]]; + const endpoint* pAlpha_endpoints = &endpoints[pAlpha_block[0]]; + const selector* pAlpha_selector = &selectors[pAlpha_block[1]]; const color32& alpha_base_color = pAlpha_endpoints->m_color5; const uint32_t alpha_inten_table = pAlpha_endpoints->m_inten5; @@ -8559,16 +8346,7 @@ namespace basist break; } - case block_format::cBC7_M6_OPAQUE_ONLY: - { -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - convert_etc1s_to_bc7_m6(static_cast<bc7_mode_6*>(pDst_block), pEndpoints, pSelector); -#else - assert(0); -#endif - break; - } + case block_format::cBC7: // for more consistency with UASTC case block_format::cBC7_M5_COLOR: { #if BASISD_SUPPORT_BC7_MODE5 @@ -8603,7 +8381,7 @@ namespace basist { #if BASISD_SUPPORT_ASTC void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &m_endpoints[0], &m_selectors[0]); + convert_etc1s_to_astc_4x4(pDst_block, pEndpoints, pSelector, transcode_alpha, &endpoints[0], &selectors[0]); #else assert(0); #endif @@ -8648,8 +8426,8 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast<uint8_t*>(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - - convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &m_endpoints[0], &m_selectors[0]); + + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); #endif break; } @@ -8665,8 +8443,8 @@ namespace basist assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8705,8 +8483,8 @@ namespace basist assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8734,8 +8512,8 @@ namespace basist assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8765,8 +8543,8 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8775,12 +8553,20 @@ namespace basist if (fmt == block_format::cRGB565) { for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast<uint16_t>(((colors[i].r >> 3) << 11) | ((colors[i].g >> 2) << 5) | (colors[i].b >> 3)); + { + packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].b, 31)); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } } else { for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast<uint16_t>(((colors[i].b >> 3) << 11) | ((colors[i].g >> 2) << 5) | (colors[i].r >> 3)); + { + packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].b, 31) << 11) | (mul_8(colors[i].g, 63) << 5) | mul_8(colors[i].r, 31)); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } } for (uint32_t y = 0; y < max_y; y++) @@ -8800,15 +8586,17 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); uint16_t packed_colors[4]; for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast<uint16_t>(((colors[i].r >> 4) << 12) | ((colors[i].g >> 4) << 8) | ((colors[i].b >> 4) << 4)); + { + packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4)); + } for (uint32_t y = 0; y < max_y; y++) { @@ -8817,7 +8605,14 @@ namespace basist for (uint32_t x = 0; x < max_x; x++) { uint16_t cur = reinterpret_cast<uint16_t*>(pDst_pixels)[x]; + if (BASISD_IS_BIG_ENDIAN) + cur = byteswap_uint16(cur); + cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; + + if (BASISD_IS_BIG_ENDIAN) + cur = byteswap_uint16(cur); + reinterpret_cast<uint16_t*>(pDst_pixels)[x] = cur; } @@ -8831,15 +8626,19 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); uint16_t packed_colors[4]; for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = static_cast<uint16_t>(((colors[i].r >> 4) << 12) | ((colors[i].g >> 4) << 8) | ((colors[i].b >> 4) << 4) | 0xF); + { + packed_colors[i] = static_cast<uint16_t>((mul_8(colors[i].r, 15) << 12) | (mul_8(colors[i].g, 15) << 8) | (mul_8(colors[i].b, 15) << 4) | 0xF); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } for (uint32_t y = 0; y < max_y; y++) { @@ -8858,22 +8657,28 @@ namespace basist assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); - const uint32_t max_x = basisu::minimum<int>(4, output_row_pitch_in_blocks_or_pixels - block_x * 4); - const uint32_t max_y = basisu::minimum<int>(4, output_rows_in_pixels - block_y * 4); + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); uint16_t packed_colors[4]; for (uint32_t i = 0; i < 4; i++) - packed_colors[i] = colors[i].g >> 4; + { + packed_colors[i] = mul_8(colors[i].g, 15); + if (BASISD_IS_BIG_ENDIAN) + packed_colors[i] = byteswap_uint16(packed_colors[i]); + } for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; for (uint32_t x = 0; x < max_x; x++) + { reinterpret_cast<uint16_t*>(pDst_pixels)[x] = packed_colors[(s >> (x * 2)) & 3]; + } pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); } @@ -8903,7 +8708,7 @@ namespace basist if (endpoint_pred_repeat_count != 0) { - BASISU_DEVEL_ERROR("basisu_lowlevel_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n"); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_slice: endpoint_pred_repeat_count != 0. The file is corrupted or this is a bug\n"); return false; } @@ -8914,7 +8719,7 @@ namespace basist if (fmt == block_format::cPVRTC1_4_RGB) fixup_pvrtc1_4_modulation_rgb((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y); else if (fmt == block_format::cPVRTC1_4_RGBA) - fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &m_endpoints[0], &m_selectors[0]); + fixup_pvrtc1_4_modulation_rgba((decoder_etc_block*)pPVRTC_work_mem, pPVRTC_endpoints, pDst_blocks, num_blocks_x, num_blocks_y, pAlpha_blocks, &endpoints[0], &selectors[0]); #endif // BASISD_SUPPORT_PVRTC1 if (pPVRTC_work_mem) @@ -8923,8 +8728,1187 @@ namespace basist return true; } + bool basis_validate_output_buffer_size(transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels, + uint32_t total_slice_blocks) + { + if (basis_transcoder_format_is_uncompressed(target_format)) + { + // Assume the output buffer is orig_width by orig_height + if (!output_row_pitch_in_blocks_or_pixels) + output_row_pitch_in_blocks_or_pixels = orig_width; + + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + + // Now make sure the output buffer is large enough, or we'll overwrite memory. + if (output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < (output_rows_in_pixels * output_row_pitch_in_blocks_or_pixels)\n"); + return false; + } + } + else if (target_format == transcoder_texture_format::cTFFXT1_RGB) + { + const uint32_t num_blocks_fxt1_x = (orig_width + 7) / 8; + const uint32_t num_blocks_fxt1_y = (orig_height + 3) / 4; + const uint32_t total_blocks_fxt1 = num_blocks_fxt1_x * num_blocks_fxt1_y; + + if (output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < total_blocks_fxt1\n"); + return false; + } + } + else + { + if (output_blocks_buf_size_in_blocks_or_pixels < total_slice_blocks) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output_blocks_buf_size_in_blocks_or_pixels < transcode_image\n"); + return false; + } + } + return true; + } + + bool basisu_lowlevel_etc1s_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags, + bool basis_file_has_alpha_slices, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels) + { + if (((uint64_t)rgb_offset + rgb_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (color)\n"); + return false; + } + + if (alpha_length) + { + if (((uint64_t)alpha_offset + alpha_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: source data buffer too small (alpha)\n"); + return false; + } + } + else + { + assert(!basis_file_has_alpha_slices); + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!basis_file_has_alpha_slices)) + { + // Switch to PVRTC1 RGB if the input doesn't have alpha. + target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; + } + + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + const uint8_t* pData = pCompressed_data + rgb_offset; + uint32_t data_len = rgb_length; + bool is_alpha_slice = false; + + // If the caller wants us to transcode the mip level's alpha data, then use the next slice. + if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) + { + pData = pCompressed_data + alpha_offset; + data_len = alpha_length; + is_alpha_slice = true; + } + + switch (target_format) + { + case transcoder_texture_format::cTFETC1_RGB: + { + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC1_RGB: + { +#if !BASISD_SUPPORT_DXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC1/DXT1 unsupported\n"); + return false; +#else + // status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC1, bytes_per_block_or_pixel, true, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFBC4_R: + { +#if !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC4/DXT5A unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC1_4_RGB: + { +#if !BASISD_SUPPORT_PVRTC1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n"); + return false; +#else + // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + { +#if !BASISD_SUPPORT_PVRTC1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC1 4 unsupported\n"); + return false; +#else + assert(basis_file_has_alpha_slices); + assert(alpha_length); + + // Temp buffer to hold alpha block endpoint/selector indices + basisu::vector<uint32_t> temp_block_indices(total_slice_blocks); + + // First transcode alpha data to temp buffer + //status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState); + status = transcode_slice(&temp_block_indices[0], num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, num_blocks_x, pState, false, nullptr, 0); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (0)\n"); + } + else + { + // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, &temp_block_indices[0], 0); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to PVRTC1 4 RGBA failed (1)\n"); + } + } + + break; +#endif + } + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + { +#if !BASISD_SUPPORT_BC7_MODE5 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: BC7 unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + // We used to support transcoding just alpha to BC7 - but is that useful at all? + + // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC7_M5_COLOR, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + + if ((status) && (basis_file_has_alpha_slices)) + { + // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha. + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC7_M5_ALPHA, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC7 failed (0)\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFETC2_RGBA: + { +#if !BASISD_SUPPORT_ETC2_EAC_A8 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ETC2 EAC A8 unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + if (basis_file_has_alpha_slices) + { + // First decode the alpha data + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + //write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + // Now decode the color data + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2 A failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFBC3_RGBA: + { +#if !BASISD_SUPPORT_DXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT1 unsupported\n"); + return false; +#elif !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + // First decode the alpha data + if (basis_file_has_alpha_slices) + { + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3. + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC1, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC3 A failed\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFBC5_RG: + { +#if !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + //bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + // uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + // basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); + + // Decode the R data (actually the green channel of the color data slice in the basis file) + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (status) + { + if (basis_file_has_alpha_slices) + { + // Decode the G data (actually the green channel of the alpha data slice in the basis file) + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 1 failed\n"); + } + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to BC5 channel 0 failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFASTC_4x4_RGBA: + { +#if !BASISD_SUPPORT_ASTC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ASTC unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to the output (we're using the output texture as a temp buffer here). + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (status) + { + // Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and + // transcode both the alpha and color data at the same time to ASTC. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); + } + } + else + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cASTC_4x4, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ASTC failed (0)\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFATC_RGB: + { +#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC_RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFATC_RGBA: + { +#if !BASISD_SUPPORT_ATC + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: ATC unsupported\n"); + return false; +#elif !BASISD_SUPPORT_DXT5A + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: DXT5A unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + // First decode the alpha data + if (basis_file_has_alpha_slices) + { + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cBC4, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, pOutput_blocks, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cATC_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ATC A failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC2_4_RGB: + { +#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + { +#if !BASISD_SUPPORT_PVRTC2 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: PVRTC2 unsupported\n"); + return false; +#else + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to the output (we're using the output texture as a temp buffer here). + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cIndices, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to failed\n"); + } + else + { + // Now decode the color data and transcode to PVRTC2 RGBA. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); + } + } + else + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGB, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to cPVRTC2_4_RGBA failed\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFRGBA32: + { + // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + + // First decode the alpha data + if (basis_file_has_alpha_slices) + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + else + status = true; + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA32 A failed\n"); + } + + break; + } + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + { + // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, (target_format == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGB565 RGB failed\n"); + } + + break; + } + case transcoder_texture_format::cTFRGBA4444: + { + // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + + // First decode the alpha data + if (basis_file_has_alpha_slices) + //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + else + status = true; + + if (status) + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 RGB failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to RGBA4444 A failed\n"); + } + + break; + } + case transcoder_texture_format::cTFFXT1_RGB: + { +#if !BASISD_SUPPORT_FXT1 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: FXT1 unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cFXT1_RGB, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to FXT1_RGB failed\n"); + } + break; +#endif + } + case transcoder_texture_format::cTFETC2_EAC_R11: + { +#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n"); + return false; +#else + //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 failed\n"); + } + + break; +#endif + } + case transcoder_texture_format::cTFETC2_EAC_RG11: + { +#if !BASISD_SUPPORT_ETC2_EAC_RG11 + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: EAC_RG11 unsupported\n"); + return false; +#else + assert(bytes_per_block_or_pixel == 16); + + if (basis_file_has_alpha_slices) + { + // First decode the alpha data to G + //status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice((uint8_t *)pOutput_blocks + 8, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + } + else + { + basisu_transcoder::write_opaque_alpha_blocks(num_blocks_x, num_blocks_y, (uint8_t*)pOutput_blocks + 8, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels); + status = true; + } + + if (status) + { + // Now decode the color data to R + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 R failed\n"); + } + } + else + { + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC2_EAC_R11 G failed\n"); + } + + break; +#endif + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: Invalid fmt\n"); + break; + } + } + + return status; + } + + basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() + { + } + + bool basisu_lowlevel_uastc_transcoder::transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, uint32_t output_rows_in_pixels, int channel0, int channel1, uint32_t decode_flags) + { + BASISU_NOTE_UNUSED(pState); + BASISU_NOTE_UNUSED(bc1_allow_threecolor_blocks); + + assert(g_transcoder_initialized); + if (!g_transcoder_initialized) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder not globally initialized.\n"); + return false; + } + +#if BASISD_SUPPORT_UASTC + const uint32_t total_blocks = num_blocks_x * num_blocks_y; + + if (!output_row_pitch_in_blocks_or_pixels) + { + if (basis_block_format_is_uncompressed(fmt)) + output_row_pitch_in_blocks_or_pixels = orig_width; + else + { + if (fmt == block_format::cFXT1_RGB) + output_row_pitch_in_blocks_or_pixels = (orig_width + 7) / 8; + else + output_row_pitch_in_blocks_or_pixels = num_blocks_x; + } + } + + if (basis_block_format_is_uncompressed(fmt)) + { + if (!output_rows_in_pixels) + output_rows_in_pixels = orig_height; + } + + uint32_t total_expected_block_bytes = sizeof(uastc_block) * total_blocks; + if (image_data_size < total_expected_block_bytes) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: image_data_size < total_expected_block_bytes The file is corrupted or this is a bug.\n"); + return false; + } + + const uastc_block* pSource_block = reinterpret_cast<const uastc_block *>(pImage_data); + + const bool high_quality = (decode_flags & cDecodeFlagsHighQuality) != 0; + const bool from_alpha = has_alpha && (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + + bool status = false; + if ((fmt == block_format::cPVRTC1_4_RGB) || (fmt == block_format::cPVRTC1_4_RGBA)) + { + if (fmt == block_format::cPVRTC1_4_RGBA) + transcode_uastc_to_pvrtc1_4_rgba((const uastc_block*)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality); + else + transcode_uastc_to_pvrtc1_4_rgb((const uastc_block *)pImage_data, pDst_blocks, num_blocks_x, num_blocks_y, high_quality, from_alpha); + } + else + { + for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) + { + void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; + + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) + { + switch (fmt) + { + case block_format::cETC1: + { + if (from_alpha) + status = transcode_uastc_to_etc1(*pSource_block, pDst_block, 3); + else + status = transcode_uastc_to_etc1(*pSource_block, pDst_block); + break; + } + case block_format::cETC2_RGBA: + { + status = transcode_uastc_to_etc2_rgba(*pSource_block, pDst_block); + break; + } + case block_format::cBC1: + { + status = transcode_uastc_to_bc1(*pSource_block, pDst_block, high_quality); + break; + } + case block_format::cBC3: + { + status = transcode_uastc_to_bc3(*pSource_block, pDst_block, high_quality); + break; + } + case block_format::cBC4: + { + if (channel0 < 0) + channel0 = 0; + status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); + break; + } + case block_format::cBC5: + { + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + status = transcode_uastc_to_bc5(*pSource_block, pDst_block, high_quality, channel0, channel1); + break; + } + case block_format::cBC7: + case block_format::cBC7_M5_COLOR: // for consistently with ETC1S + { + status = transcode_uastc_to_bc7(*pSource_block, pDst_block); + break; + } + case block_format::cASTC_4x4: + { + status = transcode_uastc_to_astc(*pSource_block, pDst_block); + break; + } + case block_format::cETC2_EAC_R11: + { + if (channel0 < 0) + channel0 = 0; + status = transcode_uastc_to_etc2_eac_r11(*pSource_block, pDst_block, high_quality, channel0); + break; + } + case block_format::cETC2_EAC_RG11: + { + if (channel0 < 0) + channel0 = 0; + if (channel1 < 0) + channel1 = 3; + status = transcode_uastc_to_etc2_eac_rg11(*pSource_block, pDst_block, high_quality, channel0, channel1); + break; + } + case block_format::cRGBA32: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32 *)block_pixels, false); + + assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); + + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; + + pDst_pixels[0 + 4 * x] = c.r; + pDst_pixels[1 + 4 * x] = c.g; + pDst_pixels[2 + 4 * x] = c.b; + pDst_pixels[3 + 4 * x] = c.a; + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); + } + + break; + } + case block_format::cRGB565: + case block_format::cBGR565: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32*)block_pixels, false); + + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; + + const uint16_t packed = (fmt == block_format::cRGB565) ? static_cast<uint16_t>((mul_8(c.r, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.b, 31)) : + static_cast<uint16_t>((mul_8(c.b, 31) << 11) | (mul_8(c.g, 63) << 5) | mul_8(c.r, 31)); + + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + + break; + } + case block_format::cRGBA4444: + { + color32 block_pixels[4][4]; + status = unpack_uastc(*pSource_block, (color32*)block_pixels, false); + + assert(sizeof(uint16_t) == output_block_or_pixel_stride_in_bytes); + uint8_t* pDst_pixels = static_cast<uint8_t*>(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint16_t); + + const uint32_t max_x = basisu::minimum<int>(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); + const uint32_t max_y = basisu::minimum<int>(4, (int)output_rows_in_pixels - (int)block_y * 4); + + for (uint32_t y = 0; y < max_y; y++) + { + for (uint32_t x = 0; x < max_x; x++) + { + const color32& c = block_pixels[y][x]; + + const uint16_t packed = static_cast<uint16_t>((mul_8(c.r, 15) << 12) | (mul_8(c.g, 15) << 8) | (mul_8(c.b, 15) << 4) | mul_8(c.a, 15)); + + pDst_pixels[x * 2 + 0] = (uint8_t)(packed & 0xFF); + pDst_pixels[x * 2 + 1] = (uint8_t)((packed >> 8) & 0xFF); + } + + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint16_t); + } + break; + } + default: + assert(0); + break; + + } + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: Transcoder failed to unpack a UASTC block - this is a bug, or the data was corrupted\n"); + return false; + } + + } // block_x + + } // block_y + } + + return true; +#else + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_slice: UASTC is unsupported\n"); + + BASISU_NOTE_UNUSED(decode_flags); + BASISU_NOTE_UNUSED(channel0); + BASISU_NOTE_UNUSED(channel1); + BASISU_NOTE_UNUSED(output_rows_in_pixels); + BASISU_NOTE_UNUSED(output_row_pitch_in_blocks_or_pixels); + BASISU_NOTE_UNUSED(output_block_or_pixel_stride_in_bytes); + BASISU_NOTE_UNUSED(fmt); + BASISU_NOTE_UNUSED(image_data_size); + BASISU_NOTE_UNUSED(pImage_data); + BASISU_NOTE_UNUSED(num_blocks_x); + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pDst_blocks); + + return false; +#endif + } + + bool basisu_lowlevel_uastc_transcoder::transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t slice_offset, uint32_t slice_length, + uint32_t decode_flags, + bool has_alpha, + bool is_video, + uint32_t output_row_pitch_in_blocks_or_pixels, + basisu_transcoder_state* pState, + uint32_t output_rows_in_pixels, + int channel0, int channel1) + { + BASISU_NOTE_UNUSED(is_video); + BASISU_NOTE_UNUSED(level_index); + + if (((uint64_t)slice_offset + slice_length) > (uint64_t)compressed_data_length) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n"); + return false; + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) + { + if ((!basisu::is_pow2(num_blocks_x * 4)) || (!basisu::is_pow2(num_blocks_y * 4))) + { + // PVRTC1 only supports power of 2 dimensions + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: PVRTC1 only supports power of 2 dimensions\n"); + return false; + } + } + + if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA) && (!has_alpha)) + { + // Switch to PVRTC1 RGB if the input doesn't have alpha. + target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; + } + + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); + const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; + + if (!basis_validate_output_buffer_size(target_format, output_blocks_buf_size_in_blocks_or_pixels, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, output_rows_in_pixels, total_slice_blocks)) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n"); + return false; + } + + bool status = false; + + // UASTC4x4 + switch (target_format) + { + case transcoder_texture_format::cTFETC1_RGB: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFETC2_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC2 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC1_RGB: + { + // TODO: ETC1S allows BC1 from alpha channel. That doesn't seem actually useful, though. + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC1, + bytes_per_block_or_pixel, true, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC1 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC3_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC3, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC3, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC3 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC4_R: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC4 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC5_RG: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // 0, 3); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC5, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC5 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBC7, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to BC7 failed\n"); + } + break; + } + case transcoder_texture_format::cTFPVRTC1_4_RGB: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGB, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGB 4bpp failed\n"); + } + break; + } + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cPVRTC1_4_RGBA, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to PVRTC1 RGBA 4bpp failed\n"); + } + break; + } + case transcoder_texture_format::cTFASTC_4x4_RGBA: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cASTC_4x4, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ASTC 4x4 failed\n"); + } + break; + } + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->ATC currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFFXT1_RGB: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->FXT1 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFPVRTC2_4_RGB: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: UASTC->PVRTC2 currently unsupported\n"); + return false; + } + case transcoder_texture_format::cTFETC2_EAC_R11: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_R11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + ((has_alpha) && (transcode_alpha_data_to_opaque_formats)) ? 3 : 0); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to EAC R11 failed\n"); + } + break; + } + case transcoder_texture_format::cTFETC2_EAC_RG11: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + // nullptr, 0, + // 0, 3); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC2_EAC_RG11, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, + 0, 3); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_basisu_lowlevel_uastc_transcodertranscoder::transcode_image: transcode_slice() to EAC RG11 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA32: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA32, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA32, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA32 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGB565: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGB565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGB565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); + } + break; + } + case transcoder_texture_format::cTFBGR565: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBGR565, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cBGR565, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGB565 failed\n"); + } + break; + } + case transcoder_texture_format::cTFRGBA4444: + { + //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cRGBA4444, + bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + if (!status) + { + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to RGBA4444 failed\n"); + } + break; + } + default: + { + assert(0); + BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: Invalid format\n"); + break; + } + } + + return status; + } + basisu_transcoder::basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : - m_lowlevel_decoder(pGlobal_sel_codebook) + m_lowlevel_etc1s_decoder(pGlobal_sel_codebook), + m_ready_to_transcode(false) { } @@ -9027,22 +10011,33 @@ namespace basist return false; } - if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { - if (pHeader->m_total_slices & 1) + if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) { - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha basis file\n"); + if (pHeader->m_total_slices & 1) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid alpha .basis file\n"); + return false; + } + } + + // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too. + if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n"); return false; } } - - if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) + else { - // We only support ETC1S in basis universal - BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: invalid basis file (ETC1S flag check)\n"); - return false; + if ((pHeader->m_flags & cBASISHeaderFlagETC1S) != 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: Invalid .basis file (ETC1S check)\n"); + return false; + } } - + if ((pHeader->m_slice_desc_file_ofs >= data_size) || ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) ) @@ -9103,6 +10098,19 @@ namespace basist return pHeader->m_total_images; } + basis_tex_format basisu_transcoder::get_tex_format(const void* pData, uint32_t data_size) const + { + if (!validate_header_quick(pData, data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::get_total_images: header validation failed\n"); + return basis_tex_format::cETC1S; + } + + const basis_file_header* pHeader = static_cast<const basis_file_header*>(pData); + + return (basis_tex_format)(uint32_t)pHeader->m_tex_format; + } + bool basisu_transcoder::get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const { if (!validate_header_quick(pData, data_size)) @@ -9145,8 +10153,17 @@ namespace basist image_info.m_image_index = image_index; image_info.m_total_levels = total_levels; - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + + image_info.m_alpha_flag = false; + + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + else + image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; + image_info.m_width = slice_desc.m_num_blocks_x * 4; image_info.m_height = slice_desc.m_num_blocks_y * 4; image_info.m_orig_width = slice_desc.m_orig_width; @@ -9264,7 +10281,13 @@ namespace basist image_info.m_image_index = image_index; image_info.m_level_index = level_index; - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + else + image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; image_info.m_width = slice_desc.m_num_blocks_x * 4; image_info.m_height = slice_desc.m_num_blocks_y * 4; @@ -9275,6 +10298,21 @@ namespace basist image_info.m_total_blocks = image_info.m_num_blocks_x * image_info.m_num_blocks_y; image_info.m_first_slice_index = slice_index; + image_info.m_rgb_file_ofs = slice_desc.m_file_ofs; + image_info.m_rgb_file_len = slice_desc.m_file_size; + image_info.m_alpha_file_ofs = 0; + image_info.m_alpha_file_len = 0; + + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + if (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) + { + assert((slice_index + 1) < (int)pHeader->m_total_slices); + image_info.m_alpha_file_ofs = pSlice_descs[slice_index + 1].m_file_ofs; + image_info.m_alpha_file_len = pSlice_descs[slice_index + 1].m_file_size; + } + } + return true; } @@ -9294,14 +10332,20 @@ namespace basist file_info.m_total_header_size = sizeof(basis_file_header) + pHeader->m_total_slices * sizeof(basis_slice_desc); file_info.m_total_selectors = pHeader->m_total_selectors; + file_info.m_selector_codebook_ofs = pHeader->m_selector_cb_file_ofs; file_info.m_selector_codebook_size = pHeader->m_selector_cb_file_size; file_info.m_total_endpoints = pHeader->m_total_endpoints; + file_info.m_endpoint_codebook_ofs = pHeader->m_endpoint_cb_file_ofs; file_info.m_endpoint_codebook_size = pHeader->m_endpoint_cb_file_size; + file_info.m_tables_ofs = pHeader->m_tables_file_ofs; file_info.m_tables_size = pHeader->m_tables_file_size; - file_info.m_etc1s = (pHeader->m_flags & cBASISHeaderFlagETC1S) != 0; + file_info.m_tex_format = static_cast<basis_tex_format>(static_cast<int>(pHeader->m_tex_format)); + + file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; @@ -9346,7 +10390,7 @@ namespace basist slice_info.m_image_index = pSlice_descs[i].m_image_index; slice_info.m_level_index = pSlice_descs[i].m_level_index; slice_info.m_unpacked_slice_crc16 = pSlice_descs[i].m_slice_data_crc16; - slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsIsAlphaData) != 0; + slice_info.m_alpha_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsHasAlpha) != 0; slice_info.m_iframe_flag = (pSlice_descs[i].m_flags & cSliceDescFlagsFrameIsIFrame) != 0; if (pSlice_descs[i].m_image_index >= pHeader->m_total_images) @@ -9366,15 +10410,9 @@ namespace basist return true; } - - bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) const + + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) { - if (m_lowlevel_decoder.m_endpoints.size()) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: already called start_transcoding\n"); - return true; - } - if (!validate_header_quick(pData, data_size)) { BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: header validation failed\n"); @@ -9382,59 +10420,123 @@ namespace basist } const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData); - const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData); - if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size) + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n"); - } + if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) + { + m_lowlevel_etc1s_decoder.clear(); + } - if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n"); - return false; - } + if (pHeader->m_flags & cBASISHeaderFlagUsesGlobalCodebook) + { + if (!m_lowlevel_etc1s_decoder.get_global_codebooks()) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: File uses global codebooks, but set_global_codebooks() has not been called\n"); + return false; + } + if (!m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size()) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebooks must be unpacked first by calling start_transcoding()\n"); + return false; + } + if ((m_lowlevel_etc1s_decoder.get_global_codebooks()->get_endpoints().size() != pHeader->m_total_endpoints) || + (m_lowlevel_etc1s_decoder.get_global_codebooks()->get_selectors().size() != pHeader->m_total_selectors)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: Global codebook size mismatch (wrong codebooks for file).\n"); + return false; + } + if (!pHeader->m_tables_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (2)\n"); + return false; + } + if (pHeader->m_tables_file_ofs > data_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (4)\n"); + return false; + } + if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (5)\n"); + return false; + } + } + else + { + if (!pHeader->m_endpoint_cb_file_size || !pHeader->m_selector_cb_file_size || !pHeader->m_tables_file_size) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted (0)\n"); + return false; + } - if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n"); - return false; - } + if ((pHeader->m_endpoint_cb_file_ofs > data_size) || (pHeader->m_selector_cb_file_ofs > data_size) || (pHeader->m_tables_file_ofs > data_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (1)\n"); + return false; + } - if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); - return false; - } + if (pHeader->m_endpoint_cb_file_size > (data_size - pHeader->m_endpoint_cb_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (2)\n"); + return false; + } - if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); - return false; - } + if (pHeader->m_selector_cb_file_size > (data_size - pHeader->m_selector_cb_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + return false; + } - if (!m_lowlevel_decoder.decode_palettes( - pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size, - pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size)) - { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n"); - return false; - } + if (pHeader->m_tables_file_size > (data_size - pHeader->m_tables_file_ofs)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: file is corrupted or passed in buffer too small (3)\n"); + return false; + } - if (!m_lowlevel_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size)) + if (!m_lowlevel_etc1s_decoder.decode_palettes( + pHeader->m_total_endpoints, pDataU8 + pHeader->m_endpoint_cb_file_ofs, pHeader->m_endpoint_cb_file_size, + pHeader->m_total_selectors, pDataU8 + pHeader->m_selector_cb_file_ofs, pHeader->m_selector_cb_file_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_palettes failed\n"); + return false; + } + } + + if (!m_lowlevel_etc1s_decoder.decode_tables(pDataU8 + pHeader->m_tables_file_ofs, pHeader->m_tables_file_size)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n"); + return false; + } + } + else { - BASISU_DEVEL_ERROR("basisu_transcoder::start_transcoding: decode_tables failed\n"); - return false; + // Nothing special to do for UASTC. + if (m_lowlevel_etc1s_decoder.m_local_endpoints.size()) + { + m_lowlevel_etc1s_decoder.clear(); + } } + + m_ready_to_transcode = true; + + return true; + } + bool basisu_transcoder::stop_transcoding() + { + m_lowlevel_etc1s_decoder.clear(); + + m_ready_to_transcode = false; + return true; } bool basisu_transcoder::transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels) const + uint32_t output_block_or_pixel_stride_in_bytes, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state* pState, void *pAlpha_blocks, uint32_t output_rows_in_pixels, int channel0, int channel1) const { - if (!m_lowlevel_decoder.m_endpoints.size()) + if (!m_ready_to_transcode) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: must call start_transcoding first\n"); return false; @@ -9529,16 +10631,26 @@ namespace basist BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); return false; } - - return m_lowlevel_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, - pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, - fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, - (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels); + + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + { + return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + output_rows_in_pixels, channel0, channel1, decode_flags); + } + else + { + return m_lowlevel_etc1s_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, + pDataU8 + slice_desc.m_file_ofs, slice_desc.m_file_size, + fmt, output_block_or_pixel_stride_in_bytes, (decode_flags & cDecodeFlagsBC1ForbidThreeColorBlocks) == 0, *pHeader, slice_desc, output_row_pitch_in_blocks_or_pixels, pState, + (decode_flags & cDecodeFlagsOutputHasAlphaIndices) != 0, pAlpha_blocks, output_rows_in_pixels); + } } int basisu_transcoder::find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const { - (void)data_size; + BASISU_NOTE_UNUSED(data_size); const basis_file_header* pHeader = reinterpret_cast<const basis_file_header*>(pData); const uint8_t* pDataU8 = static_cast<const uint8_t*>(pData); @@ -9576,9 +10688,16 @@ namespace basist const basis_slice_desc& slice_desc = pSlice_descs[slice_iter]; if ((slice_desc.m_image_index == image_index) && (slice_desc.m_level_index == level_index)) { - const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsIsAlphaData) != 0; - if (slice_alpha == alpha_data) + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) + { + const bool slice_alpha = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; + if (slice_alpha == alpha_data) + return slice_iter; + } + else + { return slice_iter; + } } } @@ -9587,12 +10706,16 @@ namespace basist return -1; } - static void write_opaque_alpha_blocks( + void basisu_transcoder::write_opaque_alpha_blocks( uint32_t num_blocks_x, uint32_t num_blocks_y, - void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, block_format fmt, + void* pOutput_blocks, block_format fmt, uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels) { - BASISU_NOTE_UNUSED(output_blocks_buf_size_in_blocks_or_pixels); + // 'num_blocks_y', 'pOutput_blocks' & 'block_stride_in_bytes' unused + // when disabling BASISD_SUPPORT_ETC2_EAC_A8 *and* BASISD_SUPPORT_DXT5A + BASISU_NOTE_UNUSED(num_blocks_y); + BASISU_NOTE_UNUSED(pOutput_blocks); + BASISU_NOTE_UNUSED(block_stride_in_bytes); if (!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = num_blocks_x; @@ -9606,8 +10729,7 @@ namespace basist blk.m_table = 13; // Selectors are all 4's - static const uint8_t s_etc2_eac_a8_sel4[6] = { 0x92, 0x49, 0x24, 0x92, 0x49, 0x24 }; - memcpy(&blk.m_selectors, s_etc2_eac_a8_sel4, sizeof(s_etc2_eac_a8_sel4)); + memcpy(&blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); for (uint32_t y = 0; y < num_blocks_y; y++) { @@ -9648,9 +10770,9 @@ namespace basist transcoder_texture_format fmt, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, basisu_transcoder_state *pState, uint32_t output_rows_in_pixels) const { - const uint32_t bytes_per_block = basis_get_bytes_per_block(fmt); + const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(fmt); - if (!m_lowlevel_decoder.m_endpoints.size()) + if (!m_ready_to_transcode) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: must call start_transcoding() first\n"); return false; @@ -9693,37 +10815,40 @@ namespace basist fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; } - if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsIsAlphaData) + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n"); - - // The first slice shouldn't have alpha data in a properly formed basis file - return false; - } - - if (basis_file_has_alpha_slices) - { - // The alpha data should immediately follow the color data, and have the same resolution. - if ((slice_index + 1U) >= pHeader->m_total_slices) + if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n"); - // basis file is missing the alpha slice - return false; - } + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has out of order alpha slice\n"); - // Basic sanity checks - if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsIsAlphaData) == 0) - { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n"); - // This slice should have alpha data + // The first slice shouldn't have alpha data in a properly formed basis file return false; } - if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y)) + if (basis_file_has_alpha_slices) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n"); - // Alpha slice should have been the same res as the color slice - return false; + // The alpha data should immediately follow the color data, and have the same resolution. + if ((slice_index + 1U) >= pHeader->m_total_slices) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice\n"); + // basis file is missing the alpha slice + return false; + } + + // Basic sanity checks + if ((pSlice_descs[slice_index + 1].m_flags & cSliceDescFlagsHasAlpha) == 0) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file has missing alpha slice (flag check)\n"); + // This slice should have alpha data + return false; + } + + if ((pSlice_descs[slice_index].m_num_blocks_x != pSlice_descs[slice_index + 1].m_num_blocks_x) || (pSlice_descs[slice_index].m_num_blocks_y != pSlice_descs[slice_index + 1].m_num_blocks_y)) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: alpha basis file slice dimensions bad\n"); + // Alpha slice should have been the same res as the color slice + return false; + } } } @@ -9735,798 +10860,6745 @@ namespace basist { // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. - // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block. This is all the transcoder actually writes to memory. - memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block); + // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. + memset(static_cast<uint8_t*>(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } - + + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + { + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + + // Use the container independent image transcode method. + status = m_lowlevel_uastc_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + } + else + { + // ETC1S + const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; + const basis_slice_desc* pAlpha_slice_desc = basis_file_has_alpha_slices ? &pSlice_descs[slice_index + 1] : nullptr; + + assert((pSlice_desc->m_flags & cSliceDescFlagsHasAlpha) == 0); + + if (pAlpha_slice_desc) + { + // Basic sanity checks + assert((pAlpha_slice_desc->m_flags & cSliceDescFlagsHasAlpha) != 0); + assert(pSlice_desc->m_num_blocks_x == pAlpha_slice_desc->m_num_blocks_x); + assert(pSlice_desc->m_num_blocks_y == pAlpha_slice_desc->m_num_blocks_y); + assert(pSlice_desc->m_level_index == pAlpha_slice_desc->m_level_index); + } + + // Use the container independent image transcode method. + status = m_lowlevel_etc1s_decoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t *)pData, data_size, pSlice_desc->m_num_blocks_x, pSlice_desc->m_num_blocks_y, pSlice_desc->m_orig_width, pSlice_desc->m_orig_height, pSlice_desc->m_level_index, + pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, + (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_ofs : 0U, (pAlpha_slice_desc != nullptr) ? (uint32_t)pAlpha_slice_desc->m_file_size : 0U, + decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); + + } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) + + if (!status) + { + BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); + } + else + { + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + } + + return status; + } + + uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt) + { switch (fmt) { case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFBC1_RGB: + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFETC2_EAC_R11: + return 8; + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFBC5_RG: + case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFFXT1_RGB: + case transcoder_texture_format::cTFETC2_EAC_RG11: + return 16; + case transcoder_texture_format::cTFRGBA32: + return sizeof(uint32_t); + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return sizeof(uint16_t); + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return 0; + } + + const char* basis_get_format_name(transcoder_texture_format fmt) + { + switch (fmt) { - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB"; + case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB"; + case transcoder_texture_format::cTFBC4_R: return "BC4_R"; + case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB"; + case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; + case transcoder_texture_format::cTFBC7_RGBA: return "BC7_RGBA"; + case transcoder_texture_format::cTFBC7_ALT: return "BC7_RGBA"; + case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA"; + case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; + case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; + case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; + case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA"; + case transcoder_texture_format::cTFRGBA32: return "RGBA32"; + case transcoder_texture_format::cTFRGB565: return "RGB565"; + case transcoder_texture_format::cTFBGR565: return "BGR565"; + case transcoder_texture_format::cTFRGBA4444: return "RGBA4444"; + case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB"; + case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB"; + case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; + case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; + case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return ""; + } - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + const char* basis_get_block_format_name(block_format fmt) + { + switch (fmt) + { + case block_format::cETC1: return "ETC1"; + case block_format::cBC1: return "BC1"; + case block_format::cPVRTC1_4_RGB: return "PVRTC1_4_RGB"; + case block_format::cPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; + case block_format::cBC7: return "BC7"; + case block_format::cETC2_RGBA: return "ETC2_RGBA"; + case block_format::cBC3: return "BC3"; + case block_format::cASTC_4x4: return "ASTC_4x4"; + case block_format::cATC_RGB: return "ATC_RGB"; + case block_format::cRGBA32: return "RGBA32"; + case block_format::cRGB565: return "RGB565"; + case block_format::cBGR565: return "BGR565"; + case block_format::cRGBA4444: return "RGBA4444"; + case block_format::cFXT1_RGB: return "FXT1_RGB"; + case block_format::cPVRTC2_4_RGB: return "PVRTC2_4_RGB"; + case block_format::cPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; + case block_format::cETC2_EAC_R11: return "ETC2_EAC_R11"; + case block_format::cETC2_EAC_RG11: return "ETC2_EAC_RG11"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return ""; + } + + const char* basis_get_texture_type_name(basis_texture_type tex_type) + { + switch (tex_type) + { + case cBASISTexType2D: return "2D"; + case cBASISTexType2DArray: return "2D array"; + case cBASISTexTypeCubemapArray: return "cubemap array"; + case cBASISTexTypeVideoFrames: return "video"; + case cBASISTexTypeVolume: return "3D"; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n"); + break; + } + return ""; + } + + bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC2_RGBA: + case transcoder_texture_format::cTFBC3_RGBA: + case transcoder_texture_format::cTFASTC_4x4_RGBA: + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGBA4444: + return true; + default: + break; + } + return false; + } + + basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1; + case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1; + case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4; + case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB; + case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA; + case transcoder_texture_format::cTFBC7_RGBA: return basisu::texture_format::cBC7; + case transcoder_texture_format::cTFBC7_ALT: return basisu::texture_format::cBC7; + case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; + case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; + case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; + case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4; + case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB; + case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; + case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32; + case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565; + case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565; + case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444; + case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB; + case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA; + case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA; + case transcoder_texture_format::cTFETC2_EAC_R11: return basisu::texture_format::cETC2_R11_EAC; + case transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC; + default: + assert(0); + BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); + break; + } + return basisu::texture_format::cInvalidTextureFormat; + } + + bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return true; + default: + break; + } + return false; + } + + bool basis_block_format_is_uncompressed(block_format blk_fmt) + { + switch (blk_fmt) + { + case block_format::cRGB32: + case block_format::cRGBA32: + case block_format::cA32: + case block_format::cRGB565: + case block_format::cBGR565: + case block_format::cRGBA4444: + case block_format::cRGBA4444_COLOR: + case block_format::cRGBA4444_ALPHA: + case block_format::cRGBA4444_COLOR_OPAQUE: + return true; + default: + break; + } + return false; + } + + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) + { + switch (fmt) + { + case transcoder_texture_format::cTFRGBA32: + return sizeof(uint32_t); + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return sizeof(uint16_t); + default: + break; + } + return 0; + } + + uint32_t basis_get_block_width(transcoder_texture_format tex_type) + { + switch (tex_type) + { + case transcoder_texture_format::cTFFXT1_RGB: + return 8; + default: + break; + } + return 4; + } + + uint32_t basis_get_block_height(transcoder_texture_format tex_type) + { + BASISU_NOTE_UNUSED(tex_type); + return 4; + } + + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) + { + if (fmt == basis_tex_format::cUASTC4x4) + { +#if BASISD_SUPPORT_UASTC + switch (tex_type) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC1 failed\n"); + // These niche formats aren't currently supported for UASTC - everything else is. + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + case transcoder_texture_format::cTFFXT1_RGB: + return false; + default: + return true; } - break; +#endif } - case transcoder_texture_format::cTFBC1_RGB: + else { -#if !BASISD_SUPPORT_DXT1 - return false; + switch (tex_type) + { + // ETC1 and uncompressed are always supported. + case transcoder_texture_format::cTFETC1_RGB: + case transcoder_texture_format::cTFRGBA32: + case transcoder_texture_format::cTFRGB565: + case transcoder_texture_format::cTFBGR565: + case transcoder_texture_format::cTFRGBA4444: + return true; +#if BASISD_SUPPORT_DXT1 + case transcoder_texture_format::cTFBC1_RGB: + return true; +#endif +#if BASISD_SUPPORT_DXT5A + case transcoder_texture_format::cTFBC4_R: + case transcoder_texture_format::cTFBC5_RG: + return true; +#endif +#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A + case transcoder_texture_format::cTFBC3_RGBA: + return true; +#endif +#if BASISD_SUPPORT_PVRTC1 + case transcoder_texture_format::cTFPVRTC1_4_RGB: + case transcoder_texture_format::cTFPVRTC1_4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_BC7_MODE5 + case transcoder_texture_format::cTFBC7_RGBA: + case transcoder_texture_format::cTFBC7_ALT: + return true; +#endif +#if BASISD_SUPPORT_ETC2_EAC_A8 + case transcoder_texture_format::cTFETC2_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ASTC + case transcoder_texture_format::cTFASTC_4x4_RGBA: + return true; +#endif +#if BASISD_SUPPORT_ATC + case transcoder_texture_format::cTFATC_RGB: + case transcoder_texture_format::cTFATC_RGBA: + return true; +#endif +#if BASISD_SUPPORT_FXT1 + case transcoder_texture_format::cTFFXT1_RGB: + return true; +#endif +#if BASISD_SUPPORT_PVRTC2 + case transcoder_texture_format::cTFPVRTC2_4_RGB: + case transcoder_texture_format::cTFPVRTC2_4_RGBA: + return true; #endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; +#if BASISD_SUPPORT_ETC2_EAC_RG11 + case transcoder_texture_format::cTFETC2_EAC_R11: + case transcoder_texture_format::cTFETC2_EAC_RG11: + return true; +#endif + default: + break; + } + } - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + return false; + } + + // ------------------------------------------------------------------------------------------------------ + // UASTC + // ------------------------------------------------------------------------------------------------------ + +#if BASISD_SUPPORT_UASTC + const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = + { + { 0, 28, false }, { 1, 20, false }, { 2, 16, true }, { 3, 29, false }, + { 4, 91, true }, { 5, 9, false }, { 6, 107, true }, { 7, 72, true }, + { 8, 149, false }, { 9, 204, true }, { 10, 50, false }, { 11, 114, true }, + { 12, 496, true }, { 13, 17, true }, { 14, 78, false }, { 15, 39, true }, + { 17, 252, true }, { 18, 828, true }, { 19, 43, false }, { 20, 156, false }, + { 21, 116, false }, { 22, 210, true }, { 23, 476, true }, { 24, 273, false }, + { 25, 684, true }, { 26, 359, false }, { 29, 246, true }, { 32, 195, true }, + { 33, 694, true }, { 52, 524, true } + }; + + const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS] = + { + { 10, 36, 4 }, { 11, 48, 4 }, { 0, 61, 3 }, { 2, 137, 4 }, + { 8, 161, 5 }, { 13, 183, 4 }, { 1, 226, 2 }, { 33, 281, 2 }, + { 40, 302, 3 }, { 20, 307, 4 }, { 21, 479, 0 }, { 58, 495, 3 }, + { 3, 593, 0 }, { 32, 594, 2 }, { 59, 605, 1 }, { 34, 799, 3 }, + { 20, 812, 1 }, { 14, 988, 4 }, { 31, 993, 3 } + }; + + const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3] = + { + { 4, 260, 0 }, { 8, 74, 5 }, { 9, 32, 5 }, { 10, 156, 2 }, + { 11, 183, 2 }, { 12, 15, 0 }, { 13, 745, 4 }, { 20, 0, 1 }, + { 35, 335, 1 }, { 36, 902, 5 }, { 57, 254, 0 } + }; + + const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 1, 2, 0 }, { 2, 0, 1 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } }; + + const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3] = { { 0, 1, 2 }, { 2, 0, 1 }, { 1, 2, 0 }, { 2, 1, 0 }, { 0, 2, 1 }, { 1, 0, 2 } }; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k) + { + assert(k < 6); + switch (k >> 1) + { + case 0: + if (p <= 1) + p = 0; + else + p = 1; + break; + case 1: + if (p == 0) + p = 0; + else + p = 1; + break; + case 2: + if ((p == 0) || (p == 2)) + p = 0; + else + p = 1; + break; + } + if (k & 1) + p = 1 - p; + return p; + } + + static const uint8_t g_zero_pattern[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16] = + { + { 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, { 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1 }, { 1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0 }, { 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1 }, + { 1,1,1,1,1,1,1,0,1,1,1,0,1,1,0,0 }, { 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1 }, { 1,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,0,1,1,0,0,1,0,0,0 }, + { 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1 }, { 1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0 }, + { 1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0 }, { 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 }, + { 1,0,0,0,1,1,1,0,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,1 }, { 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0 }, { 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0 }, + { 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0 }, { 1,1,1,1,1,1,1,1,0,1,1,1,0,0,1,1 }, { 1,0,0,0,1,1,0,0,1,1,0,0,1,1,1,0 }, { 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0 }, + { 1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1 }, { 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0 }, { 1,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1 }, { 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 }, + { 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0 }, { 1,0,0,1,0,0,1,1,0,1,1,0,1,1,0,0 } + }; + + const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16] = + { + { 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2 }, { 1,1,1,1,1,1,1,1,0,0,0,0,2,2,2,2 }, { 1,1,1,1,0,0,0,0,0,0,0,0,2,2,2,2 }, { 1,1,1,1,2,2,2,2,0,0,0,0,0,0,0,0 }, + { 1,1,2,0,1,1,2,0,1,1,2,0,1,1,2,0 }, { 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2 }, { 0,2,1,1,0,2,1,1,0,2,1,1,0,2,1,1 }, { 2,0,0,0,2,0,0,0,2,1,1,1,2,1,1,1 }, + { 2,0,1,2,2,0,1,2,2,0,1,2,2,0,1,2 }, { 1,1,1,1,0,0,0,0,2,2,2,2,1,1,1,1 }, { 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2 } + }; + + const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16] = + { + { 0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0 }, { 1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,1 }, + { 1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1 }, { 0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0 }, { 0,0,0,1,0,0,1,1,1,1,1,1,1,1,1,1 }, { 0,1,1,1,0,0,1,1,0,0,1,1,0,0,1,1 }, + { 1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0 }, { 0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0 }, { 1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0 }, + { 0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0 }, { 0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1 }, { 1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0 }, { 1,1,0,0,1,1,0,0,1,1,0,0,1,0,0,0 }, + { 1,1,1,1,1,1,1,1,1,0,0,0,1,0,0,0 }, { 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0 }, { 1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0 } + }; + + const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3] = + { + { 0, 2 }, { 0, 3 }, { 1, 0 }, { 0, 3 }, { 7, 0 }, { 0, 2 }, { 3, 0 }, { 7, 0 }, + { 0, 11 }, { 2, 0 }, { 0, 7 }, { 11, 0 }, { 3, 0 }, { 8, 0 }, { 0, 4 }, { 12, 0 }, + { 1, 0 }, { 8, 0 }, { 0, 1 }, { 0, 2 }, { 0, 4 }, { 8, 0 }, { 1, 0 }, { 0, 2 }, + { 4, 0 }, { 0, 1 }, { 4, 0 }, { 1, 0 }, { 4, 0 }, { 1, 0 } + }; + + const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3] = + { + { 0, 8, 10 }, { 8, 0, 12 }, { 4, 0, 12 }, { 8, 0, 4 }, { 3, 0, 2 }, { 0, 1, 3 }, { 0, 2, 1 }, { 1, 9, 0 }, { 1, 2, 0 }, { 4, 0, 8 }, { 0, 6, 2 } + }; + + const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3] = + { + { 0, 4 }, { 0, 2 }, { 2, 0 }, { 0, 7 }, { 8, 0 }, { 0, 1 }, { 0, 3 }, { 0, 1 }, { 2, 0 }, { 0, 1 }, { 0, 8 }, { 2, 0 }, { 0, 1 }, { 0, 7 }, { 12, 0 }, { 2, 0 }, { 9, 0 }, { 0, 2 }, { 4, 0 } + }; + + const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2] = + { + { 0x1, 4 }, + { 0x35, 6 }, + { 0x1D, 5 }, + { 0x3, 5 }, + + { 0x13, 5 }, + { 0xB, 5 }, + { 0x1B, 5 }, + { 0x7, 5 }, + + { 0x17, 5 }, + { 0xF, 5 }, + { 0x2, 3 }, + { 0x0, 2 }, + + { 0x6, 3 }, + { 0x1F, 5 }, + { 0xD, 5 }, + { 0x5, 7 }, + + { 0x15, 6 }, + { 0x25, 6 }, + { 0x9, 4 }, + { 0x45, 7 } // future expansion + }; + + // If g_uastc_mode_huff_codes[] changes this table must be updated! + static const uint8_t g_uastc_huff_modes[128] = + { + 11,0,10,3,11,15,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11, + 19,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,16,12,8,11,18,10,6,11,2,12,13,11,0,10,3,11,17,12,7,11,18,10,5,11,14,12,9,11,0,10,4,11,1,12,8,11,18,10,6,11,2,12,13 + }; + + const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES] = { 4, 2, 3, 2, 2, 3, 2, 2, 0, 2, 4, 2, 3, 1, 2, 4, 2, 2, 5 }; + const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES] = { 8, 2, 5, 2, 2, 5, 2, 2, 0, 2, 8, 2, 5, 0, 2, 8, 2, 2, 11 }; + const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES] = { 19, 20, 8, 7, 12, 20, 18, 12, 0, 8, 13, 13, 19, 20, 20, 20, 20, 20, 11 }; + const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES] = { 1, 1, 2, 3, 2, 1, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1 }; + const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1 }; + const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES] = { 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 3 }; + const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; + const uint8_t g_uastc_mode_cem[TOTAL_UASTC_MODES] = { 8, 8, 8, 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 12, 12, 4, 4, 4, 8 }; + const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; + const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0 }; + const uint8_t g_uastc_mode_total_hint_bits[TOTAL_UASTC_MODES] = { 15, 15, 15, 15, 15, 15, 15, 15, 0, 23, 17, 17, 17, 23, 23, 23, 23, 23, 15 }; + + // bits, trits, quints + const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3] = + { + { 1, 0, 0 }, // 0-1 0 + { 0, 1, 0 }, // 0-2 1 + { 2, 0, 0 }, // 0-3 2 + { 0, 0, 1 }, // 0-4 3 + + { 1, 1, 0 }, // 0-5 4 + { 3, 0, 0 }, // 0-7 5 + { 1, 0, 1 }, // 0-9 6 + { 2, 1, 0 }, // 0-11 7 + + { 4, 0, 0 }, // 0-15 8 + { 2, 0, 1 }, // 0-19 9 + { 3, 1, 0 }, // 0-23 10 + { 5, 0, 0 }, // 0-31 11 + + { 3, 0, 1 }, // 0-39 12 + { 4, 1, 0 }, // 0-47 13 + { 6, 0, 0 }, // 0-63 14 + { 4, 0, 1 }, // 0-79 15 + + { 5, 1, 0 }, // 0-95 16 + { 7, 0, 0 }, // 0-127 17 + { 5, 0, 1 }, // 0-159 18 + { 6, 1, 0 }, // 0-191 19 + + { 8, 0, 0 }, // 0-255 20 + }; + + int astc_get_levels(int range) + { + assert(range < (int)BC7ENC_TOTAL_ASTC_RANGES); + return (1 + 2 * g_astc_bise_range_table[range][1] + 4 * g_astc_bise_range_table[range][2]) << g_astc_bise_range_table[range][0]; + } + + // g_astc_unquant[] is the inverse of g_astc_sorted_order_unquant[] + astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + // Taken right from the ASTC spec. + static struct + { + const char* m_pB_str; + uint32_t m_c; + } g_astc_endpoint_unquant_params[BC7ENC_TOTAL_ASTC_RANGES] = + { + { "", 0 }, + { "", 0 }, + { "", 0 }, + { "", 0 }, + { "000000000", 204, }, // 0-5 + { "", 0 }, + { "000000000", 113, }, // 0-9 + { "b000b0bb0", 93 }, // 0-11 + { "", 0 }, + { "b0000bb00", 54 }, // 0-19 + { "cb000cbcb", 44 }, // 0-23 + { "", 0 }, + { "cb0000cbc", 26 }, // 0-39 + { "dcb000dcb", 22 }, // 0-47 + { "", 0 }, + { "dcb0000dc", 13 }, // 0-79 + { "edcb000ed", 11 }, // 0-95 + { "", 0 }, + { "edcb0000e", 6 }, // 0-159 + { "fedcb000f", 5 }, // 0-191 + { "", 0 }, + }; + + bool astc_is_valid_endpoint_range(uint32_t range) + { + if ((g_astc_bise_range_table[range][1] == 0) && (g_astc_bise_range_table[range][2] == 0)) + return true; + + return g_astc_endpoint_unquant_params[range].m_c != 0; + } + + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range) + { + assert(range < BC7ENC_TOTAL_ASTC_RANGES); + + const uint32_t bits = g_astc_bise_range_table[range][0]; + const uint32_t trits = g_astc_bise_range_table[range][1]; + const uint32_t quints = g_astc_bise_range_table[range][2]; + + uint32_t val = 0; + if ((!trits) && (!quints)) + { + assert(!packed_trits && !packed_quints); + + int bits_left = 8; + while (bits_left > 0) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC1 failed\n"); + uint32_t v = packed_bits; + + int n = basisu::minimumi(bits_left, bits); + if (n < (int)bits) + v >>= (bits - n); + + assert(v < (1U << n)); + + val |= (v << (bits_left - n)); + bits_left -= n; } - break; } - case transcoder_texture_format::cTFBC4_R: + else { -#if !BASISD_SUPPORT_DXT5A - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + const uint32_t A = (packed_bits & 1) ? 511 : 0; + const uint32_t C = g_astc_endpoint_unquant_params[range].m_c; + const uint32_t D = trits ? packed_trits : packed_quints; - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + assert(C); + + uint32_t B = 0; + for (uint32_t i = 0; i < 9; i++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC4 failed\n"); + B <<= 1; + + char c = g_astc_endpoint_unquant_params[range].m_pB_str[i]; + if (c != '0') + { + c -= 'a'; + B |= ((packed_bits >> c) & 1); + } } - break; + + val = D * C + B; + val = val ^ A; + val = (A & 0x80) | (val >> 2); } - case transcoder_texture_format::cTFPVRTC1_4_RGB: + + return val; + } + + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range) + { + assert(range < BC7ENC_TOTAL_ASTC_RANGES); + assert(packed_val < (uint32_t)astc_get_levels(range)); + + const uint32_t bits = g_astc_bise_range_table[range][0]; + const uint32_t trits = g_astc_bise_range_table[range][1]; + const uint32_t quints = g_astc_bise_range_table[range][2]; + + if ((!trits) && (!quints)) + return unquant_astc_endpoint(packed_val, 0, 0, range); + else if (trits) + return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), packed_val >> bits, 0, range); + else + return unquant_astc_endpoint(packed_val & ((1 << bits) - 1), 0, packed_val >> bits, range); + } + + // BC7 - Various BC7 tables/helpers + const uint32_t g_bc7_weights1[2] = { 0, 64 }; + const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; + const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + const uint32_t g_astc_weights4[16] = { 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 }; + const uint32_t g_astc_weights5[32] = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 }; + const uint32_t g_astc_weights_3levels[3] = { 0, 32, 64 }; + + const uint8_t g_bc7_partition1[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + + const uint8_t g_bc7_partition2[64 * 16] = + { + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 + }; + + const uint8_t g_bc7_partition3[64 * 16] = + { + 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, + }; + + const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + + const uint8_t g_bc7_table_anchor_index_third_subset_1[64] = + { + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 + }; + + const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = + { + 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 + }; + + const uint8_t g_bc7_num_subsets[8] = { 3, 2, 3, 2, 1, 1, 1, 2 }; + const uint8_t g_bc7_partition_bits[8] = { 4, 6, 6, 6, 0, 0, 0, 6 }; + const uint8_t g_bc7_color_index_bitcount[8] = { 3, 3, 2, 2, 2, 2, 4, 2 }; + + const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 }; + const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 }; + const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 }; + const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 }; + + const uint8_t g_bc7_alpha_index_bitcount[8] = { 0, 0, 0, 0, 3, 2, 4, 2 }; + + endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + + static inline void bc7_set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t* pCur_ofs) + { + assert((num_bits <= 32) && (val < (1ULL << num_bits))); + while (num_bits) { -#if !BASISD_SUPPORT_PVRTC1 - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + const uint32_t n = basisu::minimumu(8 - (*pCur_ofs & 7), num_bits); + pBytes[*pCur_ofs >> 3] |= (uint8_t)(val << (*pCur_ofs & 7)); + val >>= n; + num_bits -= n; + *pCur_ofs += n; + } + assert(*pCur_ofs <= 128); + } - // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + // TODO: Optimize this. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) + { + const uint32_t best_mode = pResults->m_mode; + + const uint32_t total_subsets = g_bc7_num_subsets[best_mode]; + const uint32_t total_partitions = 1 << g_bc7_partition_bits[best_mode]; + //const uint32_t num_rotations = 1 << g_bc7_rotation_bits[best_mode]; + //const uint32_t num_index_selectors = (best_mode == 4) ? 2 : 1; + + const uint8_t* pPartition; + if (total_subsets == 1) + pPartition = &g_bc7_partition1[0]; + else if (total_subsets == 2) + pPartition = &g_bc7_partition2[pResults->m_partition * 16]; + else + pPartition = &g_bc7_partition3[pResults->m_partition * 16]; + + uint8_t color_selectors[16]; + memcpy(color_selectors, pResults->m_selectors, 16); + + uint8_t alpha_selectors[16]; + memcpy(alpha_selectors, pResults->m_alpha_selectors, 16); + + color_quad_u8 low[3], high[3]; + memcpy(low, pResults->m_low, sizeof(low)); + memcpy(high, pResults->m_high, sizeof(high)); + + uint32_t pbits[3][2]; + memcpy(pbits, pResults->m_pbits, sizeof(pbits)); + + int anchor[3] = { -1, -1, -1 }; + + for (uint32_t k = 0; k < total_subsets; k++) + { + uint32_t anchor_index = 0; + if (k) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 RGB failed\n"); + if ((total_subsets == 3) && (k == 1)) + anchor_index = g_bc7_table_anchor_index_third_subset_1[pResults->m_partition]; + else if ((total_subsets == 3) && (k == 2)) + anchor_index = g_bc7_table_anchor_index_third_subset_2[pResults->m_partition]; + else + anchor_index = g_bc7_table_anchor_index_second_subset[pResults->m_partition]; + } + + anchor[k] = anchor_index; + + const uint32_t color_index_bits = get_bc7_color_index_size(best_mode, pResults->m_index_selector); + const uint32_t num_color_indices = 1 << color_index_bits; + + if (color_selectors[anchor_index] & (num_color_indices >> 1)) + { + for (uint32_t i = 0; i < 16; i++) + if (pPartition[i] == k) + color_selectors[i] = (uint8_t)((num_color_indices - 1) - color_selectors[i]); + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + for (uint32_t q = 0; q < 3; q++) + { + uint8_t t = low[k].m_c[q]; + low[k].m_c[q] = high[k].m_c[q]; + high[k].m_c[q] = t; + } + } + else + { + color_quad_u8 tmp = low[k]; + low[k] = high[k]; + high[k] = tmp; + } + + if (!g_bc7_mode_has_shared_p_bits[best_mode]) + { + uint32_t t = pbits[k][0]; + pbits[k][0] = pbits[k][1]; + pbits[k][1] = t; + } + } + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + const uint32_t alpha_index_bits = get_bc7_alpha_index_size(best_mode, pResults->m_index_selector); + const uint32_t num_alpha_indices = 1 << alpha_index_bits; + + if (alpha_selectors[anchor_index] & (num_alpha_indices >> 1)) + { + for (uint32_t i = 0; i < 16; i++) + if (pPartition[i] == k) + alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]); + + uint8_t t = low[k].m_c[3]; + low[k].m_c[3] = high[k].m_c[3]; + high[k].m_c[3] = t; + } } - break; } - case transcoder_texture_format::cTFPVRTC1_4_RGBA: + + uint8_t* pBlock_bytes = (uint8_t*)(pBlock); + memset(pBlock_bytes, 0, BC7ENC_BLOCK_SIZE); + + uint32_t cur_bit_ofs = 0; + bc7_set_block_bits(pBlock_bytes, 1 << best_mode, best_mode + 1, &cur_bit_ofs); + + if ((best_mode == 4) || (best_mode == 5)) + bc7_set_block_bits(pBlock_bytes, pResults->m_rotation, 2, &cur_bit_ofs); + + if (best_mode == 4) + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector, 1, &cur_bit_ofs); + + if (total_partitions > 1) + bc7_set_block_bits(pBlock_bytes, pResults->m_partition, (total_partitions == 64) ? 6 : 4, &cur_bit_ofs); + + const uint32_t total_comps = (best_mode >= 4) ? 4 : 3; + for (uint32_t comp = 0; comp < total_comps; comp++) { -#if !BASISD_SUPPORT_PVRTC1 - return false; -#endif - assert(basis_file_has_alpha_slices); + for (uint32_t subset = 0; subset < total_subsets; subset++) + { + bc7_set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + bc7_set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + } + } - // Temp buffer to hold alpha block endpoint/selector indices - std::vector<uint32_t> temp_block_indices(total_slice_blocks); + if (g_bc7_mode_has_p_bits[best_mode]) + { + for (uint32_t subset = 0; subset < total_subsets; subset++) + { + bc7_set_block_bits(pBlock_bytes, pbits[subset][0], 1, &cur_bit_ofs); + if (!g_bc7_mode_has_shared_p_bits[best_mode]) + bc7_set_block_bits(pBlock_bytes, pbits[subset][1], 1, &cur_bit_ofs); + } + } - // First transcode alpha data to temp buffer - status = transcode_slice(pData, data_size, slice_index + 1, &temp_block_indices[0], total_slice_blocks, block_format::cIndices, sizeof(uint32_t), decode_flags, pSlice_descs[slice_index].m_num_blocks_x, pState); - if (!status) + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 RGBA failed (0)\n"); + int idx = x + y * 4; + + uint32_t n = pResults->m_index_selector ? get_bc7_alpha_index_size(best_mode, pResults->m_index_selector) : get_bc7_color_index_size(best_mode, pResults->m_index_selector); + + if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2])) + n--; + + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? alpha_selectors[idx] : color_selectors[idx], n, &cur_bit_ofs); } - else + } + + if (get_bc7_mode_has_seperate_alpha_selectors(best_mode)) + { + for (uint32_t y = 0; y < 4; y++) { - // output_row_pitch_in_blocks_or_pixels is actually ignored because we're transcoding to PVRTC1. (Print a dev warning if it's != 0?) - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC1_4_RGBA, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, &temp_block_indices[0]); - if (!status) + for (uint32_t x = 0; x < 4; x++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to PVRTC1 4 RGBA failed (1)\n"); + int idx = x + y * 4; + + uint32_t n = pResults->m_index_selector ? get_bc7_color_index_size(best_mode, pResults->m_index_selector) : get_bc7_alpha_index_size(best_mode, pResults->m_index_selector); + + if ((idx == anchor[0]) || (idx == anchor[1]) || (idx == anchor[2])) + n--; + + bc7_set_block_bits(pBlock_bytes, pResults->m_index_selector ? color_selectors[idx] : alpha_selectors[idx], n, &cur_bit_ofs); } } + } - break; + assert(cur_bit_ofs == 128); + } + + // ASTC + static inline void astc_set_bits_1_to_9(uint32_t* pDst, int& bit_offset, uint32_t code, uint32_t codesize) + { + uint8_t* pBuf = reinterpret_cast<uint8_t*>(pDst); + + assert(codesize <= 9); + if (codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t val = code << byte_bit_offset; + + uint32_t index = bit_offset >> 3; + pBuf[index] |= (uint8_t)val; + + if (codesize > (8 - byte_bit_offset)) + pBuf[index + 1] |= (uint8_t)(val >> 8); + + bit_offset += codesize; } - case transcoder_texture_format::cTFBC7_M6_RGB: + } + + void pack_astc_solid_block(void* pDst_block, const color32& color) + { + uint32_t r = color[0], g = color[1], b = color[2]; + uint32_t a = color[3]; + + uint32_t* pOutput = static_cast<uint32_t*>(pDst_block); + uint8_t* pBytes = reinterpret_cast<uint8_t*>(pDst_block); + + pBytes[0] = 0xfc; pBytes[1] = 0xfd; pBytes[2] = 0xff; pBytes[3] = 0xff; + + pOutput[1] = 0xffffffff; + pOutput[2] = 0; + pOutput[3] = 0; + + int bit_pos = 64; + astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, r | (r << 8), 16); + astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, g | (g << 8), 16); + astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, b | (b << 8), 16); + astc_set_bits(reinterpret_cast<uint32_t*>(pDst_block), bit_pos, a | (a << 8), 16); + } + + // See 23.21 https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_partition_pattern_generation +#ifdef _DEBUG + static inline uint32_t astc_hash52(uint32_t v) + { + uint32_t p = v; + p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4; + p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3; + p ^= p << 6; p ^= p >> 17; + return p; + } + + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block) + { + if (small_block) + { + x <<= 1; y <<= 1; z <<= 1; + } + seed += (partitioncount - 1) * 1024; + uint32_t rnum = astc_hash52(seed); + uint8_t seed1 = rnum & 0xF; + uint8_t seed2 = (rnum >> 4) & 0xF; + uint8_t seed3 = (rnum >> 8) & 0xF; + uint8_t seed4 = (rnum >> 12) & 0xF; + uint8_t seed5 = (rnum >> 16) & 0xF; + uint8_t seed6 = (rnum >> 20) & 0xF; + uint8_t seed7 = (rnum >> 24) & 0xF; + uint8_t seed8 = (rnum >> 28) & 0xF; + uint8_t seed9 = (rnum >> 18) & 0xF; + uint8_t seed10 = (rnum >> 22) & 0xF; + uint8_t seed11 = (rnum >> 26) & 0xF; + uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; + + seed1 *= seed1; seed2 *= seed2; + seed3 *= seed3; seed4 *= seed4; + seed5 *= seed5; seed6 *= seed6; + seed7 *= seed7; seed8 *= seed8; + seed9 *= seed9; seed10 *= seed10; + seed11 *= seed11; seed12 *= seed12; + + int sh1, sh2, sh3; + if (seed & 1) + { + sh1 = (seed & 2 ? 4 : 5); sh2 = (partitioncount == 3 ? 6 : 5); + } + else { -#if !BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - return false; + sh1 = (partitioncount == 3 ? 6 : 5); sh2 = (seed & 2 ? 4 : 5); + } + sh3 = (seed & 0x10) ? sh1 : sh2; + + seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2; + seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2; + seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3; + + int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + + a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F; + + if (partitioncount < 4) d = 0; + if (partitioncount < 3) c = 0; + + if (a >= b && a >= c && a >= d) + return 0; + else if (b >= c && b >= d) + return 1; + else if (c >= d) + return 2; + else + return 3; + } #endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M6_OPAQUE_ONLY, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + static const uint8_t g_astc_quint_encode[125] = + { + 0, 1, 2, 3, 4, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 24, 25, 26, 27, 28, 5, 13, 21, 29, 6, 32, 33, 34, 35, 36, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 56, 57, + 58, 59, 60, 37, 45, 53, 61, 14, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 80, 81, 82, 83, 84, 88, 89, 90, 91, 92, 69, 77, 85, 93, 22, 96, 97, 98, 99, 100, 104, + 105, 106, 107, 108, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 101, 109, 117, 125, 30, 102, 103, 70, 71, 38, 110, 111, 78, 79, 46, 118, 119, 86, 87, 54, + 126, 127, 94, 95, 62, 39, 47, 55, 63, 31 + }; + + // Encodes 3 values to output, usable for any range that uses quints and bits + static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, int& bit_pos, int n) + { + // First extract the trits and the bits from the 5 input values + int quints = 0, bits[3]; + const uint32_t bit_mask = (1 << n) - 1; + for (int i = 0; i < 3; i++) + { + static const int s_muls[3] = { 1, 5, 25 }; + + const int t = pValues[i] >> n; + + quints += t * s_muls[i]; + bits[i] = pValues[i] & bit_mask; + } + + // Encode the quints, by inverting the bit manipulations done by the decoder, converting 3 quints into 7-bits. + // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-integer-sequence-encoding + + assert(quints < 125); + const int T = g_astc_quint_encode[quints]; + + // Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96. + astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) | + (bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3); + } + + // Packs values using ASTC's BISE to output buffer. + static void astc_pack_bise(uint32_t* pDst, const uint8_t* pSrc_vals, int bit_pos, int num_vals, int range) + { + uint32_t temp[5] = { 0, 0, 0, 0, 0 }; + + const int num_bits = g_astc_bise_range_table[range][0]; + + int group_size = 0; + if (g_astc_bise_range_table[range][1]) + group_size = 5; + else if (g_astc_bise_range_table[range][2]) + group_size = 3; + + if (group_size) + { + // Range has trits or quints - pack each group of 5 or 3 values + const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); + + for (int group_index = 0; group_index < total_groups; group_index++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC7 m6 opaque only failed\n"); + uint8_t vals[5] = { 0, 0, 0, 0, 0 }; + + const int limit = basisu::minimum(group_size, num_vals - group_index * group_size); + for (int i = 0; i < limit; i++) + vals[i] = pSrc_vals[group_index * group_size + i]; + + if (group_size == 5) + astc_encode_trits(temp, vals, bit_pos, num_bits); + else + astc_encode_quints(temp, vals, bit_pos, num_bits); + } + } + else + { + for (int i = 0; i < num_vals; i++) + astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits); + } + + pDst[0] |= temp[0]; pDst[1] |= temp[1]; + pDst[2] |= temp[2]; pDst[3] |= temp[3]; + } + + const uint32_t ASTC_BLOCK_MODE_BITS = 11; + const uint32_t ASTC_PART_BITS = 2; + const uint32_t ASTC_CEM_BITS = 4; + const uint32_t ASTC_PARTITION_INDEX_BITS = 10; + const uint32_t ASTC_CCS_BITS = 2; + + const uint32_t g_uastc_mode_astc_block_mode[TOTAL_UASTC_MODES] = { 0x242, 0x42, 0x53, 0x42, 0x42, 0x53, 0x442, 0x42, 0, 0x42, 0x242, 0x442, 0x53, 0x441, 0x42, 0x242, 0x42, 0x442, 0x253 }; + + bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t uastc_mode) + { + assert(uastc_mode < TOTAL_UASTC_MODES); + uint8_t* pDst_bytes = reinterpret_cast<uint8_t*>(pDst); + + const int total_weights = pBlock->m_dual_plane ? 32 : 16; + + // Set mode bits - see Table 146-147 + uint32_t mode = g_uastc_mode_astc_block_mode[uastc_mode]; + pDst_bytes[0] = (uint8_t)mode; + pDst_bytes[1] = (uint8_t)(mode >> 8); + + memset(pDst_bytes + 2, 0, 16 - 2); + + int bit_pos = ASTC_BLOCK_MODE_BITS; + + // We only support 1-5 bit weight indices + assert(!g_astc_bise_range_table[pBlock->m_weight_range][1] && !g_astc_bise_range_table[pBlock->m_weight_range][2]); + const int bits_per_weight = g_astc_bise_range_table[pBlock->m_weight_range][0]; + + // See table 143 - PART + astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_subsets - 1, ASTC_PART_BITS); + + if (pBlock->m_subsets == 1) + astc_set_bits_1_to_9(pDst, bit_pos, pBlock->m_cem, ASTC_CEM_BITS); + else + { + // See table 145 + astc_set_bits(pDst, bit_pos, pBlock->m_partition_seed, ASTC_PARTITION_INDEX_BITS); + + // Table 150 - we assume all CEM's are equal, so write 2 0's along with the CEM + astc_set_bits_1_to_9(pDst, bit_pos, (pBlock->m_cem << 2) & 63, ASTC_CEM_BITS + 2); + } + + if (pBlock->m_dual_plane) + { + const int total_weight_bits = total_weights * bits_per_weight; + + // See Illegal Encodings 23.24 + // https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.inline.html#_illegal_encodings + assert((total_weight_bits >= 24) && (total_weight_bits <= 96)); + + int ccs_bit_pos = 128 - total_weight_bits - ASTC_CCS_BITS; + astc_set_bits_1_to_9(pDst, ccs_bit_pos, pBlock->m_ccs, ASTC_CCS_BITS); + } + + const int num_cem_pairs = (1 + (pBlock->m_cem >> 2)) * pBlock->m_subsets; + assert(num_cem_pairs <= 9); + + astc_pack_bise(pDst, pBlock->m_endpoints, bit_pos, num_cem_pairs * 2, g_uastc_mode_endpoint_ranges[uastc_mode]); + + // Write the weight bits in reverse bit order. + switch (bits_per_weight) + { + case 1: + { + const uint32_t N = 1; + for (int i = 0; i < total_weights; i++) + { + const uint32_t ofs = 128 - N - i; + assert((ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (pBlock->m_weights[i] << (ofs & 7)); } break; } - case transcoder_texture_format::cTFBC7_M5_RGBA: + case 2: { -#if !BASISD_SUPPORT_BC7_MODE5 - return false; -#else - assert(bytes_per_block == 16); + const uint32_t N = 2; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits2[4] = { 0, 2, 1, 3 }; + const uint32_t ofs = 128 - N - (i * N); + assert((ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (s_reverse_bits2[pBlock->m_weights[i]] << (ofs & 7)); + } + break; + } + case 3: + { + const uint32_t N = 3; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits3[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; - // First transcode the color slice. The cBC7_M5_COLOR transcoder will output opaque mode 5 blocks. - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_COLOR, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + const uint32_t ofs = 128 - N - (i * N); + const uint32_t rev = s_reverse_bits3[pBlock->m_weights[i]] << (ofs & 7); - if ((status) && (basis_file_has_alpha_slices)) + uint32_t index = ofs >> 3; + assert(index < 16); + pDst_bytes[index++] |= rev & 0xFF; + if (index < 16) + pDst_bytes[index++] |= (rev >> 8); + } + break; + } + case 4: + { + const uint32_t N = 4; + for (int i = 0; i < total_weights; i++) { - // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha. - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + static const uint8_t s_reverse_bits4[16] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + const int ofs = 128 - N - (i * N); + assert(ofs >= 0 && (ofs >> 3) < 16); + pDst_bytes[ofs >> 3] |= (s_reverse_bits4[pBlock->m_weights[i]] << (ofs & 7)); + } + break; + } + case 5: + { + const uint32_t N = 5; + for (int i = 0; i < total_weights; i++) + { + static const uint8_t s_reverse_bits5[32] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30, 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31 }; + + const uint32_t ofs = 128 - N - (i * N); + const uint32_t rev = s_reverse_bits5[pBlock->m_weights[i]] << (ofs & 7); + + uint32_t index = ofs >> 3; + assert(index < 16); + pDst_bytes[index++] |= rev & 0xFF; + if (index < 16) + pDst_bytes[index++] |= (rev >> 8); } break; -#endif } - case transcoder_texture_format::cTFETC2_RGBA: + default: + assert(0); + break; + } + + return true; + } + + const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern) + { + const uint8_t* pSubset_anchor_indices = g_zero_pattern; + pPartition_pattern = g_zero_pattern; + + if (subsets >= 2) { -#if !BASISD_SUPPORT_ETC2_EAC_A8 - return false; + if (subsets == 3) + { + pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0]; + pSubset_anchor_indices = &g_astc_bc7_pattern3_anchors[common_pattern][0]; + } + else if (mode == 7) + { + pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0]; + pSubset_anchor_indices = &g_bc7_3_astc2_patterns2_anchors[common_pattern][0]; + } + else + { + pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0]; + pSubset_anchor_indices = &g_astc_bc7_pattern2_anchors[common_pattern][0]; + } + } + + return pSubset_anchor_indices; + } + + static inline uint32_t read_bit(const uint8_t* pBuf, uint32_t& bit_offset) + { + uint32_t byte_bits = pBuf[bit_offset >> 3] >> (bit_offset & 7); + bit_offset += 1; + return byte_bits & 1; + } + + static inline uint32_t read_bits1_to_9(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 9); + if (!codesize) + return 0; + + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS) || (bit_offset >= 112)) + { + const uint8_t* pBytes = &pBuf[bit_offset >> 3U]; + + uint32_t byte_bit_offset = bit_offset & 7U; + + uint32_t bits = pBytes[0] >> byte_bit_offset; + uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset); + + uint32_t bits_remaining = codesize - bits_read; + if (bits_remaining) + bits |= ((uint32_t)pBytes[1]) << bits_read; + + bit_offset += codesize; + + return bits & ((1U << codesize) - 1U); + } + + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t *)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset) & ((1U << codesize) - 1U); + } + + inline uint64_t read_bits64(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 64U); + uint64_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7U; + uint32_t bits_to_read = basisu::minimum<int>(codesize - total_bits, 8U - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3U] >> byte_bit_offset; + byte_bits &= ((1U << bits_to_read) - 1U); + + bits |= ((uint64_t)(byte_bits) << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; + } + + static inline uint32_t read_bits1_to_9_fst(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) + { + assert(codesize <= 9); + if (!codesize) + return 0; + assert(bit_offset < 112); + + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) + { + const uint8_t* pBytes = &pBuf[bit_offset >> 3U]; + + uint32_t byte_bit_offset = bit_offset & 7U; + + uint32_t bits = pBytes[0] >> byte_bit_offset; + uint32_t bits_read = basisu::minimum<int>(codesize, 8 - byte_bit_offset); + + uint32_t bits_remaining = codesize - bits_read; + if (bits_remaining) + bits |= ((uint32_t)pBytes[1]) << bits_read; + + bit_offset += codesize; + + return bits & ((1U << codesize) - 1U); + } + + uint32_t byte_bit_offset = bit_offset & 7U; + const uint16_t w = *(const uint16_t*)(&pBuf[bit_offset >> 3U]); + bit_offset += codesize; + return (w >> byte_bit_offset)& ((1U << codesize) - 1U); + } + + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) + { + //memset(&unpacked, 0, sizeof(unpacked)); + +#if 0 + uint8_t table[128]; + memset(table, 0xFF, sizeof(table)); + + { + for (uint32_t mode = 0; mode <= TOTAL_UASTC_MODES; mode++) + { + const uint32_t code = g_uastc_mode_huff_codes[mode][0]; + const uint32_t codesize = g_uastc_mode_huff_codes[mode][1]; + + table[code] = mode; + + uint32_t bits_left = 7 - codesize; + for (uint32_t i = 0; i < (1 << bits_left); i++) + table[code | (i << codesize)] = mode; + } + + for (uint32_t i = 0; i < 128; i++) + printf("%u,", table[i]); + exit(0); + } #endif - assert(bytes_per_block == 16); - if (basis_file_has_alpha_slices) + const int mode = g_uastc_huff_modes[blk.m_bytes[0] & 127]; + if (mode >= (int)TOTAL_UASTC_MODES) + return false; + + unpacked.m_mode = mode; + + uint32_t bit_ofs = g_uastc_mode_huff_codes[mode][1]; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + unpacked.m_solid_color.r = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.g = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.b = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + unpacked.m_solid_color.a = (uint8_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + + if (read_hints) { - // First decode the alpha data - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + unpacked.m_etc1_flip = false; + unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + unpacked.m_etc1_inten1 = 0; + unpacked.m_etc1_selector = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2); + unpacked.m_etc1_r = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_g = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_b = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + unpacked.m_etc1_bias = 0; + unpacked.m_etc2_hints = 0; } + + return true; + } + + if (read_hints) + { + if (g_uastc_mode_has_bc1_hint0[mode]) + unpacked.m_bc1_hint0 = read_bit(blk.m_bytes, bit_ofs) != 0; + else + unpacked.m_bc1_hint0 = false; + + if (g_uastc_mode_has_bc1_hint1[mode]) + unpacked.m_bc1_hint1 = read_bit(blk.m_bytes, bit_ofs) != 0; else + unpacked.m_bc1_hint1 = false; + + unpacked.m_etc1_flip = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_diff = read_bit(blk.m_bytes, bit_ofs) != 0; + unpacked.m_etc1_inten0 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + unpacked.m_etc1_inten1 = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 3); + + if (g_uastc_mode_has_etc1_bias[mode]) + unpacked.m_etc1_bias = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + else + unpacked.m_etc1_bias = 0; + + if (g_uastc_mode_has_alpha[mode]) { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, output_row_pitch_in_blocks_or_pixels); - status = true; + unpacked.m_etc2_hints = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 8); + //assert(unpacked.m_etc2_hints > 0); } + else + unpacked.m_etc2_hints = 0; + } + else + bit_ofs += g_uastc_mode_total_hint_bits[mode]; + + uint32_t subsets = 1; + switch (mode) + { + case 2: + case 4: + case 7: + case 9: + case 16: + unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 5); + subsets = 2; + break; + case 3: + unpacked.m_common_pattern = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 4); + subsets = 3; + break; + default: + break; + } - if (status) + uint32_t part_seed = 0; + switch (mode) + { + case 2: + case 4: + case 9: + case 16: + if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS2) + return false; + + part_seed = g_astc_bc7_common_partitions2[unpacked.m_common_pattern].m_astc; + break; + case 3: + if (unpacked.m_common_pattern >= TOTAL_ASTC_BC7_COMMON_PARTITIONS3) + return false; + + part_seed = g_astc_bc7_common_partitions3[unpacked.m_common_pattern].m_astc; + break; + case 7: + if (unpacked.m_common_pattern >= TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS) + return false; + + part_seed = g_bc7_3_astc2_common_partitions[unpacked.m_common_pattern].m_astc2; + break; + default: + break; + } + + uint32_t total_planes = 1; + switch (mode) + { + case 6: + case 11: + case 13: + unpacked.m_astc.m_ccs = (int)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, 2); + total_planes = 2; + break; + case 17: + unpacked.m_astc.m_ccs = 3; + total_planes = 2; + break; + default: + break; + } + + unpacked.m_astc.m_dual_plane = (total_planes == 2); + + unpacked.m_astc.m_subsets = subsets; + unpacked.m_astc.m_partition_seed = part_seed; + + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + const uint32_t weight_bits = g_uastc_mode_weight_bits[mode]; + + unpacked.m_astc.m_weight_range = g_uastc_mode_weight_ranges[mode]; + + const uint32_t total_values = total_comps * 2 * subsets; + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + + const uint32_t cem = g_uastc_mode_cem[mode]; + unpacked.m_astc.m_cem = cem; + + const uint32_t ep_bits = g_astc_bise_range_table[endpoint_range][0]; + const uint32_t ep_trits = g_astc_bise_range_table[endpoint_range][1]; + const uint32_t ep_quints = g_astc_bise_range_table[endpoint_range][2]; + + uint32_t total_tqs = 0; + uint32_t bundle_size = 0, mul = 0; + if (ep_trits) + { + total_tqs = (total_values + 4) / 5; + bundle_size = 5; + mul = 3; + } + else if (ep_quints) + { + total_tqs = (total_values + 2) / 3; + bundle_size = 3; + mul = 5; + } + + uint32_t tq_values[8]; + for (uint32_t i = 0; i < total_tqs; i++) + { + uint32_t num_bits = ep_trits ? 8 : 7; + if (i == (total_tqs - 1)) { - // Now decode the color data - status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + uint32_t num_remaining = total_values - (total_tqs - 1) * bundle_size; + if (ep_trits) + { + switch (num_remaining) + { + case 1: num_bits = 2; break; + case 2: num_bits = 4; break; + case 3: num_bits = 5; break; + case 4: num_bits = 7; break; + default: break; + } + } + else if (ep_quints) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2 RGB failed\n"); + switch (num_remaining) + { + case 1: num_bits = 3; break; + case 2: num_bits = 5; break; + default: break; + } } } - else + + tq_values[i] = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, num_bits); + } // i + + uint32_t accum = 0; + uint32_t accum_remaining = 0; + uint32_t next_tq_index = 0; + + for (uint32_t i = 0; i < total_values; i++) + { + uint32_t value = (uint32_t)read_bits1_to_9_fst(blk.m_bytes, bit_ofs, ep_bits); + + if (total_tqs) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2 A failed\n"); + if (!accum_remaining) + { + assert(next_tq_index < total_tqs); + accum = tq_values[next_tq_index++]; + accum_remaining = bundle_size; + } + + // TODO: Optimize with tables + uint32_t v = accum % mul; + accum /= mul; + accum_remaining--; + + value |= (v << ep_bits); } - break; + + unpacked.m_astc.m_endpoints[i] = (uint8_t)value; } - case transcoder_texture_format::cTFBC3_RGBA: + + const uint8_t* pPartition_pattern; + const uint8_t* pSubset_anchor_indices = get_anchor_indices(subsets, mode, unpacked.m_common_pattern, pPartition_pattern); + +#ifdef _DEBUG + for (uint32_t i = 0; i < 16; i++) + assert(pPartition_pattern[i] == astc_compute_texel_partition(part_seed, i & 3, i >> 2, 0, subsets, true)); + + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) { -#if !BASISD_SUPPORT_DXT1 - return false; -#endif -#if !BASISD_SUPPORT_DXT5A - return false; + uint32_t anchor_index = 0; + + for (uint32_t i = 0; i < 16; i++) + { + if (pPartition_pattern[i] == subset_index) + { + anchor_index = i; + break; + } + } + + assert(pSubset_anchor_indices[subset_index] == anchor_index); + } #endif - assert(bytes_per_block == 16); - // First decode the alpha data - if (basis_file_has_alpha_slices) +#if 0 + const uint32_t total_planes_shift = total_planes - 1; + for (uint32_t i = 0; i < 16 * total_planes; i++) + { + uint32_t num_bits = weight_bits; + for (uint32_t s = 0; s < subsets; s++) { - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + if (pSubset_anchor_indices[s] == (i >> total_planes_shift)) + { + num_bits--; + break; + } } + + unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, num_bits); + } +#endif + + if (mode == 18) + { + // Mode 18 is the only mode with more than 64 weight bits. + for (uint32_t i = 0; i < 16; i++) + unpacked.m_astc.m_weights[i] = (uint8_t)read_bits1_to_9(blk.m_bytes, bit_ofs, i ? weight_bits : (weight_bits - 1)); + } + else + { + // All other modes have <= 64 weight bits. + uint64_t bits; + + // Read the weight bits + if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) + bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum<int>(64, 128 - (int)bit_ofs)); else { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); - status = true; +#ifdef __EMSCRIPTEN__ + bits = blk.m_dwords[2]; + bits |= (((uint64_t)blk.m_dwords[3]) << 32U); +#else + bits = blk.m_qwords[1]; +#endif + + if (bit_ofs >= 64U) + bits >>= (bit_ofs - 64U); + else + { + assert(bit_ofs >= 56U); + + uint32_t bits_needed = 64U - bit_ofs; + bits <<= bits_needed; + bits |= (blk.m_bytes[7] >> (8U - bits_needed)); + } } + + bit_ofs = 0; - if (status) + const uint32_t mask = (1U << weight_bits) - 1U; + const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; + + if (total_planes == 2) { - // Now decode the color data. Forbid 3 color blocks, which aren't allowed in BC3. - status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC1, 16, decode_flags | cDecodeFlagsBC1ForbidThreeColorBlocks, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + // Dual plane modes always have a single subset, and the first 2 weights are anchors. + + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + + for (uint32_t i = 2; i < 32; i++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC3 RGB failed\n"); + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; } } else { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC3 A failed\n"); - } + if (subsets == 1) + { + // Specialize the single subset case. + if (weight_bits == 4) + { + assert(bit_ofs == 0); + + // Specialize the most common case: 4-bit weights. + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); + unpacked.m_astc.m_weights[2] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 1)) & 15); + unpacked.m_astc.m_weights[3] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 2)) & 15); + + unpacked.m_astc.m_weights[4] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 3)) & 15); + unpacked.m_astc.m_weights[5] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 4)) & 15); + unpacked.m_astc.m_weights[6] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 5)) & 15); + unpacked.m_astc.m_weights[7] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 6)) & 15); + + unpacked.m_astc.m_weights[8] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 7)) & 15); + unpacked.m_astc.m_weights[9] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 8)) & 15); + unpacked.m_astc.m_weights[10] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 9)) & 15); + unpacked.m_astc.m_weights[11] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 10)) & 15); + + unpacked.m_astc.m_weights[12] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 11)) & 15); + unpacked.m_astc.m_weights[13] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 12)) & 15); + unpacked.m_astc.m_weights[14] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 13)) & 15); + unpacked.m_astc.m_weights[15] = (uint8_t)((uint32_t)(bits >> (3 + 4 * 14)) & 15); + } + else + { + // First weight is always an anchor. + unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); - break; + for (uint32_t i = 1; i < 16; i++) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + } + else + { + const uint32_t a0 = pSubset_anchor_indices[0], a1 = pSubset_anchor_indices[1], a2 = pSubset_anchor_indices[2]; + + for (uint32_t i = 0; i < 16; i++) + { + if ((i == a0) || (i == a1) || (i == a2)) + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); + bit_ofs += (weight_bits - 1); + } + else + { + unpacked.m_astc.m_weights[i] = (uint8_t)((uint32_t)(bits >> bit_ofs) & mask); + bit_ofs += weight_bits; + } + } + } + } } - case transcoder_texture_format::cTFBC5_RG: + + if ((blue_contract_check) && (total_comps >= 3)) { -#if !BASISD_SUPPORT_DXT5A - return false; -#endif - assert(bytes_per_block == 16); + // We only need to disable ASTC Blue Contraction when we'll be packing to ASTC. The other transcoders don't care. + bool invert_subset[3] = { false, false, false }; + bool any_flag = false; - // Decode the R data (actually the green channel of the color data slice in the basis file) - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (status) + for (uint32_t subset_index = 0; subset_index < subsets; subset_index++) { - if (basis_file_has_alpha_slices) + const int s0 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 0]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 2]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 4]].m_unquant; + + const int s1 = g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 1]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 3]].m_unquant + + g_astc_unquant[endpoint_range][unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + 5]].m_unquant; + + if (s1 < s0) { - // Decode the G data (actually the green channel of the alpha data slice in the basis file) - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + for (uint32_t c = 0; c < total_comps; c++) + std::swap(unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 0], unpacked.m_astc.m_endpoints[subset_index * total_comps * 2 + c * 2 + 1]); + + invert_subset[subset_index] = true; + any_flag = true; + } + } + + if (any_flag) + { + const uint32_t weight_mask = (1 << weight_bits) - 1; + + for (uint32_t i = 0; i < 16; i++) + { + uint32_t subset = pPartition_pattern[i]; + + if (invert_subset[subset]) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC5 1 failed\n"); + unpacked.m_astc.m_weights[i * total_planes] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes]); + + if (total_planes == 2) + unpacked.m_astc.m_weights[i * total_planes + 1] = (uint8_t)(weight_mask - unpacked.m_astc.m_weights[i * total_planes + 1]); } } + } + } + + return true; + } + + static const uint32_t* g_astc_weight_tables[6] = { nullptr, g_bc7_weights1, g_bc7_weights2, g_bc7_weights3, g_astc_weights4, g_astc_weights5 }; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb) + { + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = solid_color; + return true; + } + + color32 endpoints[3][2]; + + const uint32_t total_subsets = g_uastc_mode_subsets[mode]; + const uint32_t total_comps = basisu::minimum<uint32_t>(4U, g_uastc_mode_comps[mode]); + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + const uint32_t total_planes = g_uastc_mode_planes[mode]; + const uint32_t weight_bits = g_uastc_mode_weight_bits[mode]; + const uint32_t weight_levels = 1 << weight_bits; + + for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++) + { + if (total_comps == 2) + { + const uint32_t ll = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 0]].m_unquant; + const uint32_t lh = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 0 * 2 + 1]].m_unquant; + + const uint32_t al = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 0]].m_unquant; + const uint32_t ah = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + 1 * 2 + 1]].m_unquant; + + endpoints[subset_index][0].set_noclamp_rgba(ll, ll, ll, al); + endpoints[subset_index][1].set_noclamp_rgba(lh, lh, lh, ah); + } + else + { + for (uint32_t comp_index = 0; comp_index < total_comps; comp_index++) + { + endpoints[subset_index][0][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 0]].m_unquant; + endpoints[subset_index][1][comp_index] = g_astc_unquant[endpoint_range][astc.m_endpoints[subset_index * total_comps * 2 + comp_index * 2 + 1]].m_unquant; + } + for (uint32_t comp_index = total_comps; comp_index < 4; comp_index++) + { + endpoints[subset_index][0][comp_index] = 255; + endpoints[subset_index][1][comp_index] = 255; + } + } + } + + color32 block_colors[3][32]; + + const uint32_t* pWeights = g_astc_weight_tables[weight_bits]; + + for (uint32_t subset_index = 0; subset_index < total_subsets; subset_index++) + { + for (uint32_t l = 0; l < weight_levels; l++) + { + if (total_comps == 2) + { + const uint8_t lc = (uint8_t)astc_interpolate(endpoints[subset_index][0][0], endpoints[subset_index][1][0], pWeights[l], srgb); + const uint8_t ac = (uint8_t)astc_interpolate(endpoints[subset_index][0][3], endpoints[subset_index][1][3], pWeights[l], srgb); + + block_colors[subset_index][l].set(lc, lc, lc, ac); + } else { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); - status = true; + uint32_t comp_index; + for (comp_index = 0; comp_index < total_comps; comp_index++) + block_colors[subset_index][l][comp_index] = (uint8_t)astc_interpolate(endpoints[subset_index][0][comp_index], endpoints[subset_index][1][comp_index], pWeights[l], srgb); + + for (; comp_index < 4; comp_index++) + block_colors[subset_index][l][comp_index] = 255; } } + } + + const uint8_t* pPartition_pattern = g_zero_pattern; + + if (total_subsets >= 2) + { + if (total_subsets == 3) + pPartition_pattern = &g_astc_bc7_patterns3[common_pattern][0]; + else if (mode == 7) + pPartition_pattern = &g_bc7_3_astc2_patterns2[common_pattern][0]; else + pPartition_pattern = &g_astc_bc7_patterns2[common_pattern][0]; + +#ifdef _DEBUG + for (uint32_t i = 0; i < 16; i++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC5 channel 0 failed\n"); + assert(pPartition_pattern[i] == (uint8_t)astc_compute_texel_partition(astc.m_partition_seed, i & 3, i >> 2, 0, total_subsets, true)); } - break; +#endif } - case transcoder_texture_format::cTFASTC_4x4_RGBA: + + if (total_planes == 1) { -#if !BASISD_SUPPORT_ASTC + if (total_subsets == 1) + { + for (uint32_t i = 0; i < 16; i++) + { + assert(astc.m_weights[i] < weight_levels); + pPixels[i] = block_colors[0][astc.m_weights[i]]; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + assert(astc.m_weights[i] < weight_levels); + pPixels[i] = block_colors[pPartition_pattern[i]][astc.m_weights[i]]; + } + } + } + else + { + assert(total_subsets == 1); + + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t subset_index = 0; // pPartition_pattern[i]; + + const uint32_t weight_index0 = astc.m_weights[i * 2]; + const uint32_t weight_index1 = astc.m_weights[i * 2 + 1]; + + assert(weight_index0 < weight_levels && weight_index1 < weight_levels); + + color32& c = pPixels[i]; + for (uint32_t comp = 0; comp < 4; comp++) + { + if ((int)comp == astc.m_ccs) + c[comp] = block_colors[subset_index][weight_index1][comp]; + else + c[comp] = block_colors[subset_index][weight_index0][comp]; + } + } + } + + return true; + } + + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb) + { + return unpack_uastc(unpacked_blk.m_mode, unpacked_blk.m_common_pattern, unpacked_blk.m_solid_color, unpacked_blk.m_astc, pPixels, srgb); + } + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb) + { + unpacked_uastc_block unpacked_blk; + + if (!unpack_uastc(blk, unpacked_blk, false, false)) return false; -#endif - assert(bytes_per_block == 16); - if (basis_file_has_alpha_slices) + return unpack_uastc(unpacked_blk, pPixels, srgb); + } + + // Determines the best shared pbits to use to encode xl/xh + static void determine_shared_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2]) + { + const uint32_t total_bits = comp_bits + 1; + assert(total_bits >= 4 && total_bits <= 8); + + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + for (uint32_t c = 0; c < 4; c++) { - // First decode the alpha data to the output (we're using the output texture as a temp buffer here). - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow, scaledHigh; + + for (uint32_t i = 0; i < 4; i++) + { + scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits)); + scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits); + assert(scaledLow.m_c[i] <= 255); + + scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits)); + scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits); + assert(scaledHigh.m_c[i] <= 255); + } + + float err = 0; + for (uint32_t i = 0; i < total_comps; i++) + err += basisu::squaref((scaledLow.m_c[i] / 255.0f) - xl[i]) + basisu::squaref((scaledHigh.m_c[i] / 255.0f) - xh[i]); + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + } + } + + // Determines the best unique pbits to use to encode xl/xh + static void determine_unique_pbits( + uint32_t total_comps, uint32_t comp_bits, float xl[4], float xh[4], + color_quad_u8& bestMinColor, color_quad_u8& bestMaxColor, uint32_t best_pbits[2]) + { + const uint32_t total_bits = comp_bits + 1; + const int iscalep = (1 << total_bits) - 1; + const float scalep = (float)iscalep; + + float best_err0 = 1e+9f; + float best_err1 = 1e+9f; + + for (int p = 0; p < 2; p++) + { + color_quad_u8 xMinColor, xMaxColor; + + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + + color_quad_u8 scaledLow, scaledHigh; + for (uint32_t i = 0; i < 4; i++) + { + scaledLow.m_c[i] = (xMinColor.m_c[i] << (8 - total_bits)); + scaledLow.m_c[i] |= (scaledLow.m_c[i] >> total_bits); + assert(scaledLow.m_c[i] <= 255); + + scaledHigh.m_c[i] = (xMaxColor.m_c[i] << (8 - total_bits)); + scaledHigh.m_c[i] |= (scaledHigh.m_c[i] >> total_bits); + assert(scaledHigh.m_c[i] <= 255); + } + + float err0 = 0, err1 = 0; + for (uint32_t i = 0; i < total_comps; i++) + { + err0 += basisu::squaref(scaledLow.m_c[i] - xl[i] * 255.0f); + err1 += basisu::squaref(scaledHigh.m_c[i] - xh[i] * 255.0f); + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; + + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; + + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } + } + } + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, true, false)) + return false; + + bool success = false; + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_astc_solid_block(pDst, unpacked_src_blk.m_solid_color); + success = true; + } + else + { + success = pack_astc_block(static_cast<uint32_t*>(pDst), &unpacked_src_blk.m_astc, unpacked_src_blk.m_mode); + } + + return success; + } + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk) + { + memset(&dst_blk, 0, sizeof(dst_blk)); + + const uint32_t mode = unpacked_src_blk.m_mode; + + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + switch (mode) + { + case 0: + case 5: + case 10: + case 12: + case 14: + case 15: + case 18: + { + // MODE 0: DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 19 (192) - BC7 MODE6 RGB + // MODE 5: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 RGB + // MODE 10 DualPlane: 0, WeightRange: 8 (16), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE6 + // MODE 12: DualPlane: 0, WeightRange : 5 (8), Subsets : 1, EndpointRange : 19 (192) - BC7 MODE6 + // MODE 14: DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE6 + // MODE 18: DualPlane: 0, WeightRange : 11 (32), Subsets : 1, CEM : 8, EndpointRange : 11 (32) - BC7 MODE6 + // MODE 15: DualPlane: 0, WeightRange : 8 (16), Subsets : 1, CEM : 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE6 + dst_blk.m_mode = 6; + + float xl[4], xh[4]; + if (total_comps == 2) + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f; + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f; + + xl[1] = xl[0]; + xh[1] = xh[0]; + + xl[2] = xl[0]; + xh[2] = xh[0]; + + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f; + } + else + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant / 255.0f; + xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant / 255.0f; + xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4]].m_unquant / 255.0f; + + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant / 255.0f; + xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant / 255.0f; + xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5]].m_unquant / 255.0f; + + if (total_comps == 4) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to failed\n"); + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7]].m_unquant / 255.0f; } else { - // Now decode the color data and transcode to ASTC. The transcoder function will read the alpha selector data from the output texture as it converts and - // transcode both the alpha and color data at the same time to ASTC. - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + xl[3] = 1.0f; + xh[3] = 1.0f; } } + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + determine_unique_pbits((total_comps == 2) ? 4 : total_comps, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + dst_blk.m_low[0] = bestMinColor; + dst_blk.m_high[0] = bestMaxColor; + + if (total_comps == 3) + { + dst_blk.m_low[0].m_c[3] = 127; + dst_blk.m_high[0].m_c[3] = 127; + } + + dst_blk.m_pbits[0][0] = best_pbits[0]; + dst_blk.m_pbits[0][1] = best_pbits[1]; + + if (mode == 18) + { + const uint8_t s_bc7_5_to_4[32] = { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_5_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else if (mode == 14) + { + const uint8_t s_bc7_2_to_4[4] = { 0, 5, 10, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_2_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } + else if ((mode == 5) || (mode == 12)) + { + const uint8_t s_bc7_3_to_4[8] = { 0, 2, 4, 6, 9, 11, 13, 15 }; + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = s_bc7_3_to_4[unpacked_src_blk.m_astc.m_weights[i]]; + } else - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + { + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + } break; } - case transcoder_texture_format::cTFATC_RGB: + case 1: { -#if !BASISD_SUPPORT_ATC - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + // DualPlane: 0, WeightRange : 2 (4), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE3 + // Mode 1 uses endpoint range 20 - no need to use ASTC dequant tables. + dst_blk.m_mode = 3; - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + float xl[4], xh[4]; + xl[0] = unpacked_src_blk.m_astc.m_endpoints[0] / 255.0f; + xl[1] = unpacked_src_blk.m_astc.m_endpoints[2] / 255.0f; + xl[2] = unpacked_src_blk.m_astc.m_endpoints[4] / 255.0f; + xl[3] = 1.0f; + + xh[0] = unpacked_src_blk.m_astc.m_endpoints[1] / 255.0f; + xh[1] = unpacked_src_blk.m_astc.m_endpoints[3] / 255.0f; + xh[2] = unpacked_src_blk.m_astc.m_endpoints[5] / 255.0f; + xh[3] = 1.0f; + + uint32_t best_pbits[2]; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + for (uint32_t i = 0; i < 3; i++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ATC_RGB failed\n"); + dst_blk.m_low[0].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[0].m_c[i] = bestMaxColor.m_c[i]; + dst_blk.m_low[1].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[1].m_c[i] = bestMaxColor.m_c[i]; } + dst_blk.m_pbits[0][0] = best_pbits[0]; + dst_blk.m_pbits[0][1] = best_pbits[1]; + dst_blk.m_pbits[1][0] = best_pbits[0]; + dst_blk.m_pbits[1][1] = best_pbits[1]; + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + break; } - case transcoder_texture_format::cTFATC_RGBA: + case 2: { -#if !BASISD_SUPPORT_ATC - return false; -#endif -#if !BASISD_SUPPORT_DXT5A - return false; -#endif - assert(bytes_per_block == 16); + // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + dst_blk.m_mode = 1; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; - // First decode the alpha data - if (basis_file_has_alpha_slices) + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + float xl[4], xh[4]; + xl[3] = 1.0f; + xh[3] = 1.0f; + + for (uint32_t subset = 0; subset < 2; subset++) { - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + for (uint32_t i = 0; i < 3; i++) + { + uint32_t v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]; + v = (v << 4) | v; + xl[i] = v / 255.0f; + + v = unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]; + v = (v << 4) | v; + xh[i] = v / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_shared_pbits(3, 6, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset; + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i]; + } + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + } // subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 3: + { + // DualPlane: 0, WeightRange : 2 (4), Subsets : 3, EndpointRange : 7 (12) - BC7 MODE2 + dst_blk.m_mode = 2; + dst_blk.m_partition = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_bc7; + + const uint32_t perm = g_astc_bc7_common_partitions3[unpacked_src_blk.m_common_pattern].m_astc_to_bc7_perm; + + for (uint32_t subset = 0; subset < 3; subset++) + { + for (uint32_t comp = 0; comp < 3; comp++) + { + uint32_t lo = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 0 + subset * 6]].m_unquant; + uint32_t hi = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[comp * 2 + 1 + subset * 6]].m_unquant; + + // TODO: I think this can be improved by using tables like Basis Universal does with ETC1S conversion. + lo = (lo * 31 + 127) / 255; + hi = (hi * 31 + 127) / 255; + + const uint32_t bc7_subset_index = g_astc_to_bc7_partition_index_perm_tables[perm][subset]; + + dst_blk.m_low[bc7_subset_index].m_c[comp] = (uint8_t)lo; + dst_blk.m_high[bc7_subset_index].m_c[comp] = (uint8_t)hi; + } + } + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 4: + { + // 4. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, EndpointRange: 12 (40) - BC7 MODE3 + dst_blk.m_mode = 3; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + float xl[4], xh[4]; + xl[3] = 1.0f; + xh[3] = 1.0f; + + for (uint32_t subset = 0; subset < 2; subset++) + { + for (uint32_t i = 0; i < 3; i++) + { + xl[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6]].m_unquant / 255.0f; + xh[i] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[i * 2 + subset * 6 + 1]].m_unquant / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(3, 7, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? (1 - subset) : subset; + + for (uint32_t i = 0; i < 3; i++) + { + dst_blk.m_low[bc7_subset_index].m_c[i] = bestMinColor.m_c[i]; + dst_blk.m_high[bc7_subset_index].m_c[i] = bestMaxColor.m_c[i]; + } + dst_blk.m_low[bc7_subset_index].m_c[3] = 127; + dst_blk.m_high[bc7_subset_index].m_c[3] = 127; + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1]; + + } // subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + + break; + } + case 6: + case 11: + case 13: + case 17: + { + // MODE 6: DualPlane: 1, WeightRange : 2 (4), Subsets : 1, EndpointRange : 18 (160) - BC7 MODE5 RGB + // MODE 11: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, EndpointRange: 13 (48) - BC7 MODE5 + // MODE 13: DualPlane: 1, WeightRange: 0 (2), Subsets : 1, EndpointRange : 20 (256) - BC7 MODE5 + // MODE 17: DualPlane: 1, WeightRange: 2 (4), Subsets: 1, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE5 + dst_blk.m_mode = 5; + dst_blk.m_rotation = (unpacked_src_blk.m_astc.m_ccs + 1) & 3; + + if (total_comps == 2) + { + assert(unpacked_src_blk.m_astc.m_ccs == 3); + + dst_blk.m_low->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0]].m_unquant * 127 + 127) / 255); + dst_blk.m_high->m_c[0] = (uint8_t)((g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1]].m_unquant * 127 + 127) / 255); + + dst_blk.m_low->m_c[1] = dst_blk.m_low->m_c[0]; + dst_blk.m_high->m_c[1] = dst_blk.m_high->m_c[0]; + + dst_blk.m_low->m_c[2] = dst_blk.m_low->m_c[0]; + dst_blk.m_high->m_c[2] = dst_blk.m_high->m_c[0]; + + dst_blk.m_low->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2]].m_unquant); + dst_blk.m_high->m_c[3] = (uint8_t)(g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3]].m_unquant); } else { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, output_row_pitch_in_blocks_or_pixels); - status = true; + for (uint32_t astc_comp = 0; astc_comp < 4; astc_comp++) + { + uint32_t bc7_comp = astc_comp; + // ASTC and BC7 handle dual plane component rotations differently: + // ASTC: 2nd plane separately interpolates the CCS channel. + // BC7: 2nd plane channel is swapped with alpha, 2nd plane controls alpha interpolation, then we swap alpha with the desired channel. + if (astc_comp == (uint32_t)unpacked_src_blk.m_astc.m_ccs) + bc7_comp = 3; + else if (astc_comp == 3) + bc7_comp = unpacked_src_blk.m_astc.m_ccs; + + uint32_t l = 255, h = 255; + if (astc_comp < total_comps) + { + l = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 0]].m_unquant; + h = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[astc_comp * 2 + 1]].m_unquant; + } + + if (bc7_comp < 3) + { + l = (l * 127 + 127) / 255; + h = (h * 127 + 127) / 255; + } + + dst_blk.m_low->m_c[bc7_comp] = (uint8_t)l; + dst_blk.m_high->m_c[bc7_comp] = (uint8_t)h; + } } - if (status) + if (mode == 13) { - status = transcode_slice(pData, data_size, slice_index, (uint8_t*)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cATC_RGB, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + for (uint32_t i = 0; i < 16; i++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ATC RGB failed\n"); + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2] ? 3 : 0; + dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1] ? 3 : 0; } } else { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ATC A failed\n"); + for (uint32_t i = 0; i < 16; i++) + { + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2]; + dst_blk.m_alpha_selectors[i] = unpacked_src_blk.m_astc.m_weights[i * 2 + 1]; + } } + break; } - case transcoder_texture_format::cTFPVRTC2_4_RGB: + case 7: { -#if !BASISD_SUPPORT_PVRTC2 - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + // DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 12 (40) - BC7 MODE2 + dst_blk.m_mode = 2; + dst_blk.m_partition = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].m_bc73; - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + const uint32_t common_pattern_k = g_bc7_3_astc2_common_partitions[unpacked_src_blk.m_common_pattern].k; + + for (uint32_t bc7_part = 0; bc7_part < 3; bc7_part++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to cPVRTC2_4_RGB failed\n"); + const uint32_t astc_part = bc7_convert_partition_index_3_to_2(bc7_part, common_pattern_k); + + for (uint32_t c = 0; c < 3; c++) + { + dst_blk.m_low[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 0 + astc_part * 6]].m_unquant * 31 + 127) / 255; + dst_blk.m_high[bc7_part].m_c[c] = (g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[c * 2 + 1 + astc_part * 6]].m_unquant * 31 + 127) / 255; + } } + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; + break; } - case transcoder_texture_format::cTFPVRTC2_4_RGBA: + case UASTC_MODE_INDEX_SOLID_COLOR: { -#if !BASISD_SUPPORT_PVRTC2 - return false; -#endif - if (basis_file_has_alpha_slices) + // Void-Extent: Solid Color RGBA (BC7 MODE5 or MODE6) + const color32& solid_color = unpacked_src_blk.m_solid_color; + + uint32_t best_err0 = g_bc7_mode_6_optimal_endpoints[solid_color.r][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][0].m_error + + g_bc7_mode_6_optimal_endpoints[solid_color.b][0].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][0].m_error; + + uint32_t best_err1 = g_bc7_mode_6_optimal_endpoints[solid_color.r][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.g][1].m_error + + g_bc7_mode_6_optimal_endpoints[solid_color.b][1].m_error + g_bc7_mode_6_optimal_endpoints[solid_color.a][1].m_error; + + if (best_err0 > 0 && best_err1 > 0) { - // First decode the alpha data to the output (we're using the output texture as a temp buffer here). - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t*)pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cIndices, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + dst_blk.m_mode = 5; + + for (uint32_t c = 0; c < 3; c++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to failed\n"); + dst_blk.m_low[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_lo; + dst_blk.m_high[0].m_c[c] = g_bc7_mode_5_optimal_endpoints[solid_color.c[c]].m_hi; } - else + + memset(dst_blk.m_selectors, BC7ENC_MODE_5_OPTIMAL_INDEX, 16); + + dst_blk.m_low[0].m_c[3] = solid_color.c[3]; + dst_blk.m_high[0].m_c[3] = solid_color.c[3]; + + //memset(dst_blk.m_alpha_selectors, 0, 16); + } + else + { + dst_blk.m_mode = 6; + + uint32_t best_p = 0; + if (best_err1 < best_err0) + best_p = 1; + + for (uint32_t c = 0; c < 4; c++) { - // Now decode the color data and transcode to PVRTC2 RGBA. - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); + dst_blk.m_low[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_lo; + dst_blk.m_high[0].m_c[c] = g_bc7_mode_6_optimal_endpoints[solid_color.c[c]][best_p].m_hi; } + + dst_blk.m_pbits[0][0] = best_p; + dst_blk.m_pbits[0][1] = best_p; + memset(dst_blk.m_selectors, BC7ENC_MODE_6_OPTIMAL_INDEX, 16); } - else - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + break; + } + case 9: + case 16: + { + // 9. DualPlane: 0, WeightRange : 2 (4), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE7 + // 16. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 4 (LA Direct), EndpointRange: 20 (256) - BC7 MODE7 + + dst_blk.m_mode = 7; + dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; + + const bool invert_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_invert; + + for (uint32_t astc_subset = 0; astc_subset < 2; astc_subset++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to cPVRTC2_4_RGBA failed\n"); - } + float xl[4], xh[4]; + + if (total_comps == 2) + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 4]].m_unquant / 255.0f; + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 4]].m_unquant / 255.0f; + + xl[1] = xl[0]; + xh[1] = xh[0]; + + xl[2] = xl[0]; + xh[2] = xh[0]; + + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 4]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 4]].m_unquant / 255.0f; + } + else + { + xl[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[0 + astc_subset * 8]].m_unquant / 255.0f; + xl[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[2 + astc_subset * 8]].m_unquant / 255.0f; + xl[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[4 + astc_subset * 8]].m_unquant / 255.0f; + xl[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[6 + astc_subset * 8]].m_unquant / 255.0f; + + xh[0] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[1 + astc_subset * 8]].m_unquant / 255.0f; + xh[1] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[3 + astc_subset * 8]].m_unquant / 255.0f; + xh[2] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[5 + astc_subset * 8]].m_unquant / 255.0f; + xh[3] = g_astc_unquant[endpoint_range][unpacked_src_blk.m_astc.m_endpoints[7 + astc_subset * 8]].m_unquant / 255.0f; + } + + uint32_t best_pbits[2] = { 0, 0 }; + color_quad_u8 bestMinColor, bestMaxColor; + memset(&bestMinColor, 0, sizeof(bestMinColor)); + memset(&bestMaxColor, 0, sizeof(bestMaxColor)); + determine_unique_pbits(4, 5, xl, xh, bestMinColor, bestMaxColor, best_pbits); + + const uint32_t bc7_subset_index = invert_partition ? (1 - astc_subset) : astc_subset; + + dst_blk.m_low[bc7_subset_index] = bestMinColor; + dst_blk.m_high[bc7_subset_index] = bestMaxColor; + + dst_blk.m_pbits[bc7_subset_index][0] = best_pbits[0]; + dst_blk.m_pbits[bc7_subset_index][1] = best_pbits[1]; + } // astc_subset + + for (uint32_t i = 0; i < 16; i++) + dst_blk.m_selectors[i] = unpacked_src_blk.m_astc.m_weights[i]; break; } - case transcoder_texture_format::cTFRGBA32: + default: + return false; + } + + return true; + } + + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false, false)) + return false; + + return transcode_uastc_to_bc7(unpacked_src_blk, dst_blk); + } + + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst) + { + bc7_optimization_results temp; + if (!transcode_uastc_to_bc7(src_blk, temp)) + return false; + + encode_bc7_block(pDst, &temp); + return true; + } + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock) + { + color32 result; + + for (uint32_t c = 0; c < 3; c++) { - // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + static const int s_divs[3] = { 1, 3, 9 }; - // First decode the alpha data - if (basis_file_has_alpha_slices) - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + int delta = 0; + + switch (bias) + { + case 2: delta = subblock ? 0 : ((c == 0) ? -1 : 0); break; + case 5: delta = subblock ? 0 : ((c == 1) ? -1 : 0); break; + case 6: delta = subblock ? 0 : ((c == 2) ? -1 : 0); break; + + case 7: delta = subblock ? 0 : ((c == 0) ? 1 : 0); break; + case 11: delta = subblock ? 0 : ((c == 1) ? 1 : 0); break; + case 15: delta = subblock ? 0 : ((c == 2) ? 1 : 0); break; + + case 18: delta = subblock ? ((c == 0) ? -1 : 0) : 0; break; + case 19: delta = subblock ? ((c == 1) ? -1 : 0) : 0; break; + case 20: delta = subblock ? ((c == 2) ? -1 : 0) : 0; break; + + case 21: delta = subblock ? ((c == 0) ? 1 : 0) : 0; break; + case 24: delta = subblock ? ((c == 1) ? 1 : 0) : 0; break; + case 8: delta = subblock ? ((c == 2) ? 1 : 0) : 0; break; + + case 10: delta = -2; break; + + case 27: delta = subblock ? 0 : -1; break; + case 28: delta = subblock ? -1 : 1; break; + case 29: delta = subblock ? 1 : 0; break; + case 30: delta = subblock ? -1 : 0; break; + case 31: delta = subblock ? 0 : 1; break; + + default: + delta = ((bias / s_divs[c]) % 3) - 1; + break; + } + + int v = block_color[c]; + if (v == 0) + { + if (delta == -2) + v += 3; + else + v += delta + 1; + } + else if (v == (int)limit) + { + v += (delta - 1); + } else - status = true; + { + v += delta; + if ((v < 0) || (v > (int)limit)) + v = (v - delta) - delta; + } - if (status) + assert(v >= 0); + assert(v <= (int)limit); + + result[c] = (uint8_t)v; + } + + return result; + } + + static void etc1_determine_selectors(decoder_etc_block& dst_blk, const color32* pSource_pixels, uint32_t first_subblock, uint32_t last_subblock) + { + static const uint8_t s_tran[4] = { 1, 0, 2, 3 }; + + uint16_t l_bitmask = 0; + uint16_t h_bitmask = 0; + + for (uint32_t subblock = first_subblock; subblock < last_subblock; subblock++) + { + color32 block_colors[4]; + dst_blk.get_block_colors(block_colors, subblock); + + uint32_t block_y[4]; + for (uint32_t i = 0; i < 4; i++) + block_y[i] = block_colors[i][0] * 54 + block_colors[i][1] * 183 + block_colors[i][2] * 19; + + const uint32_t block_y01 = block_y[0] + block_y[1]; + const uint32_t block_y12 = block_y[1] + block_y[2]; + const uint32_t block_y23 = block_y[2] + block_y[3]; + + // X0 X0 X0 X0 X1 X1 X1 X1 X2 X2 X2 X2 X3 X3 X3 X3 + // Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 Y0 Y1 Y2 Y3 + + if (dst_blk.get_flip_bit()) { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGB32 : block_format::cRGBA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - if (!status) + uint32_t ofs = subblock * 2; + + for (uint32_t y = 0; y < 2; y++) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA32 RGB failed\n"); + for (uint32_t x = 0; x < 4; x++) + { + const color32& c = pSource_pixels[x + (subblock * 2 + y) * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; } } else { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA32 A failed\n"); + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + const color32& c = pSource_pixels[subblock * 2 + x + y * 4]; + const uint32_t l = c[0] * 108 + c[1] * 366 + c[2] * 38; + + uint32_t t = s_tran[(l < block_y01) + (l < block_y12) + (l < block_y23)]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } } + } - break; + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + } + + static const uint8_t s_etc1_solid_selectors[4][4] = { { 255, 255, 255, 255 }, { 255, 255, 0, 0 }, { 0, 0, 0, 0 }, {0, 0, 255, 255 } }; + + struct etc_coord2 + { + uint8_t m_x, m_y; + }; + + // [flip][subblock][pixel_index] + const etc_coord2 g_etc1_pixel_coords[2][2][8] = + { + { + { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + }, + { + { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + } + }, + { + { + { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + }, + { + { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + }, } - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: + }; + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst) + { + decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst); + + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) { - // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + dst_blk.m_bytes[3] = (uint8_t)((unpacked_src_blk.m_etc1_diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten0 << 2)); - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, (fmt == transcoder_texture_format::cTFRGB565) ? block_format::cRGB565 : block_format::cBGR565, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - if (!status) + if (unpacked_src_blk.m_etc1_diff) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGB565 RGB failed\n"); + dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r << 3); + dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g << 3); + dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b << 3); + } + else + { + dst_blk.m_bytes[0] = (uint8_t)(unpacked_src_blk.m_etc1_r | (unpacked_src_blk.m_etc1_r << 4)); + dst_blk.m_bytes[1] = (uint8_t)(unpacked_src_blk.m_etc1_g | (unpacked_src_blk.m_etc1_g << 4)); + dst_blk.m_bytes[2] = (uint8_t)(unpacked_src_blk.m_etc1_b | (unpacked_src_blk.m_etc1_b << 4)); } - break; + memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[unpacked_src_blk.m_etc1_selector][0], 4); + + return; } - case transcoder_texture_format::cTFRGBA4444: + + const bool flip = unpacked_src_blk.m_etc1_flip != 0; + const bool diff = unpacked_src_blk.m_etc1_diff != 0; + + dst_blk.m_bytes[3] = (uint8_t)((int)flip | (diff << 1) | (unpacked_src_blk.m_etc1_inten0 << 5) | (unpacked_src_blk.m_etc1_inten1 << 2)); + + const uint32_t limit = diff ? 31 : 15; + + color32 block_colors[2]; + + for (uint32_t subset = 0; subset < 2; subset++) { - // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. + uint32_t avg_color[3]; + memset(avg_color, 0, sizeof(avg_color)); - // First decode the alpha data - if (basis_file_has_alpha_slices) - status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); + for (uint32_t j = 0; j < 8; j++) + { + const etc_coord2& c = g_etc1_pixel_coords[flip][subset][j]; + + avg_color[0] += block_pixels[c.m_y][c.m_x].r; + avg_color[1] += block_pixels[c.m_y][c.m_x].g; + avg_color[2] += block_pixels[c.m_y][c.m_x].b; + } // j + + block_colors[subset][0] = (uint8_t)((avg_color[0] * limit + 1020) / (8 * 255)); + block_colors[subset][1] = (uint8_t)((avg_color[1] * limit + 1020) / (8 * 255)); + block_colors[subset][2] = (uint8_t)((avg_color[2] * limit + 1020) / (8 * 255)); + block_colors[subset][3] = 0; + + if (g_uastc_mode_has_etc1_bias[unpacked_src_blk.m_mode]) + { + block_colors[subset] = apply_etc1_bias(block_colors[subset], unpacked_src_blk.m_etc1_bias, limit, subset); + } + + } // subset + + if (diff) + { + int dr = block_colors[1].r - block_colors[0].r; + int dg = block_colors[1].g - block_colors[0].g; + int db = block_colors[1].b - block_colors[0].b; + + dr = basisu::clamp<int>(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + dg = basisu::clamp<int>(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + db = basisu::clamp<int>(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + if (dr < 0) dr += 8; + if (dg < 0) dg += 8; + if (db < 0) db += 8; + + dst_blk.m_bytes[0] = (uint8_t)((block_colors[0].r << 3) | dr); + dst_blk.m_bytes[1] = (uint8_t)((block_colors[0].g << 3) | dg); + dst_blk.m_bytes[2] = (uint8_t)((block_colors[0].b << 3) | db); + } + else + { + dst_blk.m_bytes[0] = (uint8_t)(block_colors[1].r | (block_colors[0].r << 4)); + dst_blk.m_bytes[1] = (uint8_t)(block_colors[1].g | (block_colors[0].g << 4)); + dst_blk.m_bytes[2] = (uint8_t)(block_colors[1].b | (block_colors[0].b << 4)); + } + + etc1_determine_selectors(dst_blk, &block_pixels[0][0], 0, 2); + } + + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + color32 block_pixels[4][4]; + if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR) + { + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + } + + transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, pDst); + + return true; + } + + static inline int gray_distance2(const uint8_t c, int y) + { + int gray_dist = (int)c - y; + return gray_dist * gray_dist; + } + + static bool pack_etc1_y_estimate_flipped(const uint8_t* pSrc_pixels, + int& upper_avg, int& lower_avg, int& left_avg, int& right_avg) + { + int sums[2][2]; + +#define GET_XY(x, y) pSrc_pixels[(x) + ((y) * 4)] + + sums[0][0] = GET_XY(0, 0) + GET_XY(0, 1) + GET_XY(1, 0) + GET_XY(1, 1); + sums[1][0] = GET_XY(2, 0) + GET_XY(2, 1) + GET_XY(3, 0) + GET_XY(3, 1); + sums[0][1] = GET_XY(0, 2) + GET_XY(0, 3) + GET_XY(1, 2) + GET_XY(1, 3); + sums[1][1] = GET_XY(2, 2) + GET_XY(2, 3) + GET_XY(3, 2) + GET_XY(3, 3); + + upper_avg = (sums[0][0] + sums[1][0] + 4) / 8; + lower_avg = (sums[0][1] + sums[1][1] + 4) / 8; + left_avg = (sums[0][0] + sums[0][1] + 4) / 8; + right_avg = (sums[1][0] + sums[1][1] + 4) / 8; + +#undef GET_XY +#define GET_XY(x, y, a) gray_distance2(pSrc_pixels[(x) + ((y) * 4)], a) + + int upper_gray_dist = 0, lower_gray_dist = 0, left_gray_dist = 0, right_gray_dist = 0; + for (uint32_t i = 0; i < 4; i++) + { + for (uint32_t j = 0; j < 2; j++) + { + upper_gray_dist += GET_XY(i, j, upper_avg); + lower_gray_dist += GET_XY(i, 2 + j, lower_avg); + left_gray_dist += GET_XY(j, i, left_avg); + right_gray_dist += GET_XY(2 + j, i, right_avg); + } + } + +#undef GET_XY + + int upper_lower_sum = upper_gray_dist + lower_gray_dist; + int left_right_sum = left_gray_dist + right_gray_dist; + + return upper_lower_sum < left_right_sum; + } + + // Base Sel Table + // XXXXX XX XXX + static const uint16_t g_etc1_y_solid_block_configs[256] = + { + 0,781,64,161,260,192,33,131,96,320,65,162,261,193,34,291,97,224,66,163,262,194,35,549,98,4,67,653,164,195,523,36,99,5,578,68,165,353,196,37,135,100,324,69,166,354,197,38,295,101,228,70,167, + 355,198,39,553,102,8,71,608,168,199,527,40,103,9,582,72,169,357,200,41,139,104,328,73,170,358,201,42,299,105,232,74,171,359,202,43,557,106,12,75,612,172,203,531,44,107,13,586,76,173,361, + 204,45,143,108,332,77,174,362,205,46,303,109,236,78,175,363,206,47,561,110,16,79,616,176,207,535,48,111,17,590,80,177,365,208,49,147,112,336,81,178,366,209,50,307,113,240,82,179,367,210, + 51,565,114,20,83,620,180,211,539,52,115,21,594,84,181,369,212,53,151,116,340,85,182,370,213,54,311,117,244,86,183,371,214,55,569,118,24,87,624,184,215,543,56,119,25,598,88,185,373,216,57, + 155,120,344,89,186,374,217,58,315,121,248,90,187,375,218,59,573,122,28,91,628,188,219,754,60,123,29,602,92,189,377,220,61,159,124,348,93,190,378,221,62,319,125,252,94,191,379,222,63,882,126 + }; + + // individual + // table base sel0 sel1 sel2 sel3 + static const uint16_t g_etc1_y_solid_block_4i_configs[256] = + { + 0xA000,0xA800,0x540B,0xAA01,0xAA01,0xFE00,0xFF00,0xFF00,0x8,0x5515,0x5509,0x5509,0xAA03,0x5508,0x5508,0x9508,0xA508,0xA908,0xAA08,0x5513,0xAA09,0xAA09,0xAA05,0xFF08,0xFF08,0x10,0x551D,0x5511,0x5511, + 0xAA0B,0x5510,0x5510,0x9510,0xA510,0xA910,0xAA10,0x551B,0xAA11,0xAA11,0xAA0D,0xFF10,0xFF10,0x18,0x5525,0x5519,0x5519,0xAA13,0x5518,0x5518,0x9518,0xA518,0xA918,0xAA18,0x5523,0xAA19,0xAA19,0xAA15, + 0xFF18,0xFF18,0x20,0x552D,0x5521,0x5521,0xAA1B,0x5520,0x5520,0x9520,0xA520,0xA920,0xAA20,0x552B,0xAA21,0xAA21,0xAA1D,0xFF20,0xFF20,0x28,0x5535,0x5529,0x5529,0xAA23,0x5528,0x5528,0x9528,0xA528,0xA928, + 0xAA28,0x5533,0xAA29,0xAA29,0xAA25,0xFF28,0xFF28,0x30,0x553D,0x5531,0x5531,0xAA2B,0x5530,0x5530,0x9530,0xA530,0xA930,0xAA30,0x553B,0xAA31,0xAA31,0xAA2D,0xFF30,0xFF30,0x38,0x5545,0x5539,0x5539,0xAA33, + 0x5538,0x5538,0x9538,0xA538,0xA938,0xAA38,0x5543,0xAA39,0xAA39,0xAA35,0xFF38,0xFF38,0x40,0x554D,0x5541,0x5541,0xAA3B,0x5540,0x5540,0x9540,0xA540,0xA940,0xAA40,0x554B,0xAA41,0xAA41,0xAA3D,0xFF40,0xFF40, + 0x48,0x5555,0x5549,0x5549,0xAA43,0x5548,0x5548,0x9548,0xA548,0xA948,0xAA48,0x5553,0xAA49,0xAA49,0xAA45,0xFF48,0xFF48,0x50,0x555D,0x5551,0x5551,0xAA4B,0x5550,0x5550,0x9550,0xA550,0xA950,0xAA50,0x555B, + 0xAA51,0xAA51,0xAA4D,0xFF50,0xFF50,0x58,0x5565,0x5559,0x5559,0xAA53,0x5558,0x5558,0x9558,0xA558,0xA958,0xAA58,0x5563,0xAA59,0xAA59,0xAA55,0xFF58,0xFF58,0x60,0x556D,0x5561,0x5561,0xAA5B,0x5560,0x5560, + 0x9560,0xA560,0xA960,0xAA60,0x556B,0xAA61,0xAA61,0xAA5D,0xFF60,0xFF60,0x68,0x5575,0x5569,0x5569,0xAA63,0x5568,0x5568,0x9568,0xA568,0xA968,0xAA68,0x5573,0xAA69,0xAA69,0xAA65,0xFF68,0xFF68,0x70,0x557D, + 0x5571,0x5571,0xAA6B,0x5570,0x5570,0x9570,0xA570,0xA970,0xAA70,0x557B,0xAA71,0xAA71,0xAA6D,0xFF70,0xFF70,0x78,0x78,0x5579,0x5579,0xAA73,0x5578,0x9578,0x2578,0xE6E,0x278 + }; + + static const uint16_t g_etc1_y_solid_block_2i_configs[256] = + { + 0x416,0x800,0xA00,0x50B,0xA01,0xA01,0xF00,0xF00,0xF00,0x8,0x515,0x509,0x509,0xA03,0x508,0x508,0xF01,0xF01,0xA08,0xA08,0x513,0xA09,0xA09,0xA05,0xF08,0xF08,0x10,0x51D,0x511,0x511,0xA0B,0x510,0x510,0xF09, + 0xF09,0xA10,0xA10,0x51B,0xA11,0xA11,0xA0D,0xF10,0xF10,0x18,0x525,0x519,0x519,0xA13,0x518,0x518,0xF11,0xF11,0xA18,0xA18,0x523,0xA19,0xA19,0xA15,0xF18,0xF18,0x20,0x52D,0x521,0x521,0xA1B,0x520,0x520,0xF19, + 0xF19,0xA20,0xA20,0x52B,0xA21,0xA21,0xA1D,0xF20,0xF20,0x28,0x535,0x529,0x529,0xA23,0x528,0x528,0xF21,0xF21,0xA28,0xA28,0x533,0xA29,0xA29,0xA25,0xF28,0xF28,0x30,0x53D,0x531,0x531,0xA2B,0x530,0x530,0xF29, + 0xF29,0xA30,0xA30,0x53B,0xA31,0xA31,0xA2D,0xF30,0xF30,0x38,0x545,0x539,0x539,0xA33,0x538,0x538,0xF31,0xF31,0xA38,0xA38,0x543,0xA39,0xA39,0xA35,0xF38,0xF38,0x40,0x54D,0x541,0x541,0xA3B,0x540,0x540,0xF39, + 0xF39,0xA40,0xA40,0x54B,0xA41,0xA41,0xA3D,0xF40,0xF40,0x48,0x555,0x549,0x549,0xA43,0x548,0x548,0xF41,0xF41,0xA48,0xA48,0x553,0xA49,0xA49,0xA45,0xF48,0xF48,0x50,0x55D,0x551,0x551,0xA4B,0x550,0x550,0xF49, + 0xF49,0xA50,0xA50,0x55B,0xA51,0xA51,0xA4D,0xF50,0xF50,0x58,0x565,0x559,0x559,0xA53,0x558,0x558,0xF51,0xF51,0xA58,0xA58,0x563,0xA59,0xA59,0xA55,0xF58,0xF58,0x60,0x56D,0x561,0x561,0xA5B,0x560,0x560,0xF59, + 0xF59,0xA60,0xA60,0x56B,0xA61,0xA61,0xA5D,0xF60,0xF60,0x68,0x575,0x569,0x569,0xA63,0x568,0x568,0xF61,0xF61,0xA68,0xA68,0x573,0xA69,0xA69,0xA65,0xF68,0xF68,0x70,0x57D,0x571,0x571,0xA6B,0x570,0x570,0xF69, + 0xF69,0xA70,0xA70,0x57B,0xA71,0xA71,0xA6D,0xF70,0xF70,0x78,0x78,0x579,0x579,0xA73,0x578,0x578,0xE6E,0x278 + }; + + static const uint16_t g_etc1_y_solid_block_1i_configs[256] = + { + 0x0,0x116,0x200,0x200,0x10B,0x201,0x201,0x300,0x300,0x8,0x115,0x109,0x109,0x203,0x108,0x108,0x114,0x301,0x204,0x208,0x208,0x113,0x209,0x209,0x205,0x308,0x10,0x11D,0x111,0x111,0x20B,0x110,0x110,0x11C,0x309, + 0x20C,0x210,0x210,0x11B,0x211,0x211,0x20D,0x310,0x18,0x125,0x119,0x119,0x213,0x118,0x118,0x124,0x311,0x214,0x218,0x218,0x123,0x219,0x219,0x215,0x318,0x20,0x12D,0x121,0x121,0x21B,0x120,0x120,0x12C,0x319,0x21C, + 0x220,0x220,0x12B,0x221,0x221,0x21D,0x320,0x28,0x135,0x129,0x129,0x223,0x128,0x128,0x134,0x321,0x224,0x228,0x228,0x133,0x229,0x229,0x225,0x328,0x30,0x13D,0x131,0x131,0x22B,0x130,0x130,0x13C,0x329,0x22C,0x230, + 0x230,0x13B,0x231,0x231,0x22D,0x330,0x38,0x145,0x139,0x139,0x233,0x138,0x138,0x144,0x331,0x234,0x238,0x238,0x143,0x239,0x239,0x235,0x338,0x40,0x14D,0x141,0x141,0x23B,0x140,0x140,0x14C,0x339,0x23C,0x240,0x240, + 0x14B,0x241,0x241,0x23D,0x340,0x48,0x155,0x149,0x149,0x243,0x148,0x148,0x154,0x341,0x244,0x248,0x248,0x153,0x249,0x249,0x245,0x348,0x50,0x15D,0x151,0x151,0x24B,0x150,0x150,0x15C,0x349,0x24C,0x250,0x250,0x15B, + 0x251,0x251,0x24D,0x350,0x58,0x165,0x159,0x159,0x253,0x158,0x158,0x164,0x351,0x254,0x258,0x258,0x163,0x259,0x259,0x255,0x358,0x60,0x16D,0x161,0x161,0x25B,0x160,0x160,0x16C,0x359,0x25C,0x260,0x260,0x16B,0x261, + 0x261,0x25D,0x360,0x68,0x175,0x169,0x169,0x263,0x168,0x168,0x174,0x361,0x264,0x268,0x268,0x173,0x269,0x269,0x265,0x368,0x70,0x17D,0x171,0x171,0x26B,0x170,0x170,0x17C,0x369,0x26C,0x270,0x270,0x17B,0x271,0x271, + 0x26D,0x370,0x78,0x78,0x179,0x179,0x273,0x178,0x178,0x26E,0x278 + }; + + // We don't have any useful hints to accelerate single channel ETC1, so we need to real-time encode from scratch. + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + +#if 0 + for (uint32_t individ = 0; individ < 2; individ++) + { + uint32_t overall_error = 0; + + for (uint32_t c = 0; c < 256; c++) + { + uint32_t best_err = UINT32_MAX; + uint32_t best_individ = 0; + uint32_t best_base = 0; + uint32_t best_sels[4] = { 0,0,0,0 }; + uint32_t best_table = 0; + + const uint32_t limit = individ ? 16 : 32; + + for (uint32_t table = 0; table < 8; table++) + { + for (uint32_t base = 0; base < limit; base++) + { + uint32_t total_e = 0; + uint32_t sels[4] = { 0,0,0,0 }; + + const uint32_t N = 4; + for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++) + { + uint32_t best_sel_e = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t sel = 0; sel < 4; sel++) + { + int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2)); + val = clamp255(val + g_etc1_inten_tables[table][sel]); + + int e = iabs(val - clamp255(c + i)); + if (e < best_sel_e) + { + best_sel_e = e; + best_sel = sel; + } + + } // sel + + sels[i] = best_sel; + total_e += best_sel_e * best_sel_e; + + } // i + + if (total_e < best_err) + { + best_err = total_e; + best_individ = individ; + best_base = base; + memcpy(best_sels, sels, sizeof(best_sels)); + best_table = table; + } + + } // base + } // table + + //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]); + + uint32_t encoded = best_table | (best_base << 3) | + (best_sels[0] << 8) | + (best_sels[1] << 10) | + (best_sels[2] << 12) | + (best_sels[3] << 14); + + printf("0x%X,", encoded); + + overall_error += best_err; + } // c + + printf("\n"); + printf("Overall error: %u\n", overall_error); + + } // individ + + exit(0); +#endif + +#if 0 + for (uint32_t individ = 0; individ < 2; individ++) + { + uint32_t overall_error = 0; + + for (uint32_t c = 0; c < 256; c++) + { + uint32_t best_err = UINT32_MAX; + uint32_t best_individ = 0; + uint32_t best_base = 0; + uint32_t best_sels[4] = { 0,0,0,0 }; + uint32_t best_table = 0; + + const uint32_t limit = individ ? 16 : 32; + + for (uint32_t table = 0; table < 8; table++) + { + for (uint32_t base = 0; base < limit; base++) + { + uint32_t total_e = 0; + uint32_t sels[4] = { 0,0,0,0 }; + + const uint32_t N = 1; + for (uint32_t i = 0; i < basisu::minimum<uint32_t>(N, (256 - c)); i++) + { + uint32_t best_sel_e = UINT32_MAX; + uint32_t best_sel = 0; + + for (uint32_t sel = 0; sel < 4; sel++) + { + int val = individ ? ((base << 4) | base) : ((base << 3) | (base >> 2)); + val = clamp255(val + g_etc1_inten_tables[table][sel]); + + int e = iabs(val - clamp255(c + i)); + if (e < best_sel_e) + { + best_sel_e = e; + best_sel = sel; + } + + } // sel + + sels[i] = best_sel; + total_e += best_sel_e * best_sel_e; + + } // i + + if (total_e < best_err) + { + best_err = total_e; + best_individ = individ; + best_base = base; + memcpy(best_sels, sels, sizeof(best_sels)); + best_table = table; + } + + } // base + } // table + + //printf("%u: %u,%u,%u,%u,%u,%u,%u,%u\n", c, best_err, best_individ, best_table, best_base, best_sels[0], best_sels[1], best_sels[2], best_sels[3]); + + uint32_t encoded = best_table | (best_base << 3) | + (best_sels[0] << 8) | + (best_sels[1] << 10) | + (best_sels[2] << 12) | + (best_sels[3] << 14); + + printf("0x%X,", encoded); + + overall_error += best_err; + } // c + + printf("\n"); + printf("Overall error: %u\n", overall_error); + + } // individ + + exit(0); +#endif + + decoder_etc_block& dst_blk = *static_cast<decoder_etc_block*>(pDst); + + if (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + const uint32_t y = unpacked_src_blk.m_solid_color[channel]; + const uint32_t encoded_config = g_etc1_y_solid_block_configs[y]; + + const uint32_t base = encoded_config & 31; + const uint32_t sel = (encoded_config >> 5) & 3; + const uint32_t table = encoded_config >> 7; + + dst_blk.m_bytes[3] = (uint8_t)(2 | (table << 5) | (table << 2)); + + dst_blk.m_bytes[0] = (uint8_t)(base << 3); + dst_blk.m_bytes[1] = (uint8_t)(base << 3); + dst_blk.m_bytes[2] = (uint8_t)(base << 3); + + memcpy(dst_blk.m_bytes + 4, &s_etc1_solid_selectors[sel][0], 4); + return true; + } + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + uint8_t block_y[4][4]; + for (uint32_t i = 0; i < 16; i++) + ((uint8_t*)block_y)[i] = ((color32*)block_pixels)[i][channel]; + + int upper_avg, lower_avg, left_avg, right_avg; + bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); + + // non-flipped: | | + // vs. + // flipped: -- + // -- + + uint32_t low[2] = { 255, 255 }, high[2] = { 0, 0 }; + + if (flip) + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[0] = basisu::minimum(low[0], v); + high[0] = basisu::maximum(high[0], v); + } + } + for (uint32_t y = 2; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[1] = basisu::minimum(low[1], v); + high[1] = basisu::maximum(high[1], v); + } + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const uint32_t v = block_y[y][x]; + low[0] = basisu::minimum(low[0], v); + high[0] = basisu::maximum(high[0], v); + } + } + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 2; x < 4; x++) + { + const uint32_t v = block_y[y][x]; + low[1] = basisu::minimum(low[1], v); + high[1] = basisu::maximum(high[1], v); + } + } + } + + const uint32_t range[2] = { high[0] - low[0], high[1] - low[1] }; + + dst_blk.m_bytes[3] = (uint8_t)((int)flip); + + if ((range[0] <= 3) && (range[1] <= 3)) + { + // This is primarily for better gradients. + dst_blk.m_bytes[0] = 0; + dst_blk.m_bytes[1] = 0; + dst_blk.m_bytes[2] = 0; + + uint16_t l_bitmask = 0, h_bitmask = 0; + + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]); + + const uint32_t table = encoded & 7; + const uint32_t base = (encoded >> 3) & 31; + assert(base <= 15); + const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 }; + + dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5)); + + const uint32_t sv = base << (subblock ? 0 : 4); + dst_blk.m_bytes[0] |= (uint8_t)(sv); + dst_blk.m_bytes[1] |= (uint8_t)(sv); + dst_blk.m_bytes[2] |= (uint8_t)(sv); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = block_y[y + subblock * 2][x]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = block_y[y][x + subblock * 2]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + } // subblock + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + + return true; + } + + uint32_t y0 = ((flip ? upper_avg : left_avg) * 31 + 127) / 255; + uint32_t y1 = ((flip ? lower_avg : right_avg) * 31 + 127) / 255; + + bool diff = true; + + int dy = y1 - y0; + + if ((dy < cETC1ColorDeltaMin) || (dy > cETC1ColorDeltaMax)) + { + diff = false; + + y0 = ((flip ? upper_avg : left_avg) * 15 + 127) / 255; + y1 = ((flip ? lower_avg : right_avg) * 15 + 127) / 255; + + dst_blk.m_bytes[0] = (uint8_t)(y1 | (y0 << 4)); + dst_blk.m_bytes[1] = (uint8_t)(y1 | (y0 << 4)); + dst_blk.m_bytes[2] = (uint8_t)(y1 | (y0 << 4)); + } + else + { + dy = basisu::clamp<int>(dy, cETC1ColorDeltaMin, cETC1ColorDeltaMax); + + y1 = y0 + dy; + + if (dy < 0) dy += 8; + + dst_blk.m_bytes[0] = (uint8_t)((y0 << 3) | dy); + dst_blk.m_bytes[1] = (uint8_t)((y0 << 3) | dy); + dst_blk.m_bytes[2] = (uint8_t)((y0 << 3) | dy); + + dst_blk.m_bytes[3] |= 2; + } + + const uint32_t base_y[2] = { diff ? ((y0 << 3) | (y0 >> 2)) : ((y0 << 4) | y0), diff ? ((y1 << 3) | (y1 >> 2)) : ((y1 << 4) | y1) }; + + uint32_t enc_range[2]; + for (uint32_t subset = 0; subset < 2; subset++) + { + const int pos = basisu::iabs((int)high[subset] - (int)base_y[subset]); + const int neg = basisu::iabs((int)base_y[subset] - (int)low[subset]); + + enc_range[subset] = basisu::maximum(pos, neg); + } + + uint16_t l_bitmask = 0, h_bitmask = 0; + for (uint32_t subblock = 0; subblock < 2; subblock++) + { + if ((!diff) && (range[subblock] <= 3)) + { + const uint32_t encoded = (range[subblock] == 0) ? g_etc1_y_solid_block_1i_configs[low[subblock]] : ((range[subblock] < 2) ? g_etc1_y_solid_block_2i_configs[low[subblock]] : g_etc1_y_solid_block_4i_configs[low[subblock]]); + + const uint32_t table = encoded & 7; + const uint32_t base = (encoded >> 3) & 31; + assert(base <= 15); + const uint32_t sels[4] = { (encoded >> 8) & 3, (encoded >> 10) & 3, (encoded >> 12) & 3, (encoded >> 14) & 3 }; + + dst_blk.m_bytes[3] |= (uint8_t)(table << (subblock ? 2 : 5)); + + const uint32_t mask = ~(0xF << (subblock ? 0 : 4)); + + dst_blk.m_bytes[0] &= mask; + dst_blk.m_bytes[1] &= mask; + dst_blk.m_bytes[2] &= mask; + + const uint32_t sv = base << (subblock ? 0 : 4); + dst_blk.m_bytes[0] |= (uint8_t)(sv); + dst_blk.m_bytes[1] |= (uint8_t)(sv); + dst_blk.m_bytes[2] |= (uint8_t)(sv); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = block_y[y + subblock * 2][x]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; + } + } + else + { + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = block_y[y][x + subblock * 2]; + assert(t >= low[subblock] && t <= high[subblock]); + t -= low[subblock]; + assert(t <= 3); + + t = g_selector_index_to_etc1[sels[t]]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } + } + + continue; + } // if + + uint32_t best_err = UINT32_MAX; + uint8_t best_sels[8]; + uint32_t best_inten = 0; + + const int base = base_y[subblock]; + + const int low_limit = -base; + const int high_limit = 255 - base; + + assert(low_limit <= 0 && high_limit >= 0); + + uint32_t inten_table_mask = 0xFF; + const uint32_t er = enc_range[subblock]; + // Each one of these tables is expensive to evaluate, so let's only examine the ones we know may be useful. + if (er <= 51) + { + inten_table_mask = 0xF; + + if (er > 22) + inten_table_mask &= ~(1 << 0); + + if ((er < 4) || (er > 39)) + inten_table_mask &= ~(1 << 1); + + if (er < 9) + inten_table_mask &= ~(1 << 2); + + if (er < 12) + inten_table_mask &= ~(1 << 3); + } else - status = true; + { + inten_table_mask &= ~((1 << 0) | (1 << 1)); - if (status) + if (er > 60) + inten_table_mask &= ~(1 << 2); + + if (er > 89) + inten_table_mask &= ~(1 << 3); + + if (er > 120) + inten_table_mask &= ~(1 << 4); + + if (er > 136) + inten_table_mask &= ~(1 << 5); + + if (er > 174) + inten_table_mask &= ~(1 << 6); + } + + for (uint32_t inten = 0; inten < 8; inten++) { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, basis_file_has_alpha_slices ? block_format::cRGBA4444_COLOR : block_format::cRGBA4444_COLOR_OPAQUE, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); - if (!status) + if ((inten_table_mask & (1 << inten)) == 0) + continue; + + const int t0 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][0]); + const int t1 = basisu::maximum(low_limit, g_etc1_inten_tables[inten][1]); + const int t2 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][2]); + const int t3 = basisu::minimum(high_limit, g_etc1_inten_tables[inten][3]); + assert((t0 <= t1) && (t1 <= t2) && (t2 <= t3)); + + const int tv[4] = { t2, t3, t1, t0 }; + + const int thresh01 = t0 + t1; + const int thresh12 = t1 + t2; + const int thresh23 = t2 + t3; + + assert(thresh01 <= thresh12 && thresh12 <= thresh23); + + static const uint8_t s_table[4] = { 1, 0, 2, 3 }; + + uint32_t total_err = 0; + uint8_t sels[8]; + + if (flip) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA4444 RGB failed\n"); + if (((int)high[subblock] - base) * 2 < thresh01) + { + memset(sels, 3, 8); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + + const uint32_t c = 3; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else if (((int)low[subblock] - base) * 2 >= thresh23) + { + memset(sels, 1, 8); + + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + + const uint32_t c = 1; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + const int delta = (int)block_y[y + subblock * 2][x] - base; + const int delta2 = delta * 2; + + uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)]; + sels[y * 4 + x] = (uint8_t)c; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + } + else + { + if (((int)high[subblock] - base) * 2 < thresh01) + { + memset(sels, 3, 8); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + + const uint32_t c = 3; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else if (((int)low[subblock] - base) * 2 >= thresh23) + { + memset(sels, 1, 8); + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + + const uint32_t c = 1; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + else + { + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 2; x++) + { + const int delta = (int)block_y[y][x + subblock * 2] - base; + const int delta2 = delta * 2; + + uint32_t c = s_table[(delta2 < thresh01) + (delta2 < thresh12) + (delta2 < thresh23)]; + sels[y * 2 + x] = (uint8_t)c; + + uint32_t e = basisu::iabs(tv[c] - delta); + total_err += e * e; + } + if (total_err >= best_err) + break; + } + } + } + + if (total_err < best_err) + { + best_err = total_err; + best_inten = inten; + memcpy(best_sels, sels, 8); + } + + } // inten + + //g_inten_hist[best_inten][enc_range[subblock]]++; + + dst_blk.m_bytes[3] |= (uint8_t)(best_inten << (subblock ? 2 : 5)); + + if (flip) + { + uint32_t ofs = subblock * 2; + for (uint32_t y = 0; y < 2; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t t = best_sels[y * 4 + x]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ofs += 4; + } + + ofs = (int)ofs + 1 - 4 * 4; } } else { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to RGBA4444 A failed\n"); + uint32_t ofs = (subblock * 2) * 4; + for (uint32_t x = 0; x < 2; x++) + { + for (uint32_t y = 0; y < 4; y++) + { + uint32_t t = best_sels[y * 2 + x]; + + assert(ofs < 16); + l_bitmask |= ((t & 1) << ofs); + h_bitmask |= ((t >> 1) << ofs); + ++ofs; + } + } } - break; + } // subblock + + dst_blk.m_bytes[7] = (uint8_t)(l_bitmask); + dst_blk.m_bytes[6] = (uint8_t)(l_bitmask >> 8); + dst_blk.m_bytes[5] = (uint8_t)(h_bitmask); + dst_blk.m_bytes[4] = (uint8_t)(h_bitmask >> 8); + + return true; + } + + const uint32_t ETC2_EAC_MIN_VALUE_SELECTOR = 3, ETC2_EAC_MAX_VALUE_SELECTOR = 7; + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst) + { + eac_block& dst = *static_cast<eac_block*>(pDst); + const color32* pSrc_pixels = &block_pixels[0][0]; + + if ((!g_uastc_mode_has_alpha[unpacked_src_blk.m_mode]) || (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR)) + { + const uint32_t a = (unpacked_src_blk.m_mode == UASTC_MODE_INDEX_SOLID_COLOR) ? unpacked_src_blk.m_solid_color[3] : 255; + + dst.m_base = a; + dst.m_table = 13; + dst.m_multiplier = 1; + + memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; } - case transcoder_texture_format::cTFFXT1_RGB: + + uint32_t min_a = 255, max_a = 0; + for (uint32_t i = 0; i < 16; i++) { -#if !BASISD_SUPPORT_FXT1 + min_a = basisu::minimum<uint32_t>(min_a, pSrc_pixels[i].a); + max_a = basisu::maximum<uint32_t>(max_a, pSrc_pixels[i].a); + } + + if (min_a == max_a) + { + dst.m_base = min_a; + dst.m_table = 13; + dst.m_multiplier = 1; + + memcpy(dst.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + return; + } + + const uint32_t table = unpacked_src_blk.m_etc2_hints & 0xF; + const int multiplier = unpacked_src_blk.m_etc2_hints >> 4; + + assert(multiplier >= 1); + + dst.m_multiplier = multiplier; + dst.m_table = table; + + const float range = (float)(g_eac_modifier_table[dst.m_table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]); + const int center = (int)roundf(basisu::lerp((float)min_a, (float)max_a, (float)(0 - g_eac_modifier_table[dst.m_table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range)); + + dst.m_base = center; + + const int8_t* pTable = &g_eac_modifier_table[dst.m_table][0]; + + uint32_t vals[8]; + for (uint32_t j = 0; j < 8; j++) + vals[j] = clamp255(center + (pTable[j] * multiplier)); + + uint64_t sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = block_pixels[i & 3][i >> 2].a; + + const uint32_t err0 = (basisu::iabs(vals[0] - a) << 3) | 0; + const uint32_t err1 = (basisu::iabs(vals[1] - a) << 3) | 1; + const uint32_t err2 = (basisu::iabs(vals[2] - a) << 3) | 2; + const uint32_t err3 = (basisu::iabs(vals[3] - a) << 3) | 3; + const uint32_t err4 = (basisu::iabs(vals[4] - a) << 3) | 4; + const uint32_t err5 = (basisu::iabs(vals[5] - a) << 3) | 5; + const uint32_t err6 = (basisu::iabs(vals[6] - a) << 3) | 6; + const uint32_t err7 = (basisu::iabs(vals[7] - a) << 3) | 7; + + const uint32_t min_err = basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(basisu::minimum(err0, err1, err2), err3), err4), err5), err6), err7); + + const uint64_t best_index = min_err & 7; + sels |= (best_index << (45 - i * 3)); + } + + dst.set_selector_bits(sels); + } + + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst) + { + eac_block& dst_etc2_eac_a8_blk = *static_cast<eac_block*>(pDst); + decoder_etc_block& dst_etc1_blk = static_cast<decoder_etc_block*>(pDst)[1]; + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cFXT1_RGB, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + color32 block_pixels[4][4]; + if (unpacked_src_blk.m_mode != UASTC_MODE_INDEX_SOLID_COLOR) + { + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + } + + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &dst_etc2_eac_a8_blk); + + transcode_uastc_to_etc1(unpacked_src_blk, block_pixels, &dst_etc1_blk); + + return true; + } + + static const uint8_t s_uastc5_to_bc1[32] = { 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1 }; + static const uint8_t s_uastc4_to_bc1[16] = { 0, 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1, 1 }; + static const uint8_t s_uastc3_to_bc1[8] = { 0, 0, 2, 2, 3, 3, 1, 1 }; + static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; + static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; + const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; + + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; + + { + min0_v = max0_v = pPixels[0 * stride]; + min1_v = max1_v = pPixels[1 * stride]; + min2_v = max2_v = pPixels[2 * stride]; + min3_v = max3_v = pPixels[3 * stride]; + } + + { + uint32_t v0 = pPixels[4 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[5 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[6 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[7 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + { + uint32_t v0 = pPixels[8 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[9 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[10 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[11 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + { + uint32_t v0 = pPixels[12 * stride]; min0_v = basisu::minimum(min0_v, v0); max0_v = basisu::maximum(max0_v, v0); + uint32_t v1 = pPixels[13 * stride]; min1_v = basisu::minimum(min1_v, v1); max1_v = basisu::maximum(max1_v, v1); + uint32_t v2 = pPixels[14 * stride]; min2_v = basisu::minimum(min2_v, v2); max2_v = basisu::maximum(max2_v, v2); + uint32_t v3 = pPixels[15 * stride]; min3_v = basisu::minimum(min3_v, v3); max3_v = basisu::maximum(max3_v, v3); + } + + const uint32_t min_v = basisu::minimum(min0_v, min1_v, min2_v, min3_v); + const uint32_t max_v = basisu::maximum(max0_v, max1_v, max2_v, max3_v); + + uint8_t* pDst_bytes = static_cast<uint8_t*>(pDst); + pDst_bytes[0] = (uint8_t)max_v; + pDst_bytes[1] = (uint8_t)min_v; + + if (max_v == min_v) + { + memset(pDst_bytes + 2, 0, 6); + return; + } + + const uint32_t delta = max_v - min_v; + + // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. + const int t0 = delta * 13; + const int t1 = delta * 11; + const int t2 = delta * 9; + const int t3 = delta * 7; + const int t4 = delta * 5; + const int t5 = delta * 3; + const int t6 = delta * 1; + + // BC4 floors in its divisions, which we compensate for with the 4 bias. + // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). + const int bias = 4 - min_v * 14; + + static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; + static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; + static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; + static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; + + uint64_t a0, a1, a2, a3; + { + const int v0 = pPixels[0 * stride] * 14 + bias; + const int v1 = pPixels[1 * stride] * 14 + bias; + const int v2 = pPixels[2 * stride] * 14 + bias; + const int v3 = pPixels[3 * stride] * 14 + bias; + a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; + a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; + a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; + a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; + } + + { + const int v0 = pPixels[4 * stride] * 14 + bias; + const int v1 = pPixels[5 * stride] * 14 + bias; + const int v2 = pPixels[6 * stride] * 14 + bias; + const int v3 = pPixels[7 * stride] * 14 + bias; + a0 |= (s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); + a1 |= (s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); + a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); + a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); + } + + { + const int v0 = pPixels[8 * stride] * 14 + bias; + const int v1 = pPixels[9 * stride] * 14 + bias; + const int v2 = pPixels[10 * stride] * 14 + bias; + const int v3 = pPixels[11 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); + } + + { + const int v0 = pPixels[12 * stride] * 14 + bias; + const int v1 = pPixels[13 * stride] * 14 + bias; + const int v2 = pPixels[14 * stride] * 14 + bias; + const int v3 = pPixels[15 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); + } + + const uint64_t f = a0 | a1 | a2 | a3; + + pDst_bytes[2] = (uint8_t)f; + pDst_bytes[3] = (uint8_t)(f >> 8U); + pDst_bytes[4] = (uint8_t)(f >> 16U); + pDst_bytes[5] = (uint8_t)(f >> 24U); + pDst_bytes[6] = (uint8_t)(f >> 32U); + pDst_bytes[7] = (uint8_t)(f >> 40U); + } + + static void bc1_find_sels(const color32 *pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) + { + uint32_t block_r[4], block_g[4], block_b[4]; + + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + for (uint32_t i = 0; i < 16; i++) + { + const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + // Rounding matters here! + // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. + sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; + } + } + + static inline void bc1_find_sels_2(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) + { + uint32_t block_r[4], block_g[4], block_b[4]; + + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; + const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; + const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; + const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; + + sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + } + } + + struct vec3F { float c[3]; }; + + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) + { + // Derived from bc7enc16's LS function. + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // I did this in matrix form first, expanded out all the ops, then optimized it a bit. + uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. + static const uint32_t s_weight_vals[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; + + uint32_t weight_accum = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + const uint32_t sel = pSelectors[i]; + ut_r += r; + ut_g += g; + ut_b += b; + weight_accum += s_weight_vals[sel]; + uq00_r += sel * r; + uq00_g += sel * g; + uq00_b += sel * b; + } + + float q00_r = (float)uq00_r, q10_r = (float)uq10_r, t_r = (float)ut_r; + float q00_g = (float)uq00_g, q10_g = (float)uq10_g, t_g = (float)ut_g; + float q00_b = (float)uq00_b, q10_b = (float)uq10_b, t_b = (float)ut_b; + + q10_r = t_r * 3.0f - q00_r; + q10_g = t_g * 3.0f - q00_g; + q10_b = t_b * 3.0f - q00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = 3.0f / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * q00_r + iz01 * q10_r; pXh->c[0] = iz10 * q00_r + iz11 * q10_r; + pXl->c[1] = iz00 * q00_g + iz01 * q10_g; pXh->c[1] = iz10 * q00_g + iz11 * q10_g; + pXl->c[2] = iz00 * q00_b + iz01 * q10_b; pXh->c[2] = iz10 * q00_b + iz11 * q10_b; + + // Check and fix channel singularities - might not be needed, but is in UASTC's encoder. + for (uint32_t c = 0; c < 3; c++) + { + if ((pXl->c[c] < 0.0f) || (pXh->c[c] > 255.0f)) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to FXT1_RGB failed\n"); + uint32_t lo_v = UINT32_MAX, hi_v = 0; + for (uint32_t i = 0; i < 16; i++) + { + lo_v = basisu::minimumu(lo_v, pColors[i].c[c]); + hi_v = basisu::maximumu(hi_v, pColors[i].c[c]); + } + + if (lo_v == hi_v) + { + pXl->c[c] = (float)lo_v; + pXh->c[c] = (float)hi_v; + } } - break; } - case transcoder_texture_format::cTFETC2_EAC_R11: + + return true; + } + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + { + dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst); + + uint32_t mask = 0xAA; + uint32_t max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; + uint32_t min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; + + if (min16 == max16) { -#if !BASISD_SUPPORT_ETC2_EAC_RG11 - return false; -#endif - uint32_t slice_index_to_decode = slice_index; - // If the caller wants us to transcode the mip level's alpha data, then use the next slice. - if ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) - slice_index_to_decode++; + // Always forbid 3 color blocks + // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. + mask = 0; - status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + // Make l > h + if (min16 > 0) + min16--; + else { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2_EAC_R11 failed\n"); + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + mask = 0x55; } - break; + assert(max16 > min16); } - case transcoder_texture_format::cTFETC2_EAC_RG11: + + if (max16 < min16) { -#if !BASISD_SUPPORT_ETC2_EAC_RG11 - return false; + std::swap(max16, min16); + mask ^= 0x55; + } + + pDst_block->set_low_color(static_cast<uint16_t>(max16)); + pDst_block->set_high_color(static_cast<uint16_t>(min16)); + pDst_block->m_selectors[0] = static_cast<uint8_t>(mask); + pDst_block->m_selectors[1] = static_cast<uint8_t>(mask); + pDst_block->m_selectors[2] = static_cast<uint8_t>(mask); + pDst_block->m_selectors[3] = static_cast<uint8_t>(mask); + } + + static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + + // Good references: squish library, stb_dxt. + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags) + { + const color32* pSrc_pixels = (const color32*)pPixels; + dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst); + + int avg_r = -1, avg_g = 0, avg_b = 0; + int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + uint8_t sels[16]; + + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; + if (use_sels) + { + // Caller is jamming in their own selectors for us to try. + const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); + + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; + + for (uint32_t i = 0; i < 16; i++) + sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; + } + else + { + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 1; j < 16; j++) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 16) + { + encode_bc1_solid_block(pDst, fr, fg, fb); + return; + } + + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) + int total_r = fr, total_g = fg, total_b = fb; + int max_r = fr, max_g = fg, max_b = fb; + int min_r = fr, min_g = fg, min_b = fb; + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = static_cast<float>(icov[i])* (1.0f / 255.0f); + +#if 0 + // Seems silly to use full PCA to choose 2 colors. The diff in avg. PSNR between using PCA vs. not is small (~.025 difference). + // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta + int saxis_r = max_r - min_r; + int saxis_g = max_g - min_g; + int saxis_b = max_b - min_b; +#else + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + //float xr = (float)(max_r - avg_r); // max-avg is nearly the same, and doesn't require computing min's + //float xg = (float)(max_g - avg_g); + //float xb = (float)(max_b - avg_b); + for (uint32_t power_iter = 0; power_iter < 4; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = basisu::maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + if (k >= 2) + { + float m = 1024.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } #endif - assert(bytes_per_block == 16); + + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; + for (uint32_t i = 0; i < 16; i++) + { + int dot = pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b; + if (dot < low_dot) + { + low_dot = dot; + low_c = i; + } + if (dot > high_dot) + { + high_dot = dot; + high_c = i; + } + } - if (basis_file_has_alpha_slices) + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } // if (use_sels) + + const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1); + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors. + vec3F xl, xh; + if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh)) { - // First decode the alpha data to G - status = transcode_slice(pData, data_size, slice_index + 1, (uint8_t *)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); + if (avg_r < 0) + { + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + total_r += pSrc_pixels[i].r; + total_g += pSrc_pixels[i].g; + total_b += pSrc_pixels[i].b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + } + + // All selectors equal - treat it as a solid block which should always be equal or better. + lr = g_bc1_match5_equals_1[avg_r].m_hi; + lg = g_bc1_match6_equals_1[avg_g].m_hi; + lb = g_bc1_match5_equals_1[avg_b].m_hi; + + hr = g_bc1_match5_equals_1[avg_r].m_lo; + hg = g_bc1_match6_equals_1[avg_g].m_lo; + hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. } else { - write_opaque_alpha_blocks(pSlice_descs[slice_index].m_num_blocks_x, pSlice_descs[slice_index].m_num_blocks_y, (uint8_t *)pOutput_blocks + 8, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, output_row_pitch_in_blocks_or_pixels); - status = true; + lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + + hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); } + + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } - if (status) + uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else { - // Now decode the color data to R - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); - if (!status) + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast<uint16_t>(lc16)); + pDst_block->set_high_color(static_cast<uint16_t>(hc16)); + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; + } + } + + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) + { + const color32* pSrc_pixels = (const color32*)pPixels; + dxt1_block* pDst_block = static_cast<dxt1_block*>(pDst); + + int avg_r = -1, avg_g = 0, avg_b = 0; + int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + uint8_t sels[16]; + + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; + if (use_sels) + { + // Caller is jamming in their own selectors for us to try. + const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); + + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; + + for (uint32_t i = 0; i < 16; i++) + sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; + } + else + { + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 1; j < 16; j++) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 16) + { + encode_bc1_solid_block(pDst, fr, fg, fb); + return; + } + + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) + int total_r = fr, total_g = fg, total_b = fb; + int max_r = fr, max_g = fg, max_b = fb; + int min_r = fr, min_g = fg, min_b = fb; + uint32_t grayscale_flag = (fr == fg) && (fr == fb); + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + grayscale_flag &= ((r == g) && (r == b)); + max_r = basisu::maximum(max_r, r); max_g = basisu::maximum(max_g, g); max_b = basisu::maximum(max_b, b); + min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + if (grayscale_flag) + { + // Grayscale blocks are a common enough case to specialize. + if ((max_r - min_r) < 2) { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2_EAC_R11 R failed\n"); + lr = lb = hr = hb = to_5(fr); + lg = hg = to_6(fr); + } + else + { + lr = lb = to_5(min_r); + lg = to_6(min_r); + + hr = hb = to_5(max_r); + hg = to_6(max_r); } } else { - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ETC2_EAC_R11 G failed\n"); + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + + // Find the shortest vector from a AABB corner to the block's average color. + // This is to help avoid outliers. + + uint32_t dist[3][2]; + dist[0][0] = basisu::square(min_r - avg_r) << 3; dist[0][1] = basisu::square(max_r - avg_r) << 3; + dist[1][0] = basisu::square(min_g - avg_g) << 3; dist[1][1] = basisu::square(max_g - avg_g) << 3; + dist[2][0] = basisu::square(min_b - avg_b) << 3; dist[2][1] = basisu::square(max_b - avg_b) << 3; + + uint32_t min_d0 = (dist[0][0] + dist[1][0] + dist[2][0]); + uint32_t d4 = (dist[0][0] + dist[1][0] + dist[2][1]) | 4; + min_d0 = basisu::minimum(min_d0, d4); + + uint32_t min_d1 = (dist[0][1] + dist[1][0] + dist[2][0]) | 1; + uint32_t d5 = (dist[0][1] + dist[1][0] + dist[2][1]) | 5; + min_d1 = basisu::minimum(min_d1, d5); + + uint32_t d2 = (dist[0][0] + dist[1][1] + dist[2][0]) | 2; + min_d0 = basisu::minimum(min_d0, d2); + + uint32_t d3 = (dist[0][1] + dist[1][1] + dist[2][0]) | 3; + min_d1 = basisu::minimum(min_d1, d3); + + uint32_t d6 = (dist[0][0] + dist[1][1] + dist[2][1]) | 6; + min_d0 = basisu::minimum(min_d0, d6); + + uint32_t d7 = (dist[0][1] + dist[1][1] + dist[2][1]) | 7; + min_d1 = basisu::minimum(min_d1, d7); + + uint32_t min_d = basisu::minimum(min_d0, min_d1); + uint32_t best_i = min_d & 7; + + int delta_r = (best_i & 1) ? (max_r - avg_r) : (avg_r - min_r); + int delta_g = (best_i & 2) ? (max_g - avg_g) : (avg_g - min_g); + int delta_b = (best_i & 4) ? (max_b - avg_b) : (avg_b - min_b); + + // Note: if delta_r/g/b==0, we actually want to choose a single color, so the block average color optimization kicks in. + uint32_t low_c = 0, high_c = 0; + if ((delta_r | delta_g | delta_b) != 0) + { + // Now we have a smaller AABB going from the block's average color to a cornerpoint of the larger AABB. + // Project all pixels colors along the 4 vectors going from a smaller AABB cornerpoint to the opposite cornerpoint, find largest projection. + // One of these vectors will be a decent approximation of the block's PCA. + const int saxis0_r = delta_r, saxis0_g = delta_g, saxis0_b = delta_b; + + int low_dot0 = INT_MAX, high_dot0 = INT_MIN; + int low_dot1 = INT_MAX, high_dot1 = INT_MIN; + int low_dot2 = INT_MAX, high_dot2 = INT_MIN; + int low_dot3 = INT_MAX, high_dot3 = INT_MIN; + + //int low_c0, low_c1, low_c2, low_c3; + //int high_c0, high_c1, high_c2, high_c3; + + for (uint32_t i = 0; i < 16; i++) + { + const int dotx = pSrc_pixels[i].r * saxis0_r; + const int doty = pSrc_pixels[i].g * saxis0_g; + const int dotz = pSrc_pixels[i].b * saxis0_b; + + const int dot0 = ((dotz + dotx + doty) << 4) + i; + const int dot1 = ((dotz - dotx - doty) << 4) + i; + const int dot2 = ((dotz - dotx + doty) << 4) + i; + const int dot3 = ((dotz + dotx - doty) << 4) + i; + + if (dot0 < low_dot0) + { + low_dot0 = dot0; + //low_c0 = i; + } + if ((dot0 ^ 15) > high_dot0) + { + high_dot0 = dot0 ^ 15; + //high_c0 = i; + } + + if (dot1 < low_dot1) + { + low_dot1 = dot1; + //low_c1 = i; + } + if ((dot1 ^ 15) > high_dot1) + { + high_dot1 = dot1 ^ 15; + //high_c1 = i; + } + + if (dot2 < low_dot2) + { + low_dot2 = dot2; + //low_c2 = i; + } + if ((dot2 ^ 15) > high_dot2) + { + high_dot2 = dot2 ^ 15; + //high_c2 = i; + } + + if (dot3 < low_dot3) + { + low_dot3 = dot3; + //low_c3 = i; + } + if ((dot3 ^ 15) > high_dot3) + { + high_dot3 = dot3 ^ 15; + //high_c3 = i; + } + } + + low_c = low_dot0 & 15; + high_c = ~high_dot0 & 15; + uint32_t r = (high_dot0 & ~15) - (low_dot0 & ~15); + + uint32_t tr = (high_dot1 & ~15) - (low_dot1 & ~15); + if (tr > r) { + low_c = low_dot1 & 15; + high_c = ~high_dot1 & 15; + r = tr; + } + + tr = (high_dot2 & ~15) - (low_dot2 & ~15); + if (tr > r) { + low_c = low_dot2 & 15; + high_c = ~high_dot2 & 15; + r = tr; + } + + tr = (high_dot3 & ~15) - (low_dot3 & ~15); + if (tr > r) { + low_c = low_dot3 & 15; + high_c = ~high_dot3 & 15; + } + } + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); } - break; + bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); + } // if (use_sels) + + const uint32_t total_ls_passes = (flags & cEncodeBC1HigherQuality) ? 3 : (flags & cEncodeBC1HighQuality ? 2 : 1); + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int prev_lr = lr, prev_lg = lg, prev_lb = lb, prev_hr = hr, prev_hg = hg, prev_hb = hb; + + // This is where the real magic happens. We have an array of candidate selectors, so let's use least squares to compute the optimal low/high endpoint colors. + vec3F xl, xh; + if (!compute_least_squares_endpoints_rgb(pSrc_pixels, sels, &xl, &xh)) + { + if (avg_r < 0) + { + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + total_r += pSrc_pixels[i].r; + total_g += pSrc_pixels[i].g; + total_b += pSrc_pixels[i].b; + } + + avg_r = (total_r + 8) >> 4; + avg_g = (total_g + 8) >> 4; + avg_b = (total_b + 8) >> 4; + } + + // All selectors equal - treat it as a solid block which should always be equal or better. + lr = g_bc1_match5_equals_1[avg_r].m_hi; + lg = g_bc1_match6_equals_1[avg_g].m_hi; + lb = g_bc1_match5_equals_1[avg_b].m_hi; + + hr = g_bc1_match5_equals_1[avg_r].m_lo; + hg = g_bc1_match6_equals_1[avg_g].m_lo; + hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. + } + else + { + lr = basisu::clamp((int)((xl.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + lg = basisu::clamp((int)((xl.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + lb = basisu::clamp((int)((xl.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + + hr = basisu::clamp((int)((xh.c[0]) * (31.0f / 255.0f) + .5f), 0, 31); + hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); + hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); + } + + if ((prev_lr == lr) && (prev_lg == lg) && (prev_lb == lb) && (prev_hr == hr) && (prev_hg == hg) && (prev_hb == hb)) + break; + + bc1_find_sels_2(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } - default: + + uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) { - assert(0); - BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Invalid fmt\n"); - break; + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast<uint16_t>(lc16)); + pDst_block->set_high_color(static_cast<uint16_t>(hc16)); + + pDst_block->m_selectors[0] = mask; + pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } + } - return status; + // Scale the UASTC first subset endpoints and first plane's weight indices directly to BC1's - fastest. + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst) + { + const uint32_t mode = unpacked_src_blk.m_mode; + const astc_block_desc& astc_blk = unpacked_src_blk.m_astc; + + dxt1_block& b = *static_cast<dxt1_block*>(pDst); + + const uint32_t endpoint_range = g_uastc_mode_endpoint_ranges[mode]; + + const uint32_t total_comps = g_uastc_mode_comps[mode]; + + if (total_comps == 2) + { + const uint32_t l = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant; + const uint32_t h = g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant; + + b.set_low_color(dxt1_block::pack_color(color32(l, l, l, 255), true, 127)); + b.set_high_color(dxt1_block::pack_color(color32(h, h, h, 255), true, 127)); + } + else + { + b.set_low_color(dxt1_block::pack_color( + color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[0]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[2]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[4]].m_unquant, + 255), true, 127) + ); + + b.set_high_color(dxt1_block::pack_color( + color32(g_astc_unquant[endpoint_range][astc_blk.m_endpoints[1]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[3]].m_unquant, + g_astc_unquant[endpoint_range][astc_blk.m_endpoints[5]].m_unquant, + 255), true, 127) + ); + } + + if (b.get_low_color() == b.get_high_color()) + { + // Always forbid 3 color blocks + uint16_t lc16 = (uint16_t)b.get_low_color(); + uint16_t hc16 = (uint16_t)b.get_high_color(); + + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + b.set_low_color(static_cast<uint16_t>(lc16)); + b.set_high_color(static_cast<uint16_t>(hc16)); + + b.m_selectors[0] = mask; + b.m_selectors[1] = mask; + b.m_selectors[2] = mask; + b.m_selectors[3] = mask; + } + else + { + bool invert = false; + if (b.get_low_color() < b.get_high_color()) + { + std::swap(b.m_low_color[0], b.m_high_color[0]); + std::swap(b.m_low_color[1], b.m_high_color[1]); + invert = true; + } + + const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]]; + + const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1; + + uint32_t sels = 0; + for (int i = 15; i >= 0; --i) + { + uint32_t s = pTran[astc_blk.m_weights[i << plane_shift]]; + + if (invert) + s ^= 1; + + sels = (sels << 2) | s; + } + b.m_selectors[0] = sels & 0xFF; + b.m_selectors[1] = (sels >> 8) & 0xFF; + b.m_selectors[2] = (sels >> 16) & 0xFF; + b.m_selectors[3] = (sels >> 24) & 0xFF; + } } - uint32_t basis_get_bytes_per_block(transcoder_texture_format fmt) + // Scale the UASTC first plane's weight indices to BC1, use 1 or 2 least squares passes to compute endpoints - no PCA needed. + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality) { - switch (fmt) + const uint32_t mode = unpacked_src_blk.m_mode; + + const astc_block_desc& astc_blk = unpacked_src_blk.m_astc; + + dxt1_block& b = *static_cast<dxt1_block*>(pDst); + + b.set_low_color(1); + b.set_high_color(0); + + const uint8_t* pTran = s_uastc_to_bc1_weights[g_uastc_mode_weight_bits[mode]]; + + const uint32_t plane_shift = g_uastc_mode_planes[mode] - 1; + + uint32_t sels = 0; + for (int i = 15; i >= 0; --i) { - case transcoder_texture_format::cTFETC1_RGB: - case transcoder_texture_format::cTFBC1_RGB: - case transcoder_texture_format::cTFBC4_R: - case transcoder_texture_format::cTFPVRTC1_4_RGB: - case transcoder_texture_format::cTFPVRTC1_4_RGBA: - case transcoder_texture_format::cTFATC_RGB: - case transcoder_texture_format::cTFPVRTC2_4_RGB: - case transcoder_texture_format::cTFPVRTC2_4_RGBA: - case transcoder_texture_format::cTFETC2_EAC_R11: - return 8; - case transcoder_texture_format::cTFBC7_M6_RGB: - case transcoder_texture_format::cTFBC7_M5_RGBA: - case transcoder_texture_format::cTFETC2_RGBA: - case transcoder_texture_format::cTFBC3_RGBA: - case transcoder_texture_format::cTFBC5_RG: - case transcoder_texture_format::cTFASTC_4x4_RGBA: - case transcoder_texture_format::cTFATC_RGBA: - case transcoder_texture_format::cTFFXT1_RGB: - case transcoder_texture_format::cTFETC2_EAC_RG11: - return 16; - case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t) * 16; - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return sizeof(uint16_t) * 16; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); - break; + sels <<= 2; + sels |= pTran[astc_blk.m_weights[i << plane_shift]]; } - return 0; + + b.m_selectors[0] = sels & 0xFF; + b.m_selectors[1] = (sels >> 8) & 0xFF; + b.m_selectors[2] = (sels >> 16) & 0xFF; + b.m_selectors[3] = (sels >> 24) & 0xFF; + + encode_bc1(&b, (const uint8_t*)&block_pixels[0][0].c[0], (high_quality ? cEncodeBC1HighQuality : 0) | cEncodeBC1UseSelectors); } - const char* basis_get_format_name(transcoder_texture_format fmt) + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality) { - switch (fmt) + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) { - case transcoder_texture_format::cTFETC1_RGB: return "ETC1_RGB"; - case transcoder_texture_format::cTFBC1_RGB: return "BC1_RGB"; - case transcoder_texture_format::cTFBC4_R: return "BC4_R"; - case transcoder_texture_format::cTFPVRTC1_4_RGB: return "PVRTC1_4_RGB"; - case transcoder_texture_format::cTFPVRTC1_4_RGBA: return "PVRTC1_4_RGBA"; - case transcoder_texture_format::cTFBC7_M6_RGB: return "BC7_M6_RGB"; - case transcoder_texture_format::cTFBC7_M5_RGBA: return "BC7_M5_RGBA"; - case transcoder_texture_format::cTFETC2_RGBA: return "ETC2_RGBA"; - case transcoder_texture_format::cTFBC3_RGBA: return "BC3_RGBA"; - case transcoder_texture_format::cTFBC5_RG: return "BC5_RG"; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return "ASTC_RGBA"; - case transcoder_texture_format::cTFATC_RGB: return "ATC_RGB"; - case transcoder_texture_format::cTFATC_RGBA: return "ATC_RGBA"; - case transcoder_texture_format::cTFRGBA32: return "RGBA32"; - case transcoder_texture_format::cTFRGB565: return "RGB565"; - case transcoder_texture_format::cTFBGR565: return "BGR565"; - case transcoder_texture_format::cTFRGBA4444: return "RGBA4444"; - case transcoder_texture_format::cTFFXT1_RGB: return "FXT1_RGB"; - case transcoder_texture_format::cTFPVRTC2_4_RGB: return "PVRTC2_4_RGB"; - case transcoder_texture_format::cTFPVRTC2_4_RGBA: return "PVRTC2_4_RGBA"; - case transcoder_texture_format::cTFETC2_EAC_R11: return "ETC2_EAC_R11"; - case transcoder_texture_format::cTFETC2_EAC_RG11: return "ETC2_EAC_RG11"; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); - break; + encode_bc1_solid_block(pDst, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b); + return true; } - return ""; + + if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0)) + transcode_uastc_to_bc1_hint0(unpacked_src_blk, pDst); + else + { + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (unpacked_src_blk.m_bc1_hint1) + transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pDst, high_quality); + else + encode_bc1(pDst, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0); + } + + return true; } - const char* basis_get_texture_type_name(basis_texture_type tex_type) + static void write_bc4_solid_block(uint8_t* pDst, uint32_t a) { - switch (tex_type) + pDst[0] = (uint8_t)a; + pDst[1] = (uint8_t)a; + memset(pDst + 2, 0, 6); + } + + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block = pDst; + dxt1_block* pBC1_block = &static_cast<dxt1_block*>(pDst)[1]; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) { - case cBASISTexType2D: return "2D"; - case cBASISTexType2DArray: return "2D array"; - case cBASISTexTypeCubemapArray: return "cubemap array"; - case cBASISTexTypeVideoFrames: return "video"; - case cBASISTexTypeVolume: return "3D"; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_texture_type_name: Invalid tex_type\n"); - break; + write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.a); + encode_bc1_solid_block(pBC1_block, unpacked_src_blk.m_solid_color.r, unpacked_src_blk.m_solid_color.g, unpacked_src_blk.m_solid_color.b); + return true; } - return ""; + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block, &block_pixels[0][0].a, sizeof(color32)); + + if ((!high_quality) && (unpacked_src_blk.m_bc1_hint0)) + transcode_uastc_to_bc1_hint0(unpacked_src_blk, pBC1_block); + else + { + if (unpacked_src_blk.m_bc1_hint1) + transcode_uastc_to_bc1_hint1(unpacked_src_blk, block_pixels, pBC1_block, high_quality); + else + encode_bc1(pBC1_block, &block_pixels[0][0].r, high_quality ? cEncodeBC1HighQuality : 0); + } + + return true; } - bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt) + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0) { - switch (fmt) + BASISU_NOTE_UNUSED(high_quality); + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block = pDst; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) { - case transcoder_texture_format::cTFETC2_RGBA: - case transcoder_texture_format::cTFBC3_RGBA: - case transcoder_texture_format::cTFASTC_4x4_RGBA: - case transcoder_texture_format::cTFBC7_M5_RGBA: - case transcoder_texture_format::cTFPVRTC1_4_RGBA: - case transcoder_texture_format::cTFPVRTC2_4_RGBA: - case transcoder_texture_format::cTFATC_RGBA: - case transcoder_texture_format::cTFRGBA32: - case transcoder_texture_format::cTFRGBA4444: + write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block), unpacked_src_blk.m_solid_color.c[chan0]); return true; - default: - break; } - return false; + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block, &block_pixels[0][0].c[chan0], sizeof(color32)); + + return true; } - basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt) + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1) { - switch (fmt) + BASISU_NOTE_UNUSED(high_quality); + + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + void* pBC4_block0 = pDst; + void* pBC4_block1 = (uint8_t*)pDst + 8; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) { - case transcoder_texture_format::cTFETC1_RGB: return basisu::texture_format::cETC1; - case transcoder_texture_format::cTFBC1_RGB: return basisu::texture_format::cBC1; - case transcoder_texture_format::cTFBC4_R: return basisu::texture_format::cBC4; - case transcoder_texture_format::cTFPVRTC1_4_RGB: return basisu::texture_format::cPVRTC1_4_RGB; - case transcoder_texture_format::cTFPVRTC1_4_RGBA: return basisu::texture_format::cPVRTC1_4_RGBA; - case transcoder_texture_format::cTFBC7_M6_RGB: return basisu::texture_format::cBC7; - case transcoder_texture_format::cTFBC7_M5_RGBA: return basisu::texture_format::cBC7; - case transcoder_texture_format::cTFETC2_RGBA: return basisu::texture_format::cETC2_RGBA; - case transcoder_texture_format::cTFBC3_RGBA: return basisu::texture_format::cBC3; - case transcoder_texture_format::cTFBC5_RG: return basisu::texture_format::cBC5; - case transcoder_texture_format::cTFASTC_4x4_RGBA: return basisu::texture_format::cASTC4x4; - case transcoder_texture_format::cTFATC_RGB: return basisu::texture_format::cATC_RGB; - case transcoder_texture_format::cTFATC_RGBA: return basisu::texture_format::cATC_RGBA_INTERPOLATED_ALPHA; - case transcoder_texture_format::cTFRGBA32: return basisu::texture_format::cRGBA32; - case transcoder_texture_format::cTFRGB565: return basisu::texture_format::cRGB565; - case transcoder_texture_format::cTFBGR565: return basisu::texture_format::cBGR565; - case transcoder_texture_format::cTFRGBA4444: return basisu::texture_format::cRGBA4444; - case transcoder_texture_format::cTFFXT1_RGB: return basisu::texture_format::cFXT1_RGB; - case transcoder_texture_format::cTFPVRTC2_4_RGB: return basisu::texture_format::cPVRTC2_4_RGBA; - case transcoder_texture_format::cTFPVRTC2_4_RGBA: return basisu::texture_format::cPVRTC2_4_RGBA; - case transcoder_texture_format::cTFETC2_EAC_R11: return basisu::texture_format::cETC2_R11_EAC; - case transcoder_texture_format::cTFETC2_EAC_RG11: return basisu::texture_format::cETC2_RG11_EAC; - default: - assert(0); - BASISU_DEVEL_ERROR("basis_get_basisu_texture_format: Invalid fmt\n"); - break; + write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block0), unpacked_src_blk.m_solid_color.c[chan0]); + write_bc4_solid_block(static_cast<uint8_t*>(pBC4_block1), unpacked_src_blk.m_solid_color.c[chan1]); + return true; } - return basisu::texture_format::cInvalidTextureFormat; + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + basist::encode_bc4(pBC4_block0, &block_pixels[0][0].c[chan0], sizeof(color32)); + basist::encode_bc4(pBC4_block1, &block_pixels[0][0].c[chan1], sizeof(color32)); + + return true; } - bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type) + static const uint8_t s_etc2_eac_bit_ofs[16] = { 45, 33, 21, 9, 42, 30, 18, 6, 39, 27, 15, 3, 36, 24, 12, 0 }; + + static void pack_eac_solid_block(eac_block& blk, uint32_t a) { - switch (tex_type) + blk.m_base = static_cast<uint8_t>(a); + blk.m_table = 13; + blk.m_multiplier = 0; + + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); + + return; + } + + // Only checks 4 tables. + static void pack_eac(eac_block& blk, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < 16; i++) { - case transcoder_texture_format::cTFRGBA32: - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: + const uint32_t a = pPixels[i * stride]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + pack_eac_solid_block(blk, min_alpha); + return; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + const uint32_t SINGLE_TABLE_THRESH = 5; + if (alpha_range <= SINGLE_TABLE_THRESH) + { + // If alpha_range <= 5 table 13 is lossless + int base = clamp255((int)max_alpha - 2); + + blk.m_base = base; + blk.m_multiplier = 1; + blk.m_table = 13; + + base -= 3; + + uint64_t packed_sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 }; + + int sel = a - base; + assert(sel >= 0 && sel <= 5); + + packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]); + } + + blk.set_selector_bits(packed_sels); + + return; + } + + const uint32_t T0 = 2, T1 = 8, T2 = 11, T3 = 13; + static const uint8_t s_tables[4] = { T0, T1, T2, T3 }; + + int base[4], mul[4]; + uint32_t mul_or = 0; + for (uint32_t i = 0; i < 4; i++) + { + const uint32_t table = s_tables[i]; + + const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + + base[i] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range))); + mul[i] = clampi((int)roundf(alpha_range / range), 1, 15); + mul_or |= mul[i]; + } + + uint32_t total_err[4] = { 0, 0, 0, 0 }; + uint8_t sels[4][16]; + + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + uint32_t l0 = UINT32_MAX, l1 = UINT32_MAX, l2 = UINT32_MAX, l3 = UINT32_MAX; + + if ((a < 7) || (a > (255 - 7))) + { + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = clamp255(mul[0] * g_eac_modifier_table[T0][s] + base[0]); + const int v1 = clamp255(mul[1] * g_eac_modifier_table[T1][s] + base[1]); + const int v2 = clamp255(mul[2] * g_eac_modifier_table[T2][s] + base[2]); + const int v3 = clamp255(mul[3] * g_eac_modifier_table[T3][s] + base[3]); + + l0 = basisu::minimum(l0, (basisu::iabs(v0 - a) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1 - a) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2 - a) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3 - a) << 3) | s); + } + } + else if (mul_or == 1) + { + const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a; + + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = g_eac_modifier_table[T0][s] + a0; + const int v1 = g_eac_modifier_table[T1][s] + a1; + const int v2 = g_eac_modifier_table[T2][s] + a2; + const int v3 = g_eac_modifier_table[T3][s] + a3; + + l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s); + } + } + else + { + const int a0 = base[0] - a, a1 = base[1] - a, a2 = base[2] - a, a3 = base[3] - a; + + for (uint32_t s = 0; s < 8; s++) + { + const int v0 = mul[0] * g_eac_modifier_table[T0][s] + a0; + const int v1 = mul[1] * g_eac_modifier_table[T1][s] + a1; + const int v2 = mul[2] * g_eac_modifier_table[T2][s] + a2; + const int v3 = mul[3] * g_eac_modifier_table[T3][s] + a3; + + l0 = basisu::minimum(l0, (basisu::iabs(v0) << 3) | s); + l1 = basisu::minimum(l1, (basisu::iabs(v1) << 3) | s); + l2 = basisu::minimum(l2, (basisu::iabs(v2) << 3) | s); + l3 = basisu::minimum(l3, (basisu::iabs(v3) << 3) | s); + } + } + + sels[0][i] = l0 & 7; + sels[1][i] = l1 & 7; + sels[2][i] = l2 & 7; + sels[3][i] = l3 & 7; + + total_err[0] += basisu::square<uint32_t>(l0 >> 3); + total_err[1] += basisu::square<uint32_t>(l1 >> 3); + total_err[2] += basisu::square<uint32_t>(l2 >> 3); + total_err[3] += basisu::square<uint32_t>(l3 >> 3); + } + + uint32_t min_err = total_err[0], min_index = 0; + for (uint32_t i = 1; i < 4; i++) + { + if (total_err[i] < min_err) + { + min_err = total_err[i]; + min_index = i; + } + } + + blk.m_base = base[min_index]; + blk.m_multiplier = mul[min_index]; + blk.m_table = s_tables[min_index]; + + uint64_t packed_sels = 0; + const uint8_t* pSels = &sels[min_index][0]; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]); + + blk.set_selector_bits(packed_sels); + } + + // Checks all 16 tables. Around ~2 dB better vs. pack_eac(), ~1.2 dB less than near-optimal. + static void pack_eac_high_quality(eac_block& blk, const uint8_t* pPixels, uint32_t stride) + { + uint32_t min_alpha = 255, max_alpha = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t a = pPixels[i * stride]; + if (a < min_alpha) min_alpha = a; + if (a > max_alpha) max_alpha = a; + } + + if (min_alpha == max_alpha) + { + pack_eac_solid_block(blk, min_alpha); + return; + } + + const uint32_t alpha_range = max_alpha - min_alpha; + + const uint32_t SINGLE_TABLE_THRESH = 5; + if (alpha_range <= SINGLE_TABLE_THRESH) + { + // If alpha_range <= 5 table 13 is lossless + int base = clamp255((int)max_alpha - 2); + + blk.m_base = base; + blk.m_multiplier = 1; + blk.m_table = 13; + + base -= 3; + + uint64_t packed_sels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + static const uint8_t s_sels[6] = { 2, 1, 0, 4, 5, 6 }; + + int sel = a - base; + assert(sel >= 0 && sel <= 5); + + packed_sels |= (static_cast<uint64_t>(s_sels[sel]) << s_etc2_eac_bit_ofs[i]); + } + + blk.set_selector_bits(packed_sels); + + return; + } + + int base[16], mul[16]; + for (uint32_t table = 0; table < 16; table++) + { + const float range = (float)(g_eac_modifier_table[table][ETC2_EAC_MAX_VALUE_SELECTOR] - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]); + + base[table] = clamp255((int)roundf(basisu::lerp((float)min_alpha, (float)max_alpha, (float)(0 - g_eac_modifier_table[table][ETC2_EAC_MIN_VALUE_SELECTOR]) / range))); + mul[table] = clampi((int)roundf(alpha_range / range), 1, 15); + } + + uint32_t total_err[16]; + memset(total_err, 0, sizeof(total_err)); + + uint8_t sels[16][16]; + + for (uint32_t table = 0; table < 16; table++) + { + const int8_t* pTable = &g_eac_modifier_table[table][0]; + const int m = mul[table], b = base[table]; + + uint32_t prev_l = 0, prev_a = UINT32_MAX; + + for (uint32_t i = 0; i < 16; i++) + { + const int a = pPixels[i * stride]; + + if ((uint32_t)a == prev_a) + { + sels[table][i] = prev_l & 7; + total_err[table] += basisu::square<uint32_t>(prev_l >> 3); + } + else + { + uint32_t l = basisu::iabs(clamp255(m * pTable[0] + b) - a) << 3; + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[1] + b) - a) << 3) | 1); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[2] + b) - a) << 3) | 2); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[3] + b) - a) << 3) | 3); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[4] + b) - a) << 3) | 4); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[5] + b) - a) << 3) | 5); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[6] + b) - a) << 3) | 6); + l = basisu::minimum(l, (basisu::iabs(clamp255(m * pTable[7] + b) - a) << 3) | 7); + + sels[table][i] = l & 7; + total_err[table] += basisu::square<uint32_t>(l >> 3); + + prev_l = l; + prev_a = a; + } + } + } + + uint32_t min_err = total_err[0], min_index = 0; + for (uint32_t i = 1; i < 16; i++) + { + if (total_err[i] < min_err) + { + min_err = total_err[i]; + min_index = i; + } + } + + blk.m_base = base[min_index]; + blk.m_multiplier = mul[min_index]; + blk.m_table = min_index; + + uint64_t packed_sels = 0; + const uint8_t* pSels = &sels[min_index][0]; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= (static_cast<uint64_t>(pSels[i]) << s_etc2_eac_bit_ofs[i]); + + blk.set_selector_bits(packed_sels); + } + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0) + { + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) + { + pack_eac_solid_block(*static_cast<eac_block*>(pDst), unpacked_src_blk.m_solid_color.c[chan0]); return true; - default: - break; } - return false; + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (chan0 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, pDst); + else + (high_quality ? pack_eac_high_quality : pack_eac)(*static_cast<eac_block*>(pDst), &block_pixels[0][0].c[chan0], sizeof(color32)); + + return true; } - bool basis_block_format_is_uncompressed(block_format tex_type) + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1) { - switch (tex_type) + unpacked_uastc_block unpacked_src_blk; + if (!unpack_uastc(src_blk, unpacked_src_blk, false)) + return false; + + const uint32_t mode = unpacked_src_blk.m_mode; + + if (mode == UASTC_MODE_INDEX_SOLID_COLOR) { - case block_format::cRGB32: - case block_format::cRGBA32: - case block_format::cA32: - case block_format::cRGB565: - case block_format::cBGR565: - case block_format::cRGBA4444_COLOR: - case block_format::cRGBA4444_ALPHA: - case block_format::cRGBA4444_COLOR_OPAQUE: + pack_eac_solid_block(static_cast<eac_block*>(pDst)[0], unpacked_src_blk.m_solid_color.c[chan0]); + pack_eac_solid_block(static_cast<eac_block*>(pDst)[1], unpacked_src_blk.m_solid_color.c[chan1]); return true; - default: - break; } - return false; + + color32 block_pixels[4][4]; + const bool unpack_srgb = false; + if (!unpack_uastc(unpacked_src_blk, &block_pixels[0][0], unpack_srgb)) + return false; + + if (chan0 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[0]); + else + (high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[0], &block_pixels[0][0].c[chan0], sizeof(color32)); + + if (chan1 == 3) + transcode_uastc_to_etc2_eac_a8(unpacked_src_blk, block_pixels, &static_cast<eac_block*>(pDst)[1]); + else + (high_quality ? pack_eac_high_quality : pack_eac)(static_cast<eac_block*>(pDst)[1], &block_pixels[0][0].c[chan1], sizeof(color32)); + return true; } - - uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) + + // PVRTC1 + static void fixup_pvrtc1_4_modulation_rgb( + const uastc_block* pSrc_blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, + uint32_t num_blocks_x, uint32_t num_blocks_y, bool from_alpha) { - switch (fmt) + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast<int>(num_blocks_y); y++) { - case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t); - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return sizeof(uint16_t); - default: - break; + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++) + { + const uastc_block& src_block = pSrc_blocks[block_index]; + + color32 block_pixels[4][4]; + unpack_uastc(src_block, &block_pixels[0][0], false); + if (from_alpha) + { + // Just set RGB to alpha to avoid adding complexity below. + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t a = ((color32*)block_pixels)[i].a; + ((color32*)block_pixels)[i].set(a, a, a, 255); + } + } + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = (get_opaque_endpoint_l0(e) * 255) / 31; \ + e1[ex][ey] = (get_opaque_endpoint_l1(e) * 255) / 31; \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b) * 16; \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + static void fixup_pvrtc1_4_modulation_rgba( + const uastc_block* pSrc_blocks, + const uint32_t* pPVRTC_endpoints, + void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y) + { + const uint32_t x_mask = num_blocks_x - 1; + const uint32_t y_mask = num_blocks_y - 1; + const uint32_t x_bits = basisu::total_bits(x_mask); + const uint32_t y_bits = basisu::total_bits(y_mask); + const uint32_t min_bits = basisu::minimum(x_bits, y_bits); + //const uint32_t max_bits = basisu::maximum(x_bits, y_bits); + const uint32_t swizzle_mask = (1 << (min_bits * 2)) - 1; + + uint32_t block_index = 0; + + // really 3x3 + int e0[4][4], e1[4][4]; + + for (int y = 0; y < static_cast<int>(num_blocks_y); y++) + { + const uint32_t* pE_rows[3]; + + for (int ey = 0; ey < 3; ey++) + { + int by = y + ey - 1; + + const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; + + pE_rows[ey] = pE; + + for (int ex = 0; ex < 3; ex++) + { + int bx = 0 + ex - 1; + + const uint32_t e = pE[bx & x_mask]; + + e0[ex][ey] = get_endpoint_l8(e, 0); + e1[ex][ey] = get_endpoint_l8(e, 1); + } + } + + const uint32_t y_swizzle = (g_pvrtc_swizzle_table[y >> 8] << 16) | g_pvrtc_swizzle_table[y & 0xFF]; + + for (int x = 0; x < static_cast<int>(num_blocks_x); x++, block_index++) + { + const uastc_block& src_block = pSrc_blocks[block_index]; + + color32 block_pixels[4][4]; + unpack_uastc(src_block, &block_pixels[0][0], false); + + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); + + uint32_t swizzled = x_swizzle | y_swizzle; + if (num_blocks_x != num_blocks_y) + { + swizzled &= swizzle_mask; + + if (num_blocks_x > num_blocks_y) + swizzled |= ((x >> min_bits) << (min_bits * 2)); + else + swizzled |= ((y >> min_bits) << (min_bits * 2)); + } + + pvrtc4_block* pDst_block = static_cast<pvrtc4_block*>(pDst_blocks) + swizzled; + pDst_block->m_endpoints = pPVRTC_endpoints[block_index]; + + { + const uint32_t ex = 2; + int bx = x + ex - 1; + bx &= x_mask; + +#define DO_ROW(ey) \ + { \ + const uint32_t e = pE_rows[ey][bx]; \ + e0[ex][ey] = get_endpoint_l8(e, 0); \ + e1[ex][ey] = get_endpoint_l8(e, 1); \ + } + + DO_ROW(0); + DO_ROW(1); + DO_ROW(2); +#undef DO_ROW + } + + uint32_t mod = 0; + +#define DO_PIX(lx, ly, w0, w1, w2, w3) \ + { \ + int ca_l = a0 * w0 + a1 * w1 + a2 * w2 + a3 * w3; \ + int cb_l = b0 * w0 + b1 * w1 + b2 * w2 + b3 * w3; \ + int cl = 16 * (block_pixels[ly][lx].r + block_pixels[ly][lx].g + block_pixels[ly][lx].b + block_pixels[ly][lx].a); \ + int dl = cb_l - ca_l; \ + int vl = cl - ca_l; \ + int p = vl * 16; \ + if (ca_l > cb_l) { p = -p; dl = -dl; } \ + uint32_t m = 0; \ + if (p > 3 * dl) m = (uint32_t)(1 << ((ly) * 8 + (lx) * 2)); \ + if (p > 8 * dl) m = (uint32_t)(2 << ((ly) * 8 + (lx) * 2)); \ + if (p > 13 * dl) m = (uint32_t)(3 << ((ly) * 8 + (lx) * 2)); \ + mod |= m; \ + } + + { + const uint32_t ex = 0, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 0, 4, 4, 4, 4); + DO_PIX(1, 0, 2, 6, 2, 6); + DO_PIX(0, 1, 2, 2, 6, 6); + DO_PIX(1, 1, 1, 3, 3, 9); + } + + { + const uint32_t ex = 1, ey = 0; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 0, 8, 0, 8, 0); + DO_PIX(3, 0, 6, 2, 6, 2); + DO_PIX(2, 1, 4, 0, 12, 0); + DO_PIX(3, 1, 3, 1, 9, 3); + } + + { + const uint32_t ex = 0, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(0, 2, 8, 8, 0, 0); + DO_PIX(1, 2, 4, 12, 0, 0); + DO_PIX(0, 3, 6, 6, 2, 2); + DO_PIX(1, 3, 3, 9, 1, 3); + } + + { + const uint32_t ex = 1, ey = 1; + const int a0 = e0[ex][ey], a1 = e0[ex + 1][ey], a2 = e0[ex][ey + 1], a3 = e0[ex + 1][ey + 1]; + const int b0 = e1[ex][ey], b1 = e1[ex + 1][ey], b2 = e1[ex][ey + 1], b3 = e1[ex + 1][ey + 1]; + DO_PIX(2, 2, 16, 0, 0, 0); + DO_PIX(3, 2, 12, 4, 0, 0); + DO_PIX(2, 3, 12, 0, 4, 0); + DO_PIX(3, 3, 9, 3, 3, 1); + } +#undef DO_PIX + + pDst_block->m_modulation = mod; + + e0[0][0] = e0[1][0]; e0[1][0] = e0[2][0]; + e0[0][1] = e0[1][1]; e0[1][1] = e0[2][1]; + e0[0][2] = e0[1][2]; e0[1][2] = e0[2][2]; + + e1[0][0] = e1[1][0]; e1[1][0] = e1[2][0]; + e1[0][1] = e1[1][1]; e1[1][1] = e1[2][1]; + e1[0][2] = e1[1][2]; e1[1][2] = e1[2][2]; + + } // x + } // y + } + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha) + { + BASISU_NOTE_UNUSED(high_quality); + + if ((!num_blocks_x) || (!num_blocks_y)) + return false; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + if (!basisu::is_pow2(width) || !basisu::is_pow2(height)) + return false; + + basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y); + + for (uint32_t y = 0; y < num_blocks_y; y++) + { + for (uint32_t x = 0; x < num_blocks_x; x++) + { + color32 block_pixels[16]; + if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) + return false; + + // Get block's RGB bounding box + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + if (from_alpha) + { + uint32_t low_a = 255, high_a = 0; + for (uint32_t i = 0; i < 16; i++) + { + low_a = basisu::minimum<uint32_t>(low_a, block_pixels[i].a); + high_a = basisu::maximum<uint32_t>(high_a, block_pixels[i].a); + } + low_color.set(low_a, low_a, low_a, 255); + high_color.set(high_a, high_a, high_a, 255); + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + } + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. + pvrtc4_block temp; + temp.set_opaque_endpoint_floor(0, low_color); + temp.set_opaque_endpoint_ceil(1, high_color); + + temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints; + } } - return 0; + + fixup_pvrtc1_4_modulation_rgb(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y, from_alpha); + + return true; } - - uint32_t basis_get_block_width(transcoder_texture_format tex_type) + + bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality) { - switch (tex_type) + BASISU_NOTE_UNUSED(high_quality); + + if ((!num_blocks_x) || (!num_blocks_y)) + return false; + + const uint32_t width = num_blocks_x * 4; + const uint32_t height = num_blocks_y * 4; + if (!basisu::is_pow2(width) || !basisu::is_pow2(height)) + return false; + + basisu::vector<uint32_t> temp_endpoints(num_blocks_x * num_blocks_y); + + for (uint32_t y = 0; y < num_blocks_y; y++) { - case transcoder_texture_format::cTFFXT1_RGB: - return 8; - default: + for (uint32_t x = 0; x < num_blocks_x; x++) + { + color32 block_pixels[16]; + if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) + return false; + + // Get block's RGBA bounding box + color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); + + for (uint32_t i = 0; i < 16; i++) + { + low_color = color32::comp_min(low_color, block_pixels[i]); + high_color = color32::comp_max(high_color, block_pixels[i]); + } + + // Set PVRTC1 endpoints to floor/ceil of bounding box's coordinates. + pvrtc4_block temp; + temp.set_endpoint_floor(0, low_color); + temp.set_endpoint_ceil(1, high_color); + + temp_endpoints[x + y * num_blocks_x] = temp.m_endpoints; + } + } + + fixup_pvrtc1_4_modulation_rgba(pSrc_blocks, &temp_endpoints[0], pDst_blocks, num_blocks_x, num_blocks_y); + + return true; + } + + void uastc_init() + { + for (uint32_t range = 0; range < BC7ENC_TOTAL_ASTC_RANGES; range++) + { + if (!astc_is_valid_endpoint_range(range)) + continue; + + const uint32_t levels = astc_get_levels(range); + + uint32_t vals[256]; + for (uint32_t i = 0; i < levels; i++) + vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; + + std::sort(vals, vals + levels); + + for (uint32_t i = 0; i < levels; i++) + { + const uint32_t order = vals[i] & 0xFF; + const uint32_t unq = vals[i] >> 8; + + g_astc_unquant[range][order].m_unquant = (uint8_t)unq; + g_astc_unquant[range][order].m_index = (uint8_t)i; + + } // i + } + + // TODO: Precompute? + // BC7 777.1 + for (int c = 0; c < 256; c++) + { + for (uint32_t lp = 0; lp < 2; lp++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + + for (uint32_t l = 0; l < 128; l++) + { + const uint32_t low = (l << 1) | lp; + + for (uint32_t h = 0; h < 128; h++) + { + const uint32_t high = (h << 1) | lp; + + const int k = (low * (64 - g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX]) + high * g_bc7_weights4[BC7ENC_MODE_6_OPTIMAL_INDEX] + 32) >> 6; + + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_bc7_mode_6_optimal_endpoints[c][lp] = best; + } // lp + + } // c + + // BC7 777 + for (int c = 0; c < 256; c++) + { + endpoint_err best; + best.m_error = (uint16_t)UINT16_MAX; + + for (uint32_t l = 0; l < 128; l++) + { + const uint32_t low = (l << 1) | (l >> 6); + + for (uint32_t h = 0; h < 128; h++) + { + const uint32_t high = (h << 1) | (h >> 6); + + const int k = (low * (64 - g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7ENC_MODE_5_OPTIMAL_INDEX] + 32) >> 6; + + const int err = (k - c) * (k - c); + if (err < best.m_error) + { + best.m_error = (uint16_t)err; + best.m_lo = (uint8_t)l; + best.m_hi = (uint8_t)h; + } + } // h + } // l + + g_bc7_mode_5_optimal_endpoints[c] = best; + + } // c + } + +#endif // #if BASISD_SUPPORT_UASTC + +// ------------------------------------------------------------------------------------------------------ +// KTX2 +// ------------------------------------------------------------------------------------------------------ + +#if BASISD_SUPPORT_KTX2 + const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; + + ktx2_transcoder::ktx2_transcoder(basist::etc1_global_selector_codebook* pGlobal_sel_codebook) : + m_etc1s_transcoder(pGlobal_sel_codebook) + { + clear(); + } + + void ktx2_transcoder::clear() + { + m_pData = nullptr; + m_data_size = 0; + + memset(&m_header, 0, sizeof(m_header)); + m_levels.clear(); + m_dfd.clear(); + m_key_values.clear(); + memset(&m_etc1s_header, 0, sizeof(m_etc1s_header)); + m_etc1s_image_descs.clear(); + + m_format = basist::basis_tex_format::cETC1S; + + m_dfd_color_model = 0; + m_dfd_color_prims = KTX2_DF_PRIMARIES_UNSPECIFIED; + m_dfd_transfer_func = 0; + m_dfd_flags = 0; + m_dfd_samples = 0; + m_dfd_chan0 = KTX2_DF_CHANNEL_UASTC_RGB; + m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; + + m_etc1s_transcoder.clear(); + + m_def_transcoder_state.clear(); + + m_has_alpha = false; + m_is_video = false; + } + + bool ktx2_transcoder::init(const void* pData, uint32_t data_size) + { + clear(); + + if (!pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: pData is nullptr\n"); + assert(0); + return false; + } + + if (data_size <= sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is impossibly too small to be a valid KTX2 file\n"); + return false; + } + + if (memcmp(pData, g_ktx2_file_identifier, sizeof(g_ktx2_file_identifier)) != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file identifier is not present\n"); + return false; + } + + m_pData = static_cast<const uint8_t *>(pData); + m_data_size = data_size; + + memcpy(&m_header, pData, sizeof(m_header)); + + // We only support UASTC and ETC1S + if (m_header.m_vk_format != KTX2_VK_FORMAT_UNDEFINED) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: KTX2 file must be in ETC1S or UASTC format\n"); + return false; + } + + // 3.3: "When format is VK_FORMAT_UNDEFINED, typeSize must equal 1." + if (m_header.m_type_size != 1) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid type_size\n"); + return false; + } + + // We only currently support 2D textures (plain, cubemapped, or texture array), which is by far the most common use case. + // The BasisU library does not support 1D or 3D textures at all. + if ((m_header.m_pixel_width < 1) || (m_header.m_pixel_height < 1) || (m_header.m_pixel_depth > 0)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Only 2D or cubemap textures are supported\n"); + return false; + } + + // Face count must be 1 or 6 + if ((m_header.m_face_count != 1) && (m_header.m_face_count != 6)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid face count, file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + // 3.4: Make sure cubemaps are square. + if (m_header.m_pixel_width != m_header.m_pixel_height) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Cubemap is not square\n"); + return false; + } + } + + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" + if (m_header.m_level_count < 1) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level count\n"); + return false; + } + + // Sanity check the level count. + if (m_header.m_level_count > KTX2_MAX_SUPPORTED_LEVEL_COUNT) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Too many levels or file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_supercompression_scheme > KTX2_SS_ZSTANDARD) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid/unsupported supercompression or file is corrupted or invalid\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + if (m_header.m_sgd_byte_length <= sizeof(ktx2_etc1s_global_data_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data is too small\n"); + return false; + } + + if (m_header.m_sgd_byte_offset < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset is too low\n"); + return false; + } + + if (m_header.m_sgd_byte_offset + m_header.m_sgd_byte_length > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Supercompression global data offset and/or length is too high\n"); + return false; + } + } + + if (!m_levels.try_resize(m_header.m_level_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n"); + return false; + } + + const uint32_t level_index_size_in_bytes = basisu::maximum(1U, (uint32_t)m_header.m_level_count) * sizeof(ktx2_level_index); + + if ((sizeof(ktx2_header) + level_index_size_in_bytes) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: File is too small (can't read level index array)\n"); + return false; + } + + memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); + + // Sanity check the level offsets and byte sizes + for (uint32_t i = 0; i < m_levels.size(); i++) + { + if (m_levels[i].m_byte_offset < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too low)\n"); + return false; + } + + if (!m_levels[i].m_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level byte length\n"); + } + + if ((m_levels[i].m_byte_offset + m_levels[i].m_byte_length) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); + return false; + } + + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; + + if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + if (m_levels[i].m_uncompressed_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (0)\n"); + return false; + } + } + else if (m_header.m_supercompression_scheme >= KTX2_SS_ZSTANDARD) + { + if (!m_levels[i].m_uncompressed_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid uncompressed length (1)\n"); + return false; + } + } + } + + const uint32_t DFD_MINIMUM_SIZE = 44, DFD_MAXIMUM_SIZE = 60; + if ((m_header.m_dfd_byte_length != DFD_MINIMUM_SIZE) && (m_header.m_dfd_byte_length != DFD_MAXIMUM_SIZE)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD size\n"); + return false; + } + + if (((m_header.m_dfd_byte_offset + m_header.m_dfd_byte_length) > m_data_size) || (m_header.m_dfd_byte_offset < sizeof(ktx2_header))) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); + return false; + } + + const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset; + + if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Out of memory\n"); + return false; + } + + memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); + + // This is all hard coded for only ETC1S and UASTC. + uint32_t dfd_total_size = basisu::read_le_dword(pDFD); + + // 3.10.3: Sanity check + if (dfd_total_size != m_header.m_dfd_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); + return false; + } + + // 3.10.3: More sanity checking + if (m_header.m_kvd_byte_length) + { + if (dfd_total_size != m_header.m_kvd_byte_offset - m_header.m_dfd_byte_offset) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (2)\n"); + return false; + } + } + + const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); + const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); + + m_dfd_color_model = dfd_bits & 255; + m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); + m_dfd_transfer_func = (dfd_bits >> 16) & 255; + m_dfd_flags = (dfd_bits >> 24) & 255; + + // See 3.10.1.Restrictions + if ((m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_LINEAR) && (m_dfd_transfer_func != KTX2_KHR_DF_TRANSFER_SRGB)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD transfer function\n"); + return false; + } + + if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) + { + m_format = basist::basis_tex_format::cETC1S; + + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD’s sample count." + // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. + m_has_alpha = (m_header.m_dfd_byte_length == 60); + + m_dfd_samples = m_has_alpha ? 2 : 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + if (m_has_alpha) + { + const uint32_t sample_channel1 = basisu::read_le_dword(pDFD + 11 * sizeof(uint32_t)); + m_dfd_chan1 = (ktx2_df_channel_id)((sample_channel1 >> 24) & 15); + } + } + else if (m_dfd_color_model == KTX2_KDF_DF_MODEL_UASTC) + { + m_format = basist::basis_tex_format::cUASTC4x4; + + m_dfd_samples = 1; + m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); + + // We're assuming "DATA" means RGBA so it has alpha. + m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); + } + else + { + // Unsupported DFD color model. + BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); + return false; + } + + if (!read_key_values()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); + return false; + } + + // Check for a KTXanimData key + for (uint32_t i = 0; i < m_key_values.size(); i++) + { + if (strcmp(reinterpret_cast<const char*>(m_key_values[i].m_key.data()), "KTXanimData") == 0) + { + m_is_video = true; break; + } } - return 4; + + return true; } - uint32_t basis_get_block_height(transcoder_texture_format tex_type) + uint32_t ktx2_transcoder::get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const { - (void)tex_type; - return 4; + const uint32_t etc1s_image_index = + (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + if (etc1s_image_index >= get_etc1s_image_descs().size()) + { + assert(0); + return 0; + } + + return get_etc1s_image_descs()[etc1s_image_index].m_image_flags; + } + + const basisu::uint8_vec* ktx2_transcoder::find_key(const std::string& key_name) const + { + for (uint32_t i = 0; i < m_key_values.size(); i++) + if (strcmp((const char *)m_key_values[i].m_key.data(), key_name.c_str()) == 0) + return &m_key_values[i].m_value; + + return nullptr; } - bool basis_is_format_supported(transcoder_texture_format tex_type) + bool ktx2_transcoder::start_transcoding() { - switch (tex_type) + if (!m_pData) { - // ETC1 and uncompressed are always supported. - case transcoder_texture_format::cTFETC1_RGB: - case transcoder_texture_format::cTFRGBA32: - case transcoder_texture_format::cTFRGB565: - case transcoder_texture_format::cTFBGR565: - case transcoder_texture_format::cTFRGBA4444: - return true; -#if BASISD_SUPPORT_DXT1 - case transcoder_texture_format::cTFBC1_RGB: - return true; -#endif -#if BASISD_SUPPORT_DXT5A - case transcoder_texture_format::cTFBC4_R: - case transcoder_texture_format::cTFBC5_RG: - return true; -#endif -#if BASISD_SUPPORT_DXT1 && BASISD_SUPPORT_DXT5A - case transcoder_texture_format::cTFBC3_RGBA: - return true; -#endif -#if BASISD_SUPPORT_PVRTC1 - case transcoder_texture_format::cTFPVRTC1_4_RGB: - case transcoder_texture_format::cTFPVRTC1_4_RGBA: - return true; -#endif -#if BASISD_SUPPORT_BC7_MODE6_OPAQUE_ONLY - case transcoder_texture_format::cTFBC7_M6_RGB: - return true; -#endif -#if BASISD_SUPPORT_BC7_MODE5 - case transcoder_texture_format::cTFBC7_M5_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ETC2_EAC_A8 - case transcoder_texture_format::cTFETC2_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ASTC - case transcoder_texture_format::cTFASTC_4x4_RGBA: - return true; -#endif -#if BASISD_SUPPORT_ATC - case transcoder_texture_format::cTFATC_RGB: - case transcoder_texture_format::cTFATC_RGBA: - return true; -#endif -#if BASISD_SUPPORT_FXT1 - case transcoder_texture_format::cTFFXT1_RGB: - return true; + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: Must call init() first\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + { + // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. + if (!m_etc1s_transcoder.get_endpoints().empty()) + return true; + + if (!decompress_etc1s_global_data()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); + return false; + } + + if (!m_is_video) + { + // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key. + // Video cannot be a cubemap, and it must be a texture array. + if ((m_header.m_face_count == 1) && (m_header.m_layer_count > 1)) + { + for (uint32_t i = 0; i < m_etc1s_image_descs.size(); i++) + { + if (m_etc1s_image_descs[i].m_image_flags & KTX2_IMAGE_IS_P_FRAME) + { + m_is_video = true; + break; + } + } + } + } + } + else if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { +#if !BASISD_SUPPORT_KTX2_ZSTD + BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: File uses zstd supercompression, but zstd support was not enabled at compilation time (BASISD_SUPPORT_KTX2_ZSTD == 0)\n"); + return false; #endif -#if BASISD_SUPPORT_PVRTC2 - case transcoder_texture_format::cTFPVRTC2_4_RGB: - case transcoder_texture_format::cTFPVRTC2_4_RGBA: - return true; + } + + return true; + } + + bool ktx2_transcoder::get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const + { + if (level_index >= m_levels.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: level_index >= m_levels.size()\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + if (face_index >= 6) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index >= 6\n"); + return false; + } + } + else if (face_index != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: face_index != 0\n"); + return false; + } + + if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n"); + return false; + } + + const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1); + const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1); + const uint32_t num_blocks_x = (level_width + 3) >> 2; + const uint32_t num_blocks_y = (level_height + 3) >> 2; + + level_info.m_face_index = face_index; + level_info.m_layer_index = layer_index; + level_info.m_level_index = level_index; + level_info.m_orig_width = level_width; + level_info.m_orig_height = level_height; + level_info.m_width = num_blocks_x * 4; + level_info.m_height = num_blocks_y * 4; + level_info.m_num_blocks_x = num_blocks_x; + level_info.m_num_blocks_y = num_blocks_y; + level_info.m_total_blocks = num_blocks_x * num_blocks_y; + level_info.m_alpha_flag = m_has_alpha; + level_info.m_iframe_flag = false; + if (m_etc1s_image_descs.size()) + { + const uint32_t etc1s_image_index = + (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + level_info.m_iframe_flag = (m_etc1s_image_descs[etc1s_image_index].m_image_flags & KTX2_IMAGE_IS_P_FRAME) == 0; + } + + return true; + } + + bool ktx2_transcoder::transcode_image_level( + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + basist::transcoder_texture_format fmt, + uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, + ktx2_transcoder_state* pState) + { + if (!m_pData) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Must call init() first\n"); + return false; + } + + if (!pState) + pState = &m_def_transcoder_state; + + if (level_index >= m_levels.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n"); + return false; + } + + if (m_header.m_face_count > 1) + { + if (face_index >= 6) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index >= 6\n"); + return false; + } + } + else if (face_index != 0) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: face_index != 0\n"); + return false; + } + + if (layer_index >= basisu::maximum<uint32_t>(m_header.m_layer_count, 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: layer_index >= maximum<uint32_t>(m_header.m_layer_count, 1)\n"); + return false; + } + + const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset; + uint64_t comp_level_data_size = m_levels[level_index].m_byte_length; + + const uint8_t* pUncomp_level_data = pComp_level_data; + uint64_t uncomp_level_data_size = comp_level_data_size; + + if (uncomp_level_data_size > UINT32_MAX) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { + // Check if we've already decompressed this level's supercompressed data. + if ((int)level_index != pState->m_uncomp_data_level_index) + { + // Uncompress the entire level's supercompressed data. + if (!decompress_level_data(level_index, pState->m_level_uncomp_data)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: decompress_level_data() failed\n"); + return false; + } + pState->m_uncomp_data_level_index = level_index; + } + + pUncomp_level_data = pState->m_level_uncomp_data.data(); + uncomp_level_data_size = pState->m_level_uncomp_data.size(); + } + + const uint32_t level_width = basisu::maximum<uint32_t>(m_header.m_pixel_width >> level_index, 1); + const uint32_t level_height = basisu::maximum<uint32_t>(m_header.m_pixel_height >> level_index, 1); + const uint32_t num_blocks_x = (level_width + 3) >> 2; + const uint32_t num_blocks_y = (level_height + 3) >> 2; + + if (m_format == basist::basis_tex_format::cETC1S) + { + // Ensure start_transcoding() was called. + if (m_etc1s_transcoder.get_endpoints().empty()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: must call start_transcoding() first\n"); + return false; + } + + const uint32_t etc1s_image_index = + (level_index * basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count) + + layer_index * m_header.m_face_count + + face_index; + + // Sanity check + if (etc1s_image_index >= m_etc1s_image_descs.size()) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: etc1s_image_index >= m_etc1s_image_descs.size()\n"); + assert(0); + return false; + } + + if (static_cast<uint32_t>(m_data_size) != m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: File is too large\n"); + return false; + } + + const ktx2_etc1s_image_desc& image_desc = m_etc1s_image_descs[etc1s_image_index]; + + if (!m_etc1s_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, m_pData, static_cast<uint32_t>(m_data_size), + num_blocks_x, num_blocks_y, level_width, level_height, + level_index, + m_levels[level_index].m_byte_offset + image_desc.m_rgb_slice_byte_offset, image_desc.m_rgb_slice_byte_length, + image_desc.m_alpha_slice_byte_length ? (m_levels[level_index].m_byte_offset + image_desc.m_alpha_slice_byte_offset) : 0, image_desc.m_alpha_slice_byte_length, + decode_flags, m_has_alpha, + m_is_video, output_row_pitch_in_blocks_or_pixels, &pState->m_transcoder_state, output_rows_in_pixels)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: ETC1S transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else if (m_format == basist::basis_tex_format::cUASTC4x4) + { + // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. + assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length); + const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE; + + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; + + // Sanity checks + if (uncomp_ofs >= uncomp_level_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_ofs >= total_2D_image_size\n"); + return false; + } + + if ((uncomp_level_data_size - uncomp_ofs) < total_2D_image_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: (uncomp_level_data_size - uncomp_ofs) < total_2D_image_size\n"); + return false; + } + + if (!m_uastc_transcoder.transcode_image(fmt, + pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, + (const uint8_t*)pUncomp_level_data + uncomp_ofs, (uint32_t)total_2D_image_size, num_blocks_x, num_blocks_y, level_width, level_height, level_index, + 0, (uint32_t)total_2D_image_size, + decode_flags, m_has_alpha, m_is_video, output_row_pitch_in_blocks_or_pixels, nullptr, output_rows_in_pixels, channel0, channel1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: UASTC transcode_image() failed, this is either a bug or the file is corrupted/invalid\n"); + return false; + } + } + else + { + // Shouldn't get here. + BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: Internal error\n"); + assert(0); + return false; + } + + return true; + } + + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) + { + const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; + const uint64_t comp_size = m_levels[level_index].m_byte_length; + + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length; + + if (((size_t)comp_size) != comp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Compressed data too large\n"); + return false; + } + if (((size_t)uncomp_size) != uncomp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Uncompressed data too large\n"); + return false; + } + + if (!uncomp_data.try_resize(uncomp_size)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); + return false; + } + + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) + { +#if BASISD_SUPPORT_KTX2_ZSTD + size_t actualUncompSize = ZSTD_decompress(uncomp_data.data(), (size_t)uncomp_size, pComp_data, (size_t)comp_size); + if (ZSTD_isError(actualUncompSize)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression failed, file is invalid or corrupted\n"); + return false; + } + if (actualUncompSize != uncomp_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Zstd decompression returned too few bytes, file is invalid or corrupted\n"); + return false; + } +#else + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: File uses Zstd supercompression, but Zstd support was not enabled at compile time (BASISD_SUPPORT_KTX2_ZSTD is 0)\n"); + return false; #endif -#if BASISD_SUPPORT_ETC2_EAC_RG11 - case transcoder_texture_format::cTFETC2_EAC_R11: - case transcoder_texture_format::cTFETC2_EAC_RG11: + } + + return true; + } + + bool ktx2_transcoder::decompress_etc1s_global_data() + { + // Note: we don't actually support 3D textures in here yet + //uint32_t layer_pixel_depth = basisu::maximum<uint32_t>(m_header.m_pixel_depth, 1); + //for (uint32_t i = 1; i < m_header.m_level_count; i++) + // layer_pixel_depth += basisu::maximum<uint32_t>(m_header.m_pixel_depth >> i, 1); + + const uint32_t image_count = basisu::maximum<uint32_t>(m_header.m_layer_count, 1) * m_header.m_face_count * m_header.m_level_count; + assert(image_count); + + const uint8_t* pSrc = m_pData + m_header.m_sgd_byte_offset; + + memcpy(&m_etc1s_header, pSrc, sizeof(ktx2_etc1s_global_data_header)); + pSrc += sizeof(ktx2_etc1s_global_data_header); + + if ((!m_etc1s_header.m_endpoints_byte_length) || (!m_etc1s_header.m_selectors_byte_length) || (!m_etc1s_header.m_tables_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Invalid ETC1S global data\n"); + return false; + } + + if ((!m_etc1s_header.m_endpoint_count) || (!m_etc1s_header.m_selector_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: endpoint and/or selector count is 0, file is invalid or corrupted\n"); + return false; + } + + // Sanity check the ETC1S header. + if ((sizeof(ktx2_etc1s_global_data_header) + + sizeof(ktx2_etc1s_image_desc) * image_count + + m_etc1s_header.m_endpoints_byte_length + + m_etc1s_header.m_selectors_byte_length + + m_etc1s_header.m_tables_byte_length + + m_etc1s_header.m_extended_byte_length) > m_header.m_sgd_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); + return false; + } + + if (!m_etc1s_image_descs.try_resize(image_count)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); + return false; + } + + memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); + pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; + + // Sanity check the ETC1S image descs + for (uint32_t i = 0; i < image_count; i++) + { + // m_etc1s_transcoder.transcode_image() will validate the slice offsets/lengths before transcoding. + + if (!m_etc1s_image_descs[i].m_rgb_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (1)\n"); + return false; + } + + if (m_has_alpha) + { + if (!m_etc1s_image_descs[i].m_alpha_slice_byte_length) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: ETC1S image descs sanity check failed (2)\n"); + return false; + } + } + } + + const uint8_t* pEndpoint_data = pSrc; + const uint8_t* pSelector_data = pSrc + m_etc1s_header.m_endpoints_byte_length; + const uint8_t* pTables_data = pSrc + m_etc1s_header.m_endpoints_byte_length + m_etc1s_header.m_selectors_byte_length; + + if (!m_etc1s_transcoder.decode_tables(pTables_data, m_etc1s_header.m_tables_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); + return false; + } + + if (!m_etc1s_transcoder.decode_palettes( + m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, + m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); + return false; + } + + return true; + } + + bool ktx2_transcoder::read_key_values() + { + if (!m_header.m_kvd_byte_length) + { + if (m_header.m_kvd_byte_offset) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset (it should be zero when the length is zero)\n"); + return false; + } + return true; -#endif - default: - break; } + if (m_header.m_kvd_byte_offset < sizeof(ktx2_header)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset\n"); + return false; + } + + if ((m_header.m_kvd_byte_offset + m_header.m_kvd_byte_length) > m_data_size) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Invalid KVD byte offset and/or length\n"); + return false; + } + + const uint8_t* pSrc = m_pData + m_header.m_kvd_byte_offset; + uint32_t src_left = m_header.m_kvd_byte_length; + + if (!m_key_values.try_reserve(8)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + while (src_left > sizeof(uint32_t)) + { + uint32_t l = basisu::read_le_dword(pSrc); + + pSrc += sizeof(uint32_t); + src_left -= sizeof(uint32_t); + + if (l < 2) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (0)\n"); + return false; + } + + if (src_left < l) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (1)\n"); + return false; + } + + if (!m_key_values.try_resize(m_key_values.size() + 1)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + basisu::uint8_vec& key_data = m_key_values.back().m_key; + basisu::uint8_vec& value_data = m_key_values.back().m_value; + + do + { + if (!l) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (2)\n"); + return false; + } + + if (!key_data.try_push_back(*pSrc++)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + src_left--; + l--; + + } while (key_data.back()); + + if (!value_data.try_resize(l)) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); + return false; + } + + if (l) + { + memcpy(value_data.data(), pSrc, l); + pSrc += l; + src_left -= l; + } + + uint32_t ofs = (uint32_t)(pSrc - m_pData) & 3; + uint32_t alignment_bytes = (4 - ofs) & 3; + + if (src_left < alignment_bytes) + { + BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Failed reading key value fields (3)\n"); + return false; + } + + pSrc += alignment_bytes; + src_left -= alignment_bytes; + } + + return true; + } + +#endif // BASISD_SUPPORT_KTX2 + + bool basisu_transcoder_supports_ktx2() + { +#if BASISD_SUPPORT_KTX2 + return true; +#else return false; +#endif } -} // namespace basist + bool basisu_transcoder_supports_ktx2_zstd() + { +#if BASISD_SUPPORT_KTX2_ZSTD + return true; +#else + return false; +#endif + } +} // namespace basist diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.h b/thirdparty/basis_universal/transcoder/basisu_transcoder.h index 770c64122d..bf3aed3dc3 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder.h +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.h @@ -1,5 +1,5 @@ // basisu_transcoder.h -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,10 +15,25 @@ // limitations under the License. #pragma once -// Set BASISU_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. -//#define BASISU_DEVEL_MESSAGES 1 +// By default KTX2 support is enabled to simplify compilation. This implies the need for the Zstandard library (which we distribute as a single source file in the "zstd" directory) by default. +// Set BASISD_SUPPORT_KTX2 to 0 to completely disable KTX2 support as well as Zstd/miniz usage which is only required for UASTC supercompression in KTX2 files. +// Also see BASISD_SUPPORT_KTX2_ZSTD in basisu_transcoder.cpp, which individually disables Zstd usage. +#ifndef BASISD_SUPPORT_KTX2 + #define BASISD_SUPPORT_KTX2 1 +#endif + +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +#ifndef BASISD_SUPPORT_KTX2_ZSTD + #define BASISD_SUPPORT_KTX2_ZSTD 1 +#endif + +// Set BASISU_FORCE_DEVEL_MESSAGES to 1 to enable debug printf()'s whenever an error occurs, for easier debugging during development. +#ifndef BASISU_FORCE_DEVEL_MESSAGES + #define BASISU_FORCE_DEVEL_MESSAGES 0 +#endif #include "basisu_transcoder_internal.h" +#include "basisu_transcoder_uastc.h" #include "basisu_global_selector_palette.h" #include "basisu_file_headers.h" @@ -45,12 +60,11 @@ namespace basist cTFBC3_RGBA = 3, // Opaque+alpha, BC4 followed by a BC1 block, alpha channel will be opaque for opaque .basis files cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's) - cTFBC7_M6_RGB = 6, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. - cTFBC7_M5_RGBA = 7, // Opaque+alpha, alpha channel will be opaque for opaque .basis files + cTFBC7_RGBA = 6, // RGB or RGBA, mode 5 for ETC1S, modes (1,2,3,5,6,7) for UASTC // PVRTC1 4bpp (mobile, PowerVR devices) cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. - cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doens't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. + cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. // ASTC (mobile, Intel devices, hopefully all desktop GPU's one day) cTFASTC_4x4_RGBA = 10, // Opaque+alpha, ASTC 4x4, alpha channel will be opaque for opaque .basis files. Transcoder uses RGB/RGBA/L/LA modes, void extent, and up to two ([0,47] and [0,255]) endpoint precisions. @@ -69,10 +83,10 @@ namespace basist cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned) cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps - + // Uncompressed (raw pixel) formats cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte. - cTFRGB565 = 14, // 166pp RGB image stored in raster (not block) order in memory, R at bit position 11 + cTFRGB565 = 14, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 11 cTFBGR565 = 15, // 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0 cTFRGBA4444 = 16, // 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0 @@ -85,27 +99,62 @@ namespace basist cTFBC3 = cTFBC3_RGBA, cTFBC4 = cTFBC4_R, cTFBC5 = cTFBC5_RG, - cTFBC7_M6_OPAQUE_ONLY = cTFBC7_M6_RGB, - cTFBC7_M5 = cTFBC7_M5_RGBA, + + // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC, which supports numerous modes. + cTFBC7_M6_RGB = cTFBC7_RGBA, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. + cTFBC7_M5_RGBA = cTFBC7_RGBA, // Opaque+alpha, alpha channel will be opaque for opaque .basis files + cTFBC7_M6_OPAQUE_ONLY = cTFBC7_RGBA, + cTFBC7_M5 = cTFBC7_RGBA, + cTFBC7_ALT = 7, + cTFASTC_4x4 = cTFASTC_4x4_RGBA, + cTFATC_RGBA_INTERPOLATED_ALPHA = cTFATC_RGBA, }; - uint32_t basis_get_bytes_per_block(transcoder_texture_format fmt); + // For compressed texture formats, this returns the # of bytes per block. For uncompressed, it returns the # of bytes per pixel. + // NOTE: Previously, this function was called basis_get_bytes_per_block(), and it always returned 16*bytes_per_pixel for uncompressed formats which was confusing. + uint32_t basis_get_bytes_per_block_or_pixel(transcoder_texture_format fmt); + + // Returns format's name in ASCII const char* basis_get_format_name(transcoder_texture_format fmt); + + // Returns block format name in ASCII + const char* basis_get_block_format_name(block_format fmt); + + // Returns true if the format supports an alpha channel. bool basis_transcoder_format_has_alpha(transcoder_texture_format fmt); + + // Returns the basisu::texture_format corresponding to the specified transcoder_texture_format. basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt); + + // Returns the texture type's name in ASCII. const char* basis_get_texture_type_name(basis_texture_type tex_type); - + + // Returns true if the transcoder texture type is an uncompressed (raw pixel) format. bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type); + + // Returns the # of bytes per pixel for uncompressed formats, or 0 for block texture formats. uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt); - + + // Returns the block width for the specified texture format, which is currently either 4 or 8 for FXT1. uint32_t basis_get_block_width(transcoder_texture_format tex_type); + + // Returns the block height for the specified texture format, which is currently always 4. uint32_t basis_get_block_height(transcoder_texture_format tex_type); // Returns true if the specified format was enabled at compile time. - bool basis_is_format_supported(transcoder_texture_format tex_type); - + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); + + // Validates that the output buffer is large enough to hold the entire transcoded texture. + // For uncompressed texture formats, most input parameters are in pixels, not blocks. Blocks are 4x4 pixels. + bool basis_validate_output_buffer_size(transcoder_texture_format target_format, + uint32_t output_blocks_buf_size_in_blocks_or_pixels, + uint32_t orig_width, uint32_t orig_height, + uint32_t output_row_pitch_in_blocks_or_pixels, + uint32_t output_rows_in_pixels, + uint32_t total_slice_blocks); + class basisu_transcoder; // This struct holds all state used during transcoding. For video, it needs to persist between image transcodes (it holds the previous frame). @@ -118,46 +167,161 @@ namespace basist uint8_t m_pred_bits; }; - std::vector<block_preds> m_block_endpoint_preds[2]; - + basisu::vector<block_preds> m_block_endpoint_preds[2]; + enum { cMaxPrevFrameLevels = 16 }; - std::vector<uint32_t> m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + basisu::vector<uint32_t> m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + + void clear() + { + for (uint32_t i = 0; i < 2; i++) + { + m_block_endpoint_preds[i].clear(); + + for (uint32_t j = 0; j < cMaxPrevFrameLevels; j++) + m_prev_frame_indices[i][j].clear(); + } + } }; - + // Low-level helper class that does the actual transcoding. - class basisu_lowlevel_transcoder + class basisu_lowlevel_etc1s_transcoder { friend class basisu_transcoder; - + public: - basisu_lowlevel_transcoder(const basist::etc1_global_selector_codebook *pGlobal_sel_codebook); + basisu_lowlevel_etc1s_transcoder(const basist::etc1_global_selector_codebook* pGlobal_sel_codebook); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_pGlobal_codebook = pGlobal_codebook; } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_pGlobal_codebook; } bool decode_palettes( - uint32_t num_endpoints, const uint8_t *pEndpoints_data, uint32_t endpoints_data_size, - uint32_t num_selectors, const uint8_t *pSelectors_data, uint32_t selectors_data_size); + uint32_t num_endpoints, const uint8_t* pEndpoints_data, uint32_t endpoints_data_size, + uint32_t num_selectors, const uint8_t* pSelectors_data, uint32_t selectors_data_size); + + bool decode_tables(const uint8_t* pTable_data, uint32_t table_data_size); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, + header.m_tex_type == cBASISTexTypeVideoFrames, (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0, slice_desc.m_level_index, + slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, pState, + astc_transcode_alpha, + pAlpha_blocks, + output_rows_in_pixels); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t rgb_offset, uint32_t rgb_length, uint32_t alpha_offset, uint32_t alpha_length, + uint32_t decode_flags = 0, + bool basis_file_has_alpha_slices = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0); + + void clear() + { + m_local_endpoints.clear(); + m_local_selectors.clear(); + m_endpoint_pred_model.clear(); + m_delta_endpoint_model.clear(); + m_selector_model.clear(); + m_selector_history_buf_rle_model.clear(); + m_selector_history_buf_size = 0; + } - bool decode_tables(const uint8_t *pTable_data, uint32_t table_data_size); + // Low-level methods + typedef basisu::vector<endpoint> endpoint_vec; + const endpoint_vec& get_endpoints() const { return m_local_endpoints; } - bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, - uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header &header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, - basisu_transcoder_state *pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); + typedef basisu::vector<selector> selector_vec; + const selector_vec& get_selectors() const { return m_local_selectors; } + + const etc1_global_selector_codebook* get_global_sel_codebook() const { return m_pGlobal_sel_codebook; } private: - typedef std::vector<endpoint> endpoint_vec; - endpoint_vec m_endpoints; + const basisu_lowlevel_etc1s_transcoder* m_pGlobal_codebook; - typedef std::vector<selector> selector_vec; - selector_vec m_selectors; + endpoint_vec m_local_endpoints; + selector_vec m_local_selectors; - const etc1_global_selector_codebook *m_pGlobal_sel_codebook; + const etc1_global_selector_codebook* m_pGlobal_sel_codebook; huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; uint32_t m_selector_history_buf_size; - + basisu_transcoder_state m_def_state; }; + enum basisu_decode_flags + { + // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. + cDecodeFlagsPVRTCDecodeToNextPow2 = 2, + + // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. + // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). + cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, + + // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). + // This flag is used internally when decoding to BC3. + cDecodeFlagsBC1ForbidThreeColorBlocks = 8, + + // The output buffer contains alpha endpoint/selector indices. + // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. + cDecodeFlagsOutputHasAlphaIndices = 16, + + cDecodeFlagsHighQuality = 32 + }; + + class basisu_lowlevel_uastc_transcoder + { + friend class basisu_transcoder; + + public: + basisu_lowlevel_uastc_transcoder(); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0); + + bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt, + uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0) + { + return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, + output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, + pState, output_rows_in_pixels, channel0, channel1, decode_flags); + } + + // Container independent transcoding + bool transcode_image( + transcoder_texture_format target_format, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const uint8_t* pCompressed_data, uint32_t compressed_data_length, + uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index, + uint32_t slice_offset, uint32_t slice_length, + uint32_t decode_flags = 0, + bool has_alpha = false, + bool is_video = false, + uint32_t output_row_pitch_in_blocks_or_pixels = 0, + basisu_transcoder_state* pState = nullptr, + uint32_t output_rows_in_pixels = 0, + int channel0 = -1, int channel1 = -1); + }; + struct basisu_slice_info { uint32_t m_orig_width; @@ -175,19 +339,19 @@ namespace basist uint32_t m_slice_index; // the slice index in the .basis file uint32_t m_image_index; // the source image index originally provided to the encoder uint32_t m_level_index; // the mipmap level within this image - + uint32_t m_unpacked_slice_crc16; - + bool m_alpha_flag; // true if the slice has alpha data bool m_iframe_flag; // true if the slice is an I-Frame }; - typedef std::vector<basisu_slice_info> basisu_slice_info_vec; + typedef basisu::vector<basisu_slice_info> basisu_slice_info_vec; struct basisu_image_info { uint32_t m_image_index; - uint32_t m_total_levels; + uint32_t m_total_levels; uint32_t m_orig_width; uint32_t m_orig_height; @@ -199,8 +363,8 @@ namespace basist uint32_t m_num_blocks_y; uint32_t m_total_blocks; - uint32_t m_first_slice_index; - + uint32_t m_first_slice_index; + bool m_alpha_flag; // true if the image has alpha data bool m_iframe_flag; // true if the image is an I-Frame }; @@ -220,8 +384,13 @@ namespace basist uint32_t m_num_blocks_y; uint32_t m_total_blocks; - uint32_t m_first_slice_index; - + uint32_t m_first_slice_index; + + uint32_t m_rgb_file_ofs; + uint32_t m_rgb_file_len; + uint32_t m_alpha_file_ofs; + uint32_t m_alpha_file_len; + bool m_alpha_flag; // true if the image has alpha data bool m_iframe_flag; // true if the image is an I-Frame }; @@ -232,13 +401,19 @@ namespace basist uint32_t m_total_header_size; uint32_t m_total_selectors; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_selector_codebook_ofs; uint32_t m_selector_codebook_size; uint32_t m_total_endpoints; + // will be 0 for UASTC or if the file uses global codebooks + uint32_t m_endpoint_codebook_ofs; uint32_t m_endpoint_codebook_size; + uint32_t m_tables_ofs; uint32_t m_tables_size; - uint32_t m_slices_size; + + uint32_t m_slices_size; basis_texture_type m_tex_type; uint32_t m_us_per_frame; @@ -247,14 +422,16 @@ namespace basist basisu_slice_info_vec m_slice_info; uint32_t m_total_images; // total # of images - std::vector<uint32_t> m_image_mipmap_levels; // the # of mipmap levels for each image + basisu::vector<uint32_t> m_image_mipmap_levels; // the # of mipmap levels for each image uint32_t m_userdata0; uint32_t m_userdata1; - - bool m_etc1s; // always true for basis universal + + basis_tex_format m_tex_format; // ETC1S, UASTC, etc. + bool m_y_flipped; // true if the image was Y flipped - bool m_has_alpha_slices; // true if the texture has alpha slices (even slices RGB, odd slices alpha) + bool m_etc1s; // true if the file is ETC1S + bool m_has_alpha_slices; // true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha) }; // High-level transcoder class which accepts .basis file data and allows the caller to query information about the file and transcode image levels to various texture formats. @@ -265,81 +442,67 @@ namespace basist basisu_transcoder& operator= (const basisu_transcoder&); public: - basisu_transcoder(const etc1_global_selector_codebook *pGlobal_sel_codebook); + basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook); // Validates the .basis file. This computes a crc16 over the entire file, so it's slow. - bool validate_file_checksums(const void *pData, uint32_t data_size, bool full_validation) const; + bool validate_file_checksums(const void* pData, uint32_t data_size, bool full_validation) const; // Quick header validation - no crc16 checks. - bool validate_header(const void *pData, uint32_t data_size) const; + bool validate_header(const void* pData, uint32_t data_size) const; + + basis_texture_type get_texture_type(const void* pData, uint32_t data_size) const; + bool get_userdata(const void* pData, uint32_t data_size, uint32_t& userdata0, uint32_t& userdata1) const; - basis_texture_type get_texture_type(const void *pData, uint32_t data_size) const; - bool get_userdata(const void *pData, uint32_t data_size, uint32_t &userdata0, uint32_t &userdata1) const; - // Returns the total number of images in the basis file (always 1 or more). // Note that the number of mipmap levels for each image may differ, and that images may have different resolutions. - uint32_t get_total_images(const void *pData, uint32_t data_size) const; + uint32_t get_total_images(const void* pData, uint32_t data_size) const; + + basis_tex_format get_tex_format(const void* pData, uint32_t data_size) const; // Returns the number of mipmap levels in an image. - uint32_t get_total_image_levels(const void *pData, uint32_t data_size, uint32_t image_index) const; - + uint32_t get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const; + // Returns basic information about an image. Note that orig_width/orig_height may not be a multiple of 4. - bool get_image_level_desc(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t &orig_width, uint32_t &orig_height, uint32_t &total_blocks) const; + bool get_image_level_desc(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t& orig_width, uint32_t& orig_height, uint32_t& total_blocks) const; // Returns information about the specified image. - bool get_image_info(const void *pData, uint32_t data_size, basisu_image_info &image_info, uint32_t image_index) const; + bool get_image_info(const void* pData, uint32_t data_size, basisu_image_info& image_info, uint32_t image_index) const; // Returns information about the specified image's mipmap level. - bool get_image_level_info(const void *pData, uint32_t data_size, basisu_image_level_info &level_info, uint32_t image_index, uint32_t level_index) const; - + bool get_image_level_info(const void* pData, uint32_t data_size, basisu_image_level_info& level_info, uint32_t image_index, uint32_t level_index) const; + // Get a description of the basis file and low-level information about each slice. - bool get_file_info(const void *pData, uint32_t data_size, basisu_file_info &file_info) const; - + bool get_file_info(const void* pData, uint32_t data_size, basisu_file_info& file_info) const; + // start_transcoding() must be called before calling transcode_slice() or transcode_image_level(). - // This decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). - bool start_transcoding(const void *pData, uint32_t data_size) const; - + // For ETC1S files, this call decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). + bool start_transcoding(const void* pData, uint32_t data_size); + + bool stop_transcoding(); + // Returns true if start_transcoding() has been called. - bool get_ready_to_transcode() const { return m_lowlevel_decoder.m_endpoints.size() > 0; } + bool get_ready_to_transcode() const { return m_ready_to_transcode; } - enum - { - // PVRTC1: decode non-pow2 ETC1S texture level to the next larger power of 2 (not implemented yet, but we're going to support it). Ignored if the slice's dimensions are already a power of 2. - cDecodeFlagsPVRTCDecodeToNextPow2 = 2, - - // When decoding to an opaque texture format, if the basis file has alpha, decode the alpha slice instead of the color slice to the output texture format. - // This is primarily to allow decoding of textures with alpha to multiple ETC1 textures (one for color, another for alpha). - cDecodeFlagsTranscodeAlphaDataToOpaqueFormats = 4, - - // Forbid usage of BC1 3 color blocks (we don't support BC1 punchthrough alpha yet). - // This flag is used internally when decoding to BC3. - cDecodeFlagsBC1ForbidThreeColorBlocks = 8, - - // The output buffer contains alpha endpoint/selector indices. - // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. - cDecodeFlagsOutputHasAlphaIndices = 16 - }; - // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). - // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). + // output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. bool transcode_image_level( - const void *pData, uint32_t data_size, - uint32_t image_index, uint32_t level_index, - void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + const void* pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, transcoder_texture_format fmt, - uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state *pState = nullptr, uint32_t output_rows_in_pixels = 0) const; + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0) const; // Finds the basis slice corresponding to the specified image/level/alpha params, or -1 if the slice can't be found. - int find_slice(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const; + int find_slice(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, bool alpha_data) const; // transcode_slice() decodes a single slice from the .basis file. It's a low-level API - most likely you want to use transcode_image_level(). // This is a low-level API, and will be needed to be called multiple times to decode some texture formats (like BC3, BC5, or ETC2). @@ -350,21 +513,39 @@ namespace basist // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. - bool transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, - void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, - block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state * pState = nullptr, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0) const; + bool transcode_slice(const void* pData, uint32_t data_size, uint32_t slice_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state* pState = nullptr, void* pAlpha_blocks = nullptr, + uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1) const; + + static void write_opaque_alpha_blocks( + uint32_t num_blocks_x, uint32_t num_blocks_y, + void* pOutput_blocks, block_format fmt, + uint32_t block_stride_in_bytes, uint32_t output_row_pitch_in_blocks_or_pixels); + + void set_global_codebooks(const basisu_lowlevel_etc1s_transcoder* pGlobal_codebook) { m_lowlevel_etc1s_decoder.set_global_codebooks(pGlobal_codebook); } + const basisu_lowlevel_etc1s_transcoder* get_global_codebooks() const { return m_lowlevel_etc1s_decoder.get_global_codebooks(); } + + const basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() const { return m_lowlevel_etc1s_decoder; } + basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() { return m_lowlevel_etc1s_decoder; } + + const basisu_lowlevel_uastc_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_decoder; } + basisu_lowlevel_uastc_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_decoder; } private: - mutable basisu_lowlevel_transcoder m_lowlevel_decoder; + mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder; + mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder; + + bool m_ready_to_transcode; int find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const; - + bool validate_header_quick(const void* pData, uint32_t data_size) const; }; - // basisu_transcoder_init() must be called before a .basis file can be transcoded. + // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. void basisu_transcoder_init(); - + enum debug_flags_t { cDebugFlagVisCRs = 1, @@ -374,4 +555,387 @@ namespace basist uint32_t get_debug_flags(); void set_debug_flags(uint32_t f); + // ------------------------------------------------------------------------------------------------------ + // Optional .KTX2 file format support + // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. + // ------------------------------------------------------------------------------------------------------ +#if BASISD_SUPPORT_KTX2 +#pragma pack(push) +#pragma pack(1) + struct ktx2_header + { + uint8_t m_identifier[12]; + basisu::packed_uint<4> m_vk_format; + basisu::packed_uint<4> m_type_size; + basisu::packed_uint<4> m_pixel_width; + basisu::packed_uint<4> m_pixel_height; + basisu::packed_uint<4> m_pixel_depth; + basisu::packed_uint<4> m_layer_count; + basisu::packed_uint<4> m_face_count; + basisu::packed_uint<4> m_level_count; + basisu::packed_uint<4> m_supercompression_scheme; + basisu::packed_uint<4> m_dfd_byte_offset; + basisu::packed_uint<4> m_dfd_byte_length; + basisu::packed_uint<4> m_kvd_byte_offset; + basisu::packed_uint<4> m_kvd_byte_length; + basisu::packed_uint<8> m_sgd_byte_offset; + basisu::packed_uint<8> m_sgd_byte_length; + }; + + struct ktx2_level_index + { + basisu::packed_uint<8> m_byte_offset; + basisu::packed_uint<8> m_byte_length; + basisu::packed_uint<8> m_uncompressed_byte_length; + }; + + struct ktx2_etc1s_global_data_header + { + basisu::packed_uint<2> m_endpoint_count; + basisu::packed_uint<2> m_selector_count; + basisu::packed_uint<4> m_endpoints_byte_length; + basisu::packed_uint<4> m_selectors_byte_length; + basisu::packed_uint<4> m_tables_byte_length; + basisu::packed_uint<4> m_extended_byte_length; + }; + + struct ktx2_etc1s_image_desc + { + basisu::packed_uint<4> m_image_flags; + basisu::packed_uint<4> m_rgb_slice_byte_offset; + basisu::packed_uint<4> m_rgb_slice_byte_length; + basisu::packed_uint<4> m_alpha_slice_byte_offset; + basisu::packed_uint<4> m_alpha_slice_byte_length; + }; + + struct ktx2_animdata + { + basisu::packed_uint<4> m_duration; + basisu::packed_uint<4> m_timescale; + basisu::packed_uint<4> m_loopcount; + }; +#pragma pack(pop) + + const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0; + const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166; + const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; + const uint32_t KTX2_IMAGE_IS_P_FRAME = 2; + const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; + const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased + + // The KTX2 transfer functions supported by KTX2 + const uint32_t KTX2_KHR_DF_TRANSFER_LINEAR = 1; + const uint32_t KTX2_KHR_DF_TRANSFER_SRGB = 2; + + enum ktx2_supercompression + { + KTX2_SS_NONE = 0, + KTX2_SS_BASISLZ = 1, + KTX2_SS_ZSTANDARD = 2 + }; + + extern const uint8_t g_ktx2_file_identifier[12]; + + enum ktx2_df_channel_id + { + KTX2_DF_CHANNEL_ETC1S_RGB = 0U, + KTX2_DF_CHANNEL_ETC1S_RRR = 3U, + KTX2_DF_CHANNEL_ETC1S_GGG = 4U, + KTX2_DF_CHANNEL_ETC1S_AAA = 15U, + + KTX2_DF_CHANNEL_UASTC_DATA = 0U, + KTX2_DF_CHANNEL_UASTC_RGB = 0U, + KTX2_DF_CHANNEL_UASTC_RGBA = 3U, + KTX2_DF_CHANNEL_UASTC_RRR = 4U, + KTX2_DF_CHANNEL_UASTC_RRRG = 5U, + KTX2_DF_CHANNEL_UASTC_RG = 6U, + }; + + inline const char* ktx2_get_etc1s_df_channel_id_str(ktx2_df_channel_id id) + { + switch (id) + { + case KTX2_DF_CHANNEL_ETC1S_RGB: return "RGB"; + case KTX2_DF_CHANNEL_ETC1S_RRR: return "RRR"; + case KTX2_DF_CHANNEL_ETC1S_GGG: return "GGG"; + case KTX2_DF_CHANNEL_ETC1S_AAA: return "AAA"; + default: break; + } + return "?"; + } + + inline const char* ktx2_get_uastc_df_channel_id_str(ktx2_df_channel_id id) + { + switch (id) + { + case KTX2_DF_CHANNEL_UASTC_RGB: return "RGB"; + case KTX2_DF_CHANNEL_UASTC_RGBA: return "RGBA"; + case KTX2_DF_CHANNEL_UASTC_RRR: return "RRR"; + case KTX2_DF_CHANNEL_UASTC_RRRG: return "RRRG"; + case KTX2_DF_CHANNEL_UASTC_RG: return "RG"; + default: break; + } + return "?"; + } + + enum ktx2_df_color_primaries + { + KTX2_DF_PRIMARIES_UNSPECIFIED = 0, + KTX2_DF_PRIMARIES_BT709 = 1, + KTX2_DF_PRIMARIES_SRGB = 1, + KTX2_DF_PRIMARIES_BT601_EBU = 2, + KTX2_DF_PRIMARIES_BT601_SMPTE = 3, + KTX2_DF_PRIMARIES_BT2020 = 4, + KTX2_DF_PRIMARIES_CIEXYZ = 5, + KTX2_DF_PRIMARIES_ACES = 6, + KTX2_DF_PRIMARIES_ACESCC = 7, + KTX2_DF_PRIMARIES_NTSC1953 = 8, + KTX2_DF_PRIMARIES_PAL525 = 9, + KTX2_DF_PRIMARIES_DISPLAYP3 = 10, + KTX2_DF_PRIMARIES_ADOBERGB = 11 + }; + + inline const char* ktx2_get_df_color_primaries_str(ktx2_df_color_primaries p) + { + switch (p) + { + case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; + case KTX2_DF_PRIMARIES_BT709: return "BT709"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; + case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; + case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; + case KTX2_DF_PRIMARIES_ACES: return "ACES"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; + case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; + case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; + case KTX2_DF_PRIMARIES_ADOBERGB: return "ADOBERGB"; + default: break; + } + return "?"; + } + + // Information about a single 2D texture "image" in a KTX2 file. + struct ktx2_image_level_info + { + // The mipmap level index (0=largest), texture array layer index, and cubemap face index of the image. + uint32_t m_level_index; + uint32_t m_layer_index; + uint32_t m_face_index; + + // The image's actual (or the original source image's) width/height in pixels, which may not be divisible by 4 pixels. + uint32_t m_orig_width; + uint32_t m_orig_height; + + // The image's physical width/height, which will always be divisible by 4 pixels. + uint32_t m_width; + uint32_t m_height; + + // The texture's dimensions in 4x4 texel blocks. + uint32_t m_num_blocks_x; + uint32_t m_num_blocks_y; + + // The total number of blocks + uint32_t m_total_blocks; + + // true if the image has alpha data + bool m_alpha_flag; + + // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. + bool m_iframe_flag; + }; + + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) + struct ktx2_transcoder_state + { + basist::basisu_transcoder_state m_transcoder_state; + basisu::uint8_vec m_level_uncomp_data; + int m_uncomp_data_level_index; + + void clear() + { + m_transcoder_state.clear(); + m_level_uncomp_data.clear(); + m_uncomp_data_level_index = -1; + } + }; + + // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. + // It does not support 1D or 3D textures. + // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. + // It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. + // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). + class ktx2_transcoder + { + public: + ktx2_transcoder(basist::etc1_global_selector_codebook* pGlobal_sel_codebook); + + // Frees all allocations, resets object. + void clear(); + + // init() parses the KTX2 header, level index array, DFD, and key values, but nothing else. + // Importantly, it does not parse or decompress the ETC1S global supercompressed data, so some things (like which frames are I/P-Frames) won't be available until start_transcoding() is called. + // This method holds a pointer to the file data until clear() is called. + bool init(const void* pData, uint32_t data_size); + + // Returns the data/size passed to init(). + const uint8_t* get_data() const { return m_pData; } + uint32_t get_data_size() const { return m_data_size; } + + // Returns the KTX2 header. Valid after init(). + const ktx2_header& get_header() const { return m_header; } + + // Returns the KTX2 level index array. There will be one entry for each mipmap level. Valid after init(). + const basisu::vector<ktx2_level_index>& get_level_index() const { return m_levels; } + + // Returns the texture's width in texels. Always non-zero, might not be divisible by 4. Valid after init(). + uint32_t get_width() const { return m_header.m_pixel_width; } + + // Returns the texture's height in texels. Always non-zero, might not be divisible by 4. Valid after init(). + uint32_t get_height() const { return m_header.m_pixel_height; } + + // Returns the texture's number of mipmap levels. Always returns 1 or higher. Valid after init(). + uint32_t get_levels() const { return m_header.m_level_count; } + + // Returns the number of faces. Returns 1 for 2D textures and or 6 for cubemaps. Valid after init(). + uint32_t get_faces() const { return m_header.m_face_count; } + + // Returns 0 or the number of layers in the texture array or texture video. Valid after init(). + uint32_t get_layers() const { return m_header.m_layer_count; } + + // Returns cETC1S or cUASTC4x4. Valid after init(). + basist::basis_tex_format get_format() const { return m_format; } + + bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; } + + bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; } + + // Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init(). + uint32_t get_has_alpha() const { return m_has_alpha; } + + // Returns the entire Data Format Descriptor (DFD) from the KTX2 file. Valid after init(). + // See https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html#_the_khronos_data_format_descriptor_overview + const basisu::uint8_vec& get_dfd() const { return m_dfd; } + + // Some basic DFD accessors. Valid after init(). + uint32_t get_dfd_color_model() const { return m_dfd_color_model; } + + // Returns the DFD color primary. + // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. + ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } + + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. + uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } + + uint32_t get_dfd_flags() const { return m_dfd_flags; } + + // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). + uint32_t get_dfd_total_samples() const { return m_dfd_samples; } + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + // It's up to the caller to decide what to do if the value isn't in the enum. + ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } + ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } + + // Key value field data. + struct key_value + { + // The key field is UTF8 and always zero terminated. + basisu::uint8_vec m_key; + + // The value may be empty. It consists of raw bytes which may or may not be zero terminated. + basisu::uint8_vec m_value; + + bool operator< (const key_value& rhs) const { return strcmp((const char*)m_key.data(), (const char *)rhs.m_key.data()) < 0; } + }; + typedef basisu::vector<key_value> key_value_vec; + + // Returns the array of key-value entries. This may be empty. Valid after init(). + // The order of key values fields in this array exactly matches the order they were stored in the file. The keys are supposed to be sorted by their Unicode code points. + const key_value_vec& get_key_values() const { return m_key_values; } + + const basisu::uint8_vec *find_key(const std::string& key_name) const; + + // Low-level ETC1S specific accessors + + // Returns the ETC1S global supercompression data header, which is only valid after start_transcoding() is called. + const ktx2_etc1s_global_data_header& get_etc1s_header() const { return m_etc1s_header; } + + // Returns the array of ETC1S image descriptors, which is only valid after get_etc1s_image_descs() is called. + const basisu::vector<ktx2_etc1s_image_desc>& get_etc1s_image_descs() const { return m_etc1s_image_descs; } + + // Must have called startTranscoding() first + uint32_t get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; + + // is_video() is only valid after start_transcoding() is called. + // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. + bool is_video() const { return m_is_video; } + + // start_transcoding() MUST be called before calling transcode_image(). + // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. + bool start_transcoding(); + + // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. + // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. + bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; + + // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. + // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. + // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. + // By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. + bool transcode_image_level( + uint32_t level_index, uint32_t layer_index, uint32_t face_index, + void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, + basist::transcoder_texture_format fmt, + uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, + ktx2_transcoder_state *pState = nullptr); + + private: + const uint8_t* m_pData; + uint32_t m_data_size; + + ktx2_header m_header; + basisu::vector<ktx2_level_index> m_levels; + basisu::uint8_vec m_dfd; + key_value_vec m_key_values; + + ktx2_etc1s_global_data_header m_etc1s_header; + basisu::vector<ktx2_etc1s_image_desc> m_etc1s_image_descs; + + basist::basis_tex_format m_format; + + uint32_t m_dfd_color_model; + ktx2_df_color_primaries m_dfd_color_prims; + uint32_t m_dfd_transfer_func; + uint32_t m_dfd_flags; + uint32_t m_dfd_samples; + ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; + + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; + basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder; + + ktx2_transcoder_state m_def_transcoder_state; + + bool m_has_alpha; + bool m_is_video; + + bool decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data); + bool decompress_etc1s_global_data(); + bool read_key_values(); + }; + +#endif // BASISD_SUPPORT_KTX2 + + // Returns true if the transcoder was compiled with KTX2 support. + bool basisu_transcoder_supports_ktx2(); + + // Returns true if the transcoder was compiled with Zstandard support. + bool basisu_transcoder_supports_ktx2_zstd(); + } // namespace basisu + diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h index a9c6823d92..2422d788a9 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h @@ -1,5 +1,5 @@ // basisu_transcoder_internal.h - Universal texture format transcoder library. -// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing // @@ -20,8 +20,8 @@ #pragma warning (disable: 4127) // conditional expression is constant #endif -#define BASISD_LIB_VERSION 107 -#define BASISD_VERSION_STRING "01.11" +#define BASISD_LIB_VERSION 115 +#define BASISD_VERSION_STRING "01.15" #ifdef _DEBUG #define BASISD_BUILD_DEBUG @@ -45,38 +45,44 @@ namespace basist enum class block_format { cETC1, // ETC1S RGB + cETC2_RGBA, // full ETC2 EAC RGBA8 block cBC1, // DXT1 RGB + cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) + cBC5, // two BC4 blocks cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA - cBC7_M6_OPAQUE_ONLY, // RGB BC7 mode 6 + cBC7, // Full BC7 block, any mode cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size + + cPVRTC2_4_RGB, + cPVRTC2_4_RGBA, + + cETC2_EAC_R11, + cETC2_EAC_RG11, cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, - - cPVRTC2_4_RGB, - cPVRTC2_4_RGBA, - - cETC2_EAC_R11, - + cRGBA4444, + cTotalBlockFormats }; @@ -116,7 +122,7 @@ namespace basist basisu::clear_vector(m_tree); } - bool init(uint32_t total_syms, const uint8_t *pCode_sizes) + bool init(uint32_t total_syms, const uint8_t *pCode_sizes, uint32_t fast_lookup_bits = basisu::cHuffmanFastLookupBits) { if (!total_syms) { @@ -127,8 +133,10 @@ namespace basist m_code_sizes.resize(total_syms); memcpy(&m_code_sizes[0], pCode_sizes, total_syms); + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; + m_lookup.resize(0); - m_lookup.resize(basisu::cHuffmanFastLookupSize); + m_lookup.resize(huffman_fast_lookup_size); m_tree.resize(0); m_tree.resize(total_syms * 2); @@ -166,10 +174,10 @@ namespace basist for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); - if (code_size <= basisu::cHuffmanFastLookupBits) + if (code_size <= fast_lookup_bits) { uint32_t k = (code_size << 16) | sym_index; - while (rev_code < basisu::cHuffmanFastLookupSize) + while (rev_code < huffman_fast_lookup_size) { if (m_lookup[rev_code] != 0) { @@ -184,9 +192,9 @@ namespace basist } int tree_cur; - if (0 == (tree_cur = m_lookup[rev_code & (basisu::cHuffmanFastLookupSize - 1)])) + if (0 == (tree_cur = m_lookup[rev_code & (huffman_fast_lookup_size - 1)])) { - const uint32_t idx = rev_code & (basisu::cHuffmanFastLookupSize - 1); + const uint32_t idx = rev_code & (huffman_fast_lookup_size - 1); if (m_lookup[idx] != 0) { // Supplied codesizes can't create a valid prefix code. @@ -204,9 +212,9 @@ namespace basist return false; } - rev_code >>= (basisu::cHuffmanFastLookupBits - 1); + rev_code >>= (fast_lookup_bits - 1); - for (int j = code_size; j > (basisu::cHuffmanFastLookupBits + 1); j--) + for (int j = code_size; j > ((int)fast_lookup_bits + 1); j--) { tree_cur -= ((rev_code >>= 1) & 1); @@ -254,6 +262,8 @@ namespace basist } const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } + const basisu::int_vec get_lookup() const { return m_lookup; } + const basisu::int16_vec get_tree() const { return m_tree; } bool is_valid() const { return m_code_sizes.size() > 0; } @@ -430,9 +440,11 @@ namespace basist return v; } - inline uint32_t decode_huffman(const huffman_decoding_table &ct) + inline uint32_t decode_huffman(const huffman_decoding_table &ct, int fast_lookup_bits = basisu::cHuffmanFastLookupBits) { assert(ct.m_code_sizes.size()); + + const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; while (m_bit_buf_size < 16) { @@ -448,14 +460,14 @@ namespace basist int code_len; int sym; - if ((sym = ct.m_lookup[m_bit_buf & (basisu::cHuffmanFastLookupSize - 1)]) >= 0) + if ((sym = ct.m_lookup[m_bit_buf & (huffman_fast_lookup_size - 1)]) >= 0) { code_len = sym >> 16; sym &= 0xFFFF; } else { - code_len = basisu::cHuffmanFastLookupBits; + code_len = fast_lookup_bits; do { sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 @@ -635,6 +647,11 @@ namespace basist return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } + enum eNoClamp + { + cNoClamp = 0 + }; + struct color32 { union @@ -655,21 +672,33 @@ namespace basist color32() { } color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); } + void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); } + void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } bool operator== (const color32&rhs) const { return m == rhs.m; } + + static color32 comp_min(const color32& a, const color32& b) { return color32(cNoClamp, basisu::minimum(a[0], b[0]), basisu::minimum(a[1], b[1]), basisu::minimum(a[2], b[2]), basisu::minimum(a[3], b[3])); } + static color32 comp_max(const color32& a, const color32& b) { return color32(cNoClamp, basisu::maximum(a[0], b[0]), basisu::maximum(a[1], b[1]), basisu::maximum(a[2], b[2]), basisu::maximum(a[3], b[3])); } }; struct endpoint { color32 m_color5; uint8_t m_inten5; + bool operator== (const endpoint& rhs) const + { + return (m_color5.r == rhs.m_color5.r) && (m_color5.g == rhs.m_color5.g) && (m_color5.b == rhs.m_color5.b) && (m_inten5 == rhs.m_inten5); + } + bool operator!= (const endpoint& rhs) const { return !(*this == rhs); } }; struct selector @@ -682,6 +711,17 @@ namespace basist uint8_t m_lo_selector, m_hi_selector; uint8_t m_num_unique_selectors; + bool operator== (const selector& rhs) const + { + return (m_selectors[0] == rhs.m_selectors[0]) && + (m_selectors[1] == rhs.m_selectors[1]) && + (m_selectors[2] == rhs.m_selectors[2]) && + (m_selectors[3] == rhs.m_selectors[3]); + } + bool operator!= (const selector& rhs) const + { + return !(*this == rhs); + } void init_flags() { diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc.inc index 7f38f4a863..cd634c0df5 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc.inc @@ -478,4 +478,4 @@ {31,1,10801},{47,1,12162},{14,1,6117},{14,1,6117},{8,1,50},{20,1,7322},{0,1,1241},{21,1,914},{21,1,914},{21,1,914},{7,1,274},{35,5,1513},{9,1,585},{9,1,585},{26,1,0},{27,1,1513},{26,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{47,0,9250},{47,0,9250},{47,0,9250},{47,0,9250},{12,1,3690}, {12,1,3690},{12,1,3690},{8,1,50},{0,1,1241},{0,1,1241},{45,1,65535},{14,1,33274},{42,1,19608},{42,1,13375},{47,1,62627},{42,1,22211},{10,1,6045},{24,1,138},{36,1,39015},{0,1,1732},{35,1,1048},{5,1,766},{5,1,666},{37,1,212},{3,3,1473},{7,1,675},{23,1,410},{14,1,1},{3,3,1473},{14,1,1},{13,1,14121},{13,1,14121},{13,1,14121},{45,1,10571},{45,1,11434},{30,1,6081},{30,1,6081}, {40,1,137},{36,1,6926},{2,1,1445},{5,1,666},{5,1,666},{5,1,666},{37,1,212},{35,3,1105},{23,1,410},{23,1,410},{14,1,1},{25,1,1105},{14,1,1},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{1,1,0},{0,1,0},{1,1,0},{0,1,0},{15,0,9256},{15,0,9256},{15,0,9256},{15,0,9256},{14,1,3985},{14,1,3985},{14,1,3985},{40,1,137},{2,1,1445}, -{2,1,1445},
\ No newline at end of file +{2,1,1445}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc_0_255.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc_0_255.inc index 5e7a75396d..da4e7fee98 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc_0_255.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_astc_0_255.inc @@ -478,4 +478,4 @@ {137,255,10742},{135,255,12066},{107,255,6089},{107,255,6089},{67,255,45},{37,255,7233},{1,255,1184},{218,255,900},{218,255,900},{218,255,900},{204,255,272},{255,167,1513},{189,255,562},{189,255,562},{86,255,0},{253,213,1513},{86,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{132,0,9248},{132,0,9248},{132,0,9248},{132,0,9248},{98,255,3656}, {98,255,3656},{98,255,3656},{67,255,45},{1,255,1184},{1,255,1184},{138,255,65535},{107,255,33448},{95,255,19729},{89,255,13446},{135,255,62717},{95,255,22307},{79,255,6021},{73,255,105},{40,255,38959},{0,254,1627},{230,255,996},{224,255,756},{221,255,653},{213,255,194},{255,204,1473},{207,255,675},{198,255,405},{110,255,0},{255,230,1473},{110,255,0},{162,255,14060},{162,255,14060},{162,255,14060},{146,255,10545},{141,255,11378},{116,255,6077},{116,255,6077}, {76,255,137},{40,255,6873},{7,255,1412},{221,255,653},{221,255,653},{221,255,653},{213,255,194},{255,180,1105},{198,255,405},{198,255,405},{110,255,0},{255,218,1105},{110,255,0},{255,252,0},{255,254,0},{254,255,0},{252,255,0},{255,252,0},{255,254,0},{252,255,0},{0,255,0},{255,254,0},{0,255,0},{140,0,9248},{140,0,9248},{140,0,9248},{140,0,9248},{107,255,3929},{107,255,3929},{107,255,3929},{76,255,137},{7,255,1412}, -{7,255,1412},
\ No newline at end of file +{7,255,1412}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_55.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_55.inc index 61f7476efc..7acedd6a6f 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_55.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_55.inc @@ -478,4 +478,4 @@ {17,31,11312},{16,31,11037},{13,31,6429},{13,31,6429},{8,31,260},{6,31,10457},{0,31,2642},{26,31,872},{26,31,872},{26,31,872},{25,31,397},{31,22,1513},{23,31,794},{23,31,794},{13,31,1},{29,27,1513},{13,31,1},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,31,3074}, {12,31,3074},{12,31,3074},{8,31,260},{0,31,2642},{0,31,2642},{17,31,58848},{15,31,39619},{13,31,24975},{12,31,19007},{16,31,54474},{13,31,27057},{10,31,8569},{9,31,461},{8,31,51302},{0,31,5046},{28,31,979},{27,31,806},{27,31,637},{26,31,292},{31,26,1473},{26,31,953},{24,31,605},{16,31,0},{29,29,1473},{16,31,0},{19,31,13604},{19,31,13604},{19,31,13604},{18,31,11057},{16,31,10429},{14,31,6339},{14,31,6339}, {10,31,424},{8,31,9713},{1,31,2900},{27,31,637},{27,31,637},{27,31,637},{26,31,292},{30,25,1105},{24,31,605},{24,31,605},{16,31,0},{30,27,1105},{16,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{12,31,3330},{12,31,3330},{12,31,3330},{10,31,424},{1,31,2900}, -{1,31,2900},
\ No newline at end of file +{1,31,2900}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_56.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_56.inc index f57a232a85..2b56c0944c 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_56.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_atc_56.inc @@ -478,4 +478,4 @@ {17,63,11312},{16,63,11037},{13,63,6429},{13,63,6429},{8,63,260},{6,63,10457},{0,63,2642},{26,63,872},{26,63,872},{26,63,872},{25,63,397},{31,45,1513},{23,63,794},{23,63,794},{13,63,1},{31,52,1513},{13,63,1},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,63,3074}, {12,63,3074},{12,63,3074},{8,63,260},{0,63,2642},{0,63,2642},{17,63,58848},{15,63,39619},{13,63,24975},{12,63,19007},{16,63,54474},{13,63,27057},{10,63,8569},{9,63,461},{8,63,51302},{0,63,5046},{28,63,979},{27,63,806},{27,63,637},{26,63,292},{30,56,1473},{26,63,953},{24,63,605},{16,63,0},{30,58,1473},{16,63,0},{19,63,13604},{19,63,13604},{19,63,13604},{18,63,11057},{16,63,10429},{14,63,6339},{14,63,6339}, {10,63,424},{8,63,9713},{1,63,2900},{27,63,637},{27,63,637},{27,63,637},{26,63,292},{31,48,1105},{24,63,605},{24,63,605},{16,63,0},{31,54,1105},{16,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{31,63,0},{0,63,0},{31,63,0},{0,63,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{12,63,3330},{12,63,3330},{12,63,3330},{10,63,424},{1,63,2900}, -{1,63,2900},
\ No newline at end of file +{1,63,2900}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc index 433b126a71..6669852923 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_alpha.inc @@ -46,4 +46,4 @@ {76,0,3},{255,1,27},{255,7,24},{255,1,27},{179,39,8},{255,22,16},{85,0,3},{255,2,27},{255,22,24},{255,7,27},{187,47,8},{255,47,16},{93,0,3},{255,4,27},{251,100,28},{182,0,7},{195,55,8},{255,71,16},{101,0,3},{255,4,27},{253,108,28},{191,0,7},{203,63,8},{255,95,16},{109,0,3},{255,7,27},{255,118,28},{200,0,7},{212,72,8},{255,123,16},{118,0,3},{246,0,7}, {255,129,28},{209,0,7},{220,80,8},{255,147,16},{126,0,3},{246,0,7},{255,138,28},{218,0,7},{228,88,8},{255,172,16},{134,0,3},{249,3,7},{245,91,8},{228,3,7},{236,96,8},{255,196,16},{142,6,3},{251,14,7},{250,102,8},{237,12,7},{245,105,8},{255,223,16},{151,15,3},{253,22,7},{254,112,8},{245,20,7},{253,113,8},{255,248,16},{159,23,3},{253,31,7},{255,124,8},{249,28,7}, {255,124,8},{255,0,0},{167,31,3},{254,39,7},{255,10,4},{252,37,7},{255,10,4},{255,0,0},{175,39,3},{255,48,7},{255,38,4},{254,48,7},{255,38,4},{255,0,0},{184,48,3},{255,56,7},{255,62,4},{255,56,7},{255,62,4},{255,0,0},{192,56,3},{255,65,7},{255,86,4},{255,65,7},{255,86,4},{255,0,0},{200,64,3},{255,74,7},{255,111,4},{255,77,7},{255,111,4},{255,0,0}, -{208,5,2},
\ No newline at end of file +{208,5,2}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc index 357b14b7a1..c0780988d8 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m5_color.inc @@ -478,4 +478,4 @@ {70,127,10779},{68,127,12146},{54,127,6176},{54,127,6176},{34,127,52},{14,127,7281},{2,127,1213},{109,127,937},{109,127,937},{109,127,937},{102,127,281},{127,84,1513},{93,127,565},{93,127,565},{43,127,0},{127,106,1513},{43,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{65,0,9250},{65,0,9250},{65,0,9250},{65,0,9250},{49,127,3656}, {49,127,3656},{49,127,3656},{34,127,52},{2,127,1213},{2,127,1213},{71,127,63180},{60,127,37225},{52,127,26137},{48,127,18128},{68,127,59595},{51,127,22636},{42,127,8480},{37,127,164},{22,127,37455},{0,126,2073},{114,127,1019},{111,127,766},{111,127,666},{105,127,205},{127,102,1473},{102,127,681},{99,127,405},{56,127,0},{127,115,1473},{56,127,0},{79,127,14066},{79,127,14066},{79,127,14066},{73,127,10571},{71,127,11450},{59,127,6166},{59,127,6166}, {37,127,148},{25,127,6914},{5,127,1413},{111,127,666},{111,127,666},{111,127,666},{105,127,205},{127,90,1105},{99,127,405},{99,127,405},{56,127,0},{127,109,1105},{56,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{127,127,0},{0,127,0},{127,127,0},{0,127,0},{69,0,9250},{69,0,9250},{69,0,9250},{69,0,9250},{52,127,3940},{52,127,3940},{52,127,3940},{37,127,148},{5,127,1413}, -{5,127,1413},
\ No newline at end of file +{5,127,1413}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m6.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m6.inc deleted file mode 100644 index 6b814e6132..0000000000 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_bc7_m6.inc +++ /dev/null @@ -1,4383 +0,0 @@ -// Copyright (C) 2017-2019 Binomial LLC. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -static const uint32_t g_etc1_to_bc7_m6_table0[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x80000,0x80000,0x1,0x40000,0x40000,0x40000,0x80000,0x80000,0x1,0x80000,0x80000,0x1,0x1,0x40000,0x40000,0x40000,0x80000,0x80000,0x1,0x80000,0x80000,0x1,0x1,0x80000, -0x80000,0x1,0x1,0x1,0x40000,0x40000,0x40000,0x40000,0x40000,0x80000,0x80000,0x80000,0x40000,0x40000,0x80000,0x1,0x80000,0x100000,0x180000,0x2C0000,0x6000001,0x180000,0x2C0000,0x6000001,0x2C0000,0x6000001,0x6000001,0x180000,0x2C0000,0x6000001,0x2C0000,0x6000001, -0x6000001,0x2C0000,0x6000001,0x6000001,0x6000001,0x180000,0x2C0000,0x6000001,0x2C0000,0x6000001,0x6000001,0x2C0000,0x6000001,0x6000001,0x6000001,0x2C0000,0x6000001,0x6000001,0x6000001,0x6000001,0x140000,0x100000,0x100000,0x180000,0x240000,0x440000,0x6000001,0x6000001,0x140000,0x1C0000,0xD40000,0x6000001, -0x200000,0x4002C,0x16000004,0xA000005,0x6000005,0xE000012,0x8000005,0x6000001,0x6000012,0x400000A,0x4000012,0x8000023,0x600000D,0x6000005,0x6000016,0x400000E,0x4000016,0x4000023,0x4000013,0x400001B,0x2000023,0x4002C,0x6000011,0x6000009,0x600001A,0x4000012,0x400001A,0x2040027,0x4000017,0x200001F,0x2000024,0x8002C, -0x400001C,0x2000022,0x2000027,0x200002C,0x1600000B,0x48000012,0x78000004,0xE00000C,0xA000009,0x600000C,0x6000009,0x4000011,0x1000000F,0xA000012,0x4000013,0x200001F,0x8002C,0x80024,0x12040004,0xA040004,0x6000005,0xE000012,0x8000005,0x6000001,0x6000012,0x400000A,0x4000012,0x80023,0x600000D,0x6000005,0x6000016,0x400000E, -0x4000016,0xC0023,0x4000013,0x400001B,0x2000023,0x80023,0x600000D,0x6000005,0x6000016,0x400000E,0x4000016,0xC0023,0x4000013,0x400001B,0x2000023,0xC0023,0x4000013,0x400001B,0x2000023,0x2000023,0x1600000B,0x48000012,0x5A040004,0xE00000C,0xA000009,0x600000C,0x6000009,0x4000011,0x1000000E,0xA000011,0x4000013,0x400001B, -0xC0023,0x4,0x4,0x4,0x4,0x4000000,0x4000000,0x4000000,0x2000000,0x2000000,0x1,0x2000002,0x2000002,0x2000002,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2000003,0x2000003,0x2000003,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x3, -0x3,0x3,0x3,0x3,0x4000001,0x18000000,0x4,0x2000001,0x4000001,0x1,0x1,0x1,0x2000001,0x4000001,0x2,0x2,0x3,0x4,0x4,0x4,0x4,0x4000000,0x4000000,0x4000000,0x2000000,0x2000000,0x1,0x2000002,0x2000002,0x2000002,0x1,0x1, -0x1,0x2,0x2,0x2,0x2,0x2000002,0x2000002,0x2000002,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x2,0x2,0x2,0x2,0x4000001,0x18000000,0x4,0x2000001,0x4000001,0x1,0x1,0x1,0x2000001,0x4000001,0x2,0x2, -0x2,0x80014,0x12040000,0xA040000,0x6040001,0x20C0012,0x8000005,0x6000001,0x180012,0x400000A,0x4000012,0x20C0012,0x8000005,0x6000001,0x180012,0x400000A,0x4000012,0x180012,0x400000A,0x4000012,0x4000012,0x20C0012,0x8000005,0x6000001,0x180012,0x400000A,0x4000012,0x180012,0x400000A,0x4000012,0x4000012,0x180012, -0x400000A,0x4000012,0x4000012,0x4000012,0x1C000008,0xC080012,0x5A040000,0xA040008,0xA000005,0x6000008,0x6000005,0x600000A,0x14000008,0xC000008,0x6000005,0x4000012,0x140012,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table1[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x100000,0x100000,0x100000,0x100000,0x100000, -0x100000,0x200000,0x200000,0x200000,0x4000001,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x200000,0x200000,0x200000,0x4000001,0x200000,0x200000,0x200000,0x4000001,0x4000001,0xC0000,0xC0000,0xC0000,0x20C0000,0x40C0000,0x100000,0x100000,0x140000,0x20C0000,0x40C0000,0x180000,0x200000, -0x180000,0x140000,0x140000,0x140000,0x140000,0x1C0000,0x1C0000,0x1C0000,0x380000,0x380000,0x8000001,0x1C0000,0x1C0000,0x1C0000,0x380000,0x380000,0x8000001,0x380000,0x380000,0x8000001,0x8000001,0x1C0000,0x1C0000,0x1C0000,0x380000,0x380000,0x8000001,0x380000,0x380000,0x8000001,0x8000001,0x380000, -0x380000,0x8000001,0x8000001,0x8000001,0x180000,0x2140000,0x140000,0x2180000,0x200000,0x280000,0x2C0000,0x440000,0x180000,0x1C0000,0x280000,0x8000001,0x280000,0x200000,0x300000,0x5C0000,0xE000001,0x300000,0x5C0000,0xE000001,0x5C0000,0xE000001,0xE000001,0x300000,0x5C0000,0xE000001,0x5C0000,0xE000001, -0xE000001,0x5C0000,0xE000001,0xE000001,0xE000001,0x300000,0x5C0000,0xE000001,0x5C0000,0xE000001,0xE000001,0x5C0000,0xE000001,0xE000001,0xE000001,0x5C0000,0xE000001,0xE000001,0xE000001,0xE000001,0x280000,0x8200000,0x8200000,0x340000,0x4C0000,0x940000,0xE000001,0xE000001,0x2C0000,0x3C0000,0x1D40000,0xE000001, -0x400000,0x100088,0x220C0034,0x140C0034,0xE0C0035,0x1E080026,0x14080015,0xE080019,0x10080026,0xE040016,0xC040026,0x20000033,0x16000009,0x1004000F,0x12000012,0xE000002,0xC000016,0x10000033,0xE000011,0xA00001B,0xA000033,0x180088,0x1200003D,0xE000034,0x12000036,0xE00001B,0xC000026,0xC00004B,0xC000023,0xA00002B,0xA000043,0x2C0088, -0xA000054,0xA00004C,0x800005F,0x600008C,0x48000002,0x8C080026,0x9E0C0034,0x26000001,0x18000002,0x12000002,0x10000002,0xE000002,0x3200000E,0x1E000005,0x1000000B,0xA00002B,0x200088,0x140034,0x1E100008,0x12100009,0xE100009,0x1A0C0012,0x120C0001,0x100C0001,0x100C0012,0xE0C0005,0xC0C0012,0x200033,0x14040009,0xE080009,0x12000012,0xE000002, -0xC040012,0x3C0033,0xE000011,0xA00001B,0xA000033,0x200033,0x14040009,0xE080009,0x12000012,0xE000002,0xC040012,0x3C0033,0xE000011,0xA00001B,0xA000033,0x3C0033,0xE000011,0xA00001B,0xA000033,0xA000033,0x44040001,0x6E0C0012,0x80100008,0x20040001,0x18000002,0x12000002,0xE040002,0xE000002,0x32000005,0x1E000001,0x1000000A,0xA00001B, -0x2C0033,0xC0034,0xC0034,0xC0034,0xC0034,0x14080014,0x14080014,0x14080014,0xC080014,0xC080014,0x8040015,0x16000008,0x16000008,0x16000008,0xE000001,0xE000001,0xA000005,0xA00000A,0xA00000A,0x8000001,0x600000A,0x20C0033,0x20C0033,0x20C0033,0xC000015,0xC000015,0x8000013,0x8000019,0x8000019,0x800000A,0x600000E,0x180033, -0x180033,0x6000023,0x4000023,0x4000033,0x48000001,0x5C080014,0xC0034,0x24000001,0x18000001,0x12000001,0x10000001,0xC000001,0x24000009,0x1C000004,0xE000009,0x800000A,0x140033,0x100008,0x100008,0x100008,0x100008,0x100C0000,0x100C0000,0x100C0000,0xA0C0001,0xA0C0001,0x80C0001,0x180008,0x180008,0x180008,0xA080001,0xA080001, -0x8080001,0x2C0008,0x2C0008,0x8000001,0x600000A,0x180008,0x180008,0x180008,0xA080001,0xA080001,0x8080001,0x2C0008,0x2C0008,0x8000001,0x600000A,0x2C0008,0x2C0008,0x8000001,0x600000A,0x600000A,0x3A040000,0x3E0C0000,0x100008,0x1E040000,0x14040000,0x10040000,0xC080001,0xC040000,0x26040001,0x1C000000,0x200008,0x8000001, -0x200008,0x180014,0x1A140000,0x12140000,0xE140001,0x2240012,0x120C0001,0xE100001,0x4C0012,0xE000001,0xC000012,0x2240012,0x120C0001,0xE100001,0x4C0012,0xE000001,0xC000012,0x4C0012,0xE000001,0xC000012,0xC000012,0x2240012,0x120C0001,0xE100001,0x4C0012,0xE000001,0xC000012,0x4C0012,0xE000001,0xC000012,0xC000012,0x4C0012, -0xE000001,0xC000012,0xC000012,0xC000012,0x44040001,0x1C0012,0x62140000,0x26000000,0x18000001,0x12000001,0xE040001,0xE000002,0x36000002,0x1E000001,0x10040000,0xC000012,0x340012,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x10000000,0x10000000,0x10000000,0x10000000,0x10000000, -0x10000000,0x8000000,0x8000000,0x8000000,0x4000001,0x80012,0x80012,0x80012,0x80012,0x80012,0x80012,0x6000005,0x6000005,0x6000005,0x4000005,0xC0012,0xC0012,0xC0012,0x400000A,0x2000012,0x58000000,0x40014,0x40014,0x28000000,0x1C000000,0x14000000,0x14000000,0xE000000,0x20000005,0x16000002,0xA000001,0x6000005, -0xC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table2[] = { -0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x2C0000, -0x2C0000,0x2C0000,0x2C0000,0x6000001,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x180000,0x200000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x280000,0x280000,0x280000,0x280000,0x280000, -0x280000,0x500000,0x500000,0x500000,0xC000001,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x500000,0x500000,0x500000,0xC000001,0x500000,0x500000,0x500000,0xC000001,0xC000001,0x61C0000,0x1C0000,0x1C0000,0x200000,0x4200000,0x2240000,0x2240000,0x22C0000,0x200000,0x4200000,0x380000,0x500000, -0x380000,0x240000,0x240000,0x240000,0x240000,0x340000,0x340000,0x340000,0x680000,0x680000,0x10000001,0x340000,0x340000,0x340000,0x680000,0x680000,0x10000001,0x680000,0x680000,0x10000001,0x10000001,0x340000,0x340000,0x340000,0x680000,0x680000,0x10000001,0x680000,0x680000,0x10000001,0x10000001,0x680000, -0x680000,0x10000001,0x10000001,0x10000001,0x2280000,0xA240000,0x240000,0x300000,0x3C0000,0x4C0000,0x540000,0x800000,0x2C0000,0x340000,0x4C0000,0x10000001,0x4C0000,0x300000,0x2440000,0x8C0000,0x16000001,0x2440000,0x8C0000,0x16000001,0x8C0000,0x16000001,0x16000001,0x2440000,0x8C0000,0x16000001,0x8C0000,0x16000001, -0x16000001,0x8C0000,0x16000001,0x16000001,0x16000001,0x2440000,0x8C0000,0x16000001,0x8C0000,0x16000001,0x16000001,0x8C0000,0x16000001,0x16000001,0x16000001,0x8C0000,0x16000001,0x16000001,0x16000001,0x16000001,0x3C0000,0x340000,0x340000,0x500000,0x740000,0xE00000,0x16000001,0x16000001,0x400000,0x580000,0x9E40000,0x16000001, -0x640000,0x200088,0x2A1C0034,0x1C1C0034,0x161C0035,0x26180026,0x1C180015,0x16180019,0x18180026,0x16140016,0x14140026,0x28100033,0x1E100009,0x1814000F,0x1A100012,0x16100002,0x14100016,0x18100033,0x160C000F,0x140C001A,0x12100033,0x300088,0x22040033,0x16100034,0x1C040026,0x18080013,0x140C0024,0x1A000035,0x1604000A,0x14000013,0x12040033,0x5C0088, -0x1600003C,0x12000034,0x10000044,0xE00008C,0x50100002,0x94180026,0xA61C0034,0x2E100001,0x20100002,0x1A100002,0x18100002,0x16100002,0x4A080001,0x2A0C0001,0x180C0009,0x14000013,0x400088,0x240034,0x26200008,0x1A200009,0x16200009,0x221C0012,0x1A1C0001,0x181C0001,0x181C0012,0x161C0005,0x141C0012,0x380033,0x1C140009,0x16180009,0x1A100012,0x16100002, -0x14140012,0x700033,0x16040009,0x14000012,0x12000033,0x380033,0x1C140009,0x16180009,0x1A100012,0x16100002,0x14140012,0x700033,0x16040009,0x14000012,0x12000033,0x700033,0x16040009,0x14000012,0x12000033,0x12000033,0x4C140001,0x761C0012,0x88200008,0x28140001,0x20100002,0x1A100002,0x16140002,0x160C0002,0x4A080001,0x26100001,0x180C0008,0x14000012, -0x500033,0x1C0034,0x1C0034,0x1C0034,0x1C0034,0x1C180014,0x1C180014,0x1C180014,0x14180014,0x14180014,0x10140015,0x1E100008,0x1E100008,0x1E100008,0x16100001,0x16100001,0x12100005,0x1210000A,0x1210000A,0x10100001,0xE10000A,0x2240033,0x2240033,0x2240033,0x18080012,0x18080012,0x10100013,0x16040009,0x16040009,0x10080002,0xE08000A,0x4C0033, -0x4C0033,0x10000013,0xE00000E,0xC000033,0x50100001,0x64180014,0x1C0034,0x2C100001,0x20100001,0x1A100001,0x18100001,0x14100001,0x48080000,0x2A0C0000,0x16100009,0x10080002,0x340033,0x200008,0x200008,0x200008,0x200008,0x181C0000,0x181C0000,0x181C0000,0x121C0001,0x121C0001,0x101C0001,0x300008,0x300008,0x300008,0x12180001,0x12180001, -0x10180001,0x5C0008,0x5C0008,0x10100001,0xE00000A,0x300008,0x300008,0x300008,0x12180001,0x12180001,0x10180001,0x5C0008,0x5C0008,0x10100001,0xE00000A,0x5C0008,0x5C0008,0x10100001,0xE00000A,0xE00000A,0x42140000,0x461C0000,0x200008,0x26140000,0x1C140000,0x18140000,0x14180001,0x14140000,0x3E0C0000,0x24100000,0x400008,0x10100001, -0x400008,0x280014,0x22240000,0x1A240000,0x16240001,0x23C0012,0x1A1C0001,0x16200001,0x7C0012,0x16100001,0x14000012,0x23C0012,0x1A1C0001,0x16200001,0x7C0012,0x16100001,0x14000012,0x7C0012,0x16100001,0x14000012,0x14000012,0x23C0012,0x1A1C0001,0x16200001,0x7C0012,0x16100001,0x14000012,0x7C0012,0x16100001,0x14000012,0x14000012,0x7C0012, -0x16100001,0x14000012,0x14000012,0x14000012,0x5C0C0000,0x2C0012,0x6A240000,0x2E100000,0x20100001,0x1C0C0000,0x16140001,0x16080001,0x52040000,0x2E080000,0x18140000,0x14000012,0x580012,0x140014,0x140014,0x140014,0x140014,0x140014,0x140014,0x140014,0x140014,0x140014,0x140014,0x18100000,0x18100000,0x18100000,0x18100000,0x18100000, -0x18100000,0x10100000,0x10100000,0x10100000,0xC100001,0x200012,0x200012,0x200012,0x200012,0x200012,0x200012,0x10080001,0x10080001,0x10080001,0xC0C0001,0x3C0012,0x3C0012,0x3C0012,0xC000002,0xA000012,0x60100000,0x140014,0x140014,0x30100000,0x24100000,0x1C100000,0x1C100000,0x16100000,0x48080000,0x2A0C0000,0x12100001,0x10080001, -0x2C0012,}; -static const uint32_t g_etc1_to_bc7_m6_table3[] = { -0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x5C0000, -0x5C0000,0x5C0000,0x5C0000,0xE000001,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x8200000,0x8200000,0x8200000,0x300000,0x400000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x400000,0x400000,0x400000,0x400000,0x400000, -0x400000,0x800000,0x800000,0x800000,0x14000001,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x800000,0x800000,0x800000,0x14000001,0x800000,0x800000,0x800000,0x14000001,0x14000001,0xE2C0000,0x2C0000,0x2C0000,0x340000,0x4340000,0x3C0000,0x3C0000,0x480000,0x340000,0x4340000,0x5C0000,0x800000, -0x5C0000,0x340000,0x340000,0x340000,0x340000,0x4C0000,0x4C0000,0x4C0000,0x980000,0x980000,0x18000001,0x4C0000,0x4C0000,0x4C0000,0x980000,0x980000,0x18000001,0x980000,0x980000,0x18000001,0x18000001,0x4C0000,0x4C0000,0x4C0000,0x980000,0x980000,0x18000001,0x980000,0x980000,0x18000001,0x18000001,0x980000, -0x980000,0x18000001,0x18000001,0x18000001,0x3C0000,0x380000,0x340000,0x2440000,0x2540000,0x6C0000,0x7C0000,0xBC0000,0x400000,0x4C0000,0x6C0000,0x18000001,0x6C0000,0x400000,0x25C0000,0xBC0000,0x1E000001,0x25C0000,0xBC0000,0x1E000001,0xBC0000,0x1E000001,0x1E000001,0x25C0000,0xBC0000,0x1E000001,0xBC0000,0x1E000001, -0x1E000001,0xBC0000,0x1E000001,0x1E000001,0x1E000001,0x25C0000,0xBC0000,0x1E000001,0xBC0000,0x1E000001,0x1E000001,0xBC0000,0x1E000001,0x1E000001,0x1E000001,0xBC0000,0x1E000001,0x1E000001,0x1E000001,0x1E000001,0x500000,0x440000,0x440000,0x6C0000,0x9C0000,0x1300000,0x1E000001,0x1E000001,0x2540000,0x780000,0x11F40000,0x1E000001, -0x880000,0x300088,0x322C0034,0x242C0034,0x1E2C0035,0x2E280026,0x24280015,0x1E280019,0x20280026,0x1E240016,0x1C240026,0x30200033,0x26200009,0x2024000F,0x22200012,0x1E200002,0x1C200016,0x20200033,0x1E1C000F,0x1C1C001A,0x1A200033,0x2440088,0x2A140033,0x1E200034,0x24140026,0x20180013,0x1C1C0024,0x240C0033,0x1E14000A,0x1C100013,0x1A140033,0x8C0088, -0x1E000034,0x1C000024,0x1A000037,0x1600008C,0x58200002,0x9C280026,0xAE2C0034,0x36200001,0x28200002,0x22200002,0x20200002,0x1E200002,0x52180001,0x321C0001,0x201C0009,0x1C100013,0x640088,0x340034,0x2E300008,0x22300009,0x1E300009,0x2A2C0012,0x222C0001,0x202C0001,0x202C0012,0x1E2C0005,0x1C2C0012,0x500033,0x24240009,0x1E280009,0x22200012,0x1E200002, -0x1C240012,0xA00033,0x1E140009,0x1C100012,0x1A000033,0x500033,0x24240009,0x1E280009,0x22200012,0x1E200002,0x1C240012,0xA00033,0x1E140009,0x1C100012,0x1A000033,0xA00033,0x1E140009,0x1C100012,0x1A000033,0x1A000033,0x54240001,0x7E2C0012,0x90300008,0x30240001,0x28200002,0x22200002,0x1E240002,0x1E1C0002,0x52180001,0x2E200001,0x201C0008,0x1C100012, -0x700033,0x2C0034,0x2C0034,0x2C0034,0x2C0034,0x24280014,0x24280014,0x24280014,0x1C280014,0x1C280014,0x18240015,0x26200008,0x26200008,0x26200008,0x1E200001,0x1E200001,0x1A200005,0x1A20000A,0x1A20000A,0x18200001,0x1620000A,0x23C0033,0x23C0033,0x23C0033,0x20180012,0x20180012,0x18200013,0x1E140009,0x1E140009,0x18180002,0x1618000A,0x7C0033, -0x7C0033,0x180C0013,0x1604000A,0x14000033,0x58200001,0x6C280014,0x2C0034,0x34200001,0x28200001,0x22200001,0x20200001,0x1C200001,0x50180000,0x321C0000,0x1E200009,0x18180002,0x580033,0x300008,0x300008,0x300008,0x300008,0x202C0000,0x202C0000,0x202C0000,0x1A2C0001,0x1A2C0001,0x182C0001,0x2440008,0x2440008,0x2440008,0x1A280001,0x1A280001, -0x18280001,0x8C0008,0x8C0008,0x18200001,0x1600000A,0x2440008,0x2440008,0x2440008,0x1A280001,0x1A280001,0x18280001,0x8C0008,0x8C0008,0x18200001,0x1600000A,0x8C0008,0x8C0008,0x18200001,0x1600000A,0x1600000A,0x4A240000,0x4E2C0000,0x300008,0x2E240000,0x24240000,0x20240000,0x1C280001,0x1C240000,0x461C0000,0x2C200000,0x640008,0x18200001, -0x640008,0x380014,0x2A340000,0x22340000,0x1E340001,0x540012,0x222C0001,0x1E300001,0xAC0012,0x1E200001,0x1C000012,0x540012,0x222C0001,0x1E300001,0xAC0012,0x1E200001,0x1C000012,0xAC0012,0x1E200001,0x1C000012,0x1C000012,0x540012,0x222C0001,0x1E300001,0xAC0012,0x1E200001,0x1C000012,0xAC0012,0x1E200001,0x1C000012,0x1C000012,0xAC0012, -0x1E200001,0x1C000012,0x1C000012,0x1C000012,0x641C0000,0x63C0012,0x72340000,0x36200000,0x28200001,0x241C0000,0x1E240001,0x1E180001,0x5A140000,0x36180000,0x20240000,0x1C000012,0x780012,0x240014,0x240014,0x240014,0x240014,0x240014,0x240014,0x240014,0x240014,0x240014,0x240014,0x20200000,0x20200000,0x20200000,0x20200000,0x20200000, -0x20200000,0x18200000,0x18200000,0x18200000,0x14200001,0x380012,0x380012,0x380012,0x380012,0x380012,0x380012,0x18180001,0x18180001,0x18180001,0x141C0001,0x700012,0x700012,0x700012,0x140C0001,0x12000012,0x68200000,0x240014,0x240014,0x38200000,0x2C200000,0x24200000,0x24200000,0x1E200000,0x50180000,0x321C0000,0x1A200001,0x18180001, -0x500012,}; -static const uint32_t g_etc1_to_bc7_m6_table4[] = { -0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x940000, -0x940000,0x940000,0x940000,0x18000000,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x2340000,0x2340000,0x2340000,0x480000,0x680000,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000, -0x5C0000,0xB80000,0xB80000,0xB80000,0x1E000000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0xB80000,0xB80000,0xB80000,0x1E000000,0xB80000,0xB80000,0xB80000,0x1E000000,0x1E000000,0x8400000,0x3C0001,0x3C0001,0x480000,0x24C0000,0x540000,0x540000,0x680000,0x480000,0x24C0000,0x800000,0xB80000, -0x800000,0x440001,0x440001,0x440001,0x440001,0x680000,0x680000,0x680000,0xD00000,0xD00000,0x22000000,0x680000,0x680000,0x680000,0xD00000,0xD00000,0x22000000,0xD00000,0xD00000,0x22000000,0x22000000,0x680000,0x680000,0x680000,0xD00000,0xD00000,0x22000000,0xD00000,0xD00000,0x22000000,0x22000000,0xD00000, -0xD00000,0x22000000,0x22000000,0x22000000,0x500000,0xC480000,0x440001,0x25C0000,0x740000,0x940000,0xA80000,0x1000000,0x580000,0x680000,0x940000,0x22000000,0x940000,0x500001,0x780000,0xF40000,0x28000000,0x780000,0xF40000,0x28000000,0xF40000,0x28000000,0x28000000,0x780000,0xF40000,0x28000000,0xF40000,0x28000000, -0x28000000,0xF40000,0x28000000,0x28000000,0x28000000,0x780000,0xF40000,0x28000000,0xF40000,0x28000000,0x28000000,0xF40000,0x28000000,0x28000000,0x28000000,0xF40000,0x28000000,0x28000000,0x28000000,0x28000000,0x4640000,0x580000,0x580000,0x880000,0xC80000,0x1880000,0x28000000,0x28000000,0x700000,0x980000,0x1BE80000,0x28000000, -0xAC0000,0x40008C,0x3E3C0033,0x2E3C0033,0x283C0033,0x38380024,0x2E380013,0x2A38001A,0x2A380024,0x26380016,0x24380026,0x36340034,0x2E34000A,0x2A34000F,0x2C300013,0x28300002,0x24340016,0x28340034,0x2630000F,0x24300019,0x22300035,0x600088,0x32280033,0x28300033,0x2C280026,0x282C0012,0x24300026,0x2E1C0033,0x28200009,0x24240015,0x22280034,0xC40088, -0x280C0033,0x24140026,0x22080034,0x20000088,0x62340002,0xB4380024,0xC63C0033,0x3C340002,0x30340002,0x2A340002,0x28340002,0x28300002,0x5E280001,0x3E2C0001,0x28300009,0x24240015,0x8C0088,0x480033,0x3644000A,0x2C40000A,0x2840000A,0x30400013,0x2C3C0002,0x28400001,0x28400013,0x283C0005,0x243C0015,0x2680033,0x2E340009,0x2838000A,0x2C300012,0x28300001, -0x24380014,0xD80033,0x28200008,0x24240014,0x22000034,0x2680033,0x2E340009,0x2838000A,0x2C300012,0x28300001,0x24380014,0xD80033,0x28200008,0x24240014,0x22000034,0xD80033,0x28200008,0x24240014,0x22000034,0x22000034,0x62340001,0x78400013,0x8A44000A,0x3C340001,0x30340001,0x2A340001,0x28340001,0x28300001,0x5E280001,0x3A300001,0x28300009,0x24240014, -0x980033,0x3C0033,0x3C0033,0x3C0033,0x3C0033,0x30380012,0x30380012,0x30380012,0x26380012,0x26380012,0x22380012,0x2E340009,0x2E340009,0x2E340009,0x26340002,0x26340002,0x22340005,0x24300009,0x24300009,0x22300001,0x20300009,0x580033,0x580033,0x580033,0x282C0012,0x282C0012,0x22300012,0x26280009,0x26280009,0x22280001,0x20280009,0xB00033, -0xB00033,0x221C0012,0x20100008,0x1E000034,0x56340002,0x84380012,0x3C0033,0x3A340001,0x30340001,0x2A340001,0x28340002,0x26300002,0x5E280000,0x38300001,0x28300008,0x22280001,0x7C0033,0x40000A,0x40000A,0x40000A,0x40000A,0x28400001,0x28400001,0x28400001,0x243C0001,0x243C0001,0x223C0001,0x600008,0x600008,0x600008,0x24380001,0x24380001, -0x22380001,0xC40008,0xC40008,0x222C0000,0x20000008,0x600008,0x600008,0x600008,0x24380001,0x24380001,0x22380001,0xC40008,0xC40008,0x222C0000,0x20000008,0xC40008,0xC40008,0x222C0000,0x20000008,0x20000008,0x4A380000,0x48400001,0x40000A,0x3A340000,0x2C380000,0x28380000,0x26380000,0x24380001,0x4C300000,0x34340000,0x8C0008,0x222C0000, -0x8C0008,0x4C0012,0x32480001,0x2A480001,0x28440001,0x700012,0x2C3C0001,0x28400000,0xE40012,0x282C0000,0x24000014,0x700012,0x2C3C0001,0x28400000,0xE40012,0x282C0000,0x24000014,0xE40012,0x282C0000,0x24000014,0x24000014,0x700012,0x2C3C0001,0x28400000,0xE40012,0x282C0000,0x24000014,0xE40012,0x282C0000,0x24000014,0x24000014,0xE40012, -0x282C0000,0x24000014,0x24000014,0x24000014,0x722C0000,0x500012,0x6C480001,0x40300000,0x30340001,0x2C300000,0x28340000,0x28240000,0x66240000,0x3E2C0000,0x28380001,0x24000014,0xA00012,0x380012,0x380012,0x380012,0x380012,0x380012,0x380012,0x380012,0x380012,0x380012,0x380012,0x28340001,0x28340001,0x28340001,0x28340001,0x28340001, -0x28340001,0x20340001,0x20340001,0x20340001,0x1E300001,0x2500012,0x2500012,0x2500012,0x2500012,0x2500012,0x2500012,0x22280001,0x22280001,0x22280001,0x1E2C0000,0xA40012,0xA40012,0xA40012,0x1E180000,0x1A000014,0x62340001,0x380012,0x380012,0x3A340001,0x30340001,0x2A340001,0x2A340001,0x26340001,0x5E280000,0x402C0000,0x24300000,0x22280001, -0x740012,}; -static const uint32_t g_etc1_to_bc7_m6_table5[] = { -0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0xC40000, -0xC40000,0xC40000,0xC40000,0x20000000,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0xA440000,0xA440000,0xA440000,0x600000,0x8C0000,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x740000,0x740000,0x740000,0x740000,0x740000, -0x740000,0xE80000,0xE80000,0xE80000,0x26000000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0xE80000,0xE80000,0xE80000,0x26000000,0xE80000,0xE80000,0xE80000,0x26000000,0x26000000,0x540000,0x4C0001,0x4C0001,0x4580000,0x2600000,0x2680000,0x2680000,0x2800000,0x4580000,0x2600000,0xA40000,0xE80000, -0xA40000,0x540001,0x540001,0x540001,0x540001,0x800000,0x800000,0x800000,0x1000000,0x1000000,0x2A000000,0x800000,0x800000,0x800000,0x1000000,0x1000000,0x2A000000,0x1000000,0x1000000,0x2A000000,0x2A000000,0x800000,0x800000,0x800000,0x1000000,0x1000000,0x2A000000,0x1000000,0x1000000,0x2A000000,0x2A000000,0x1000000, -0x1000000,0x2A000000,0x2A000000,0x2A000000,0x640000,0x5C0000,0x540001,0x740000,0x900000,0xB40000,0xD00000,0x13C0000,0x6C0000,0x800000,0xB40000,0x2A000000,0xB40000,0x600001,0x900000,0x1240000,0x30000000,0x900000,0x1240000,0x30000000,0x1240000,0x30000000,0x30000000,0x900000,0x1240000,0x30000000,0x1240000,0x30000000, -0x30000000,0x1240000,0x30000000,0x30000000,0x30000000,0x900000,0x1240000,0x30000000,0x1240000,0x30000000,0x30000000,0x1240000,0x30000000,0x30000000,0x30000000,0x1240000,0x30000000,0x30000000,0x30000000,0x30000000,0x4780000,0x680000,0x680000,0xA40000,0xEC0000,0x1D80000,0x30000000,0x30000000,0x840000,0xB80000,0x23F80000,0x30000000, -0xD00000,0x50008C,0x464C0033,0x364C0033,0x304C0033,0x40480024,0x36480013,0x3248001A,0x32480024,0x2E480016,0x2C480026,0x3E440034,0x3644000A,0x3244000F,0x34400013,0x30400002,0x2C440016,0x30440034,0x2E40000F,0x2C400019,0x2A400035,0x780088,0x3A380033,0x30400033,0x34380026,0x303C0012,0x2C400026,0x362C0033,0x30300009,0x2C340015,0x2A380034,0xF40088, -0x301C0033,0x2C240026,0x2A180034,0x28000088,0x6A440002,0xBC480024,0xCE4C0033,0x44440002,0x38440002,0x32440002,0x30440002,0x30400002,0x66380001,0x463C0001,0x30400009,0x2C340015,0xAC0088,0x580033,0x3E54000A,0x3450000A,0x3050000A,0x38500013,0x344C0002,0x30500001,0x30500013,0x304C0005,0x2C4C0015,0x2800033,0x36440009,0x3048000A,0x34400012,0x30400001, -0x2C480014,0x1080033,0x30300008,0x2C340014,0x2A000034,0x2800033,0x36440009,0x3048000A,0x34400012,0x30400001,0x2C480014,0x1080033,0x30300008,0x2C340014,0x2A000034,0x1080033,0x30300008,0x2C340014,0x2A000034,0x2A000034,0x6A440001,0x80500013,0x9254000A,0x44440001,0x38440001,0x32440001,0x30440001,0x30400001,0x66380001,0x42400001,0x30400009,0x2C340014, -0xB80033,0x4C0033,0x4C0033,0x4C0033,0x4C0033,0x38480012,0x38480012,0x38480012,0x2E480012,0x2E480012,0x2A480012,0x36440009,0x36440009,0x36440009,0x2E440002,0x2E440002,0x2A440005,0x2C400009,0x2C400009,0x2A400001,0x28400009,0x700033,0x700033,0x700033,0x303C0012,0x303C0012,0x2A400012,0x2E380009,0x2E380009,0x2A380001,0x28380009,0xE40033, -0xE40033,0x2A2C0012,0x28200008,0x26000034,0x5E440002,0x8C480012,0x4C0033,0x42440001,0x38440001,0x32440001,0x30440002,0x2E400002,0x66380000,0x40400001,0x30400008,0x2A380001,0xA00033,0x50000A,0x50000A,0x50000A,0x50000A,0x30500001,0x30500001,0x30500001,0x2C4C0001,0x2C4C0001,0x2A4C0001,0x780008,0x780008,0x780008,0x2C480001,0x2C480001, -0x2A480001,0xF40008,0xF40008,0x2A3C0000,0x28000008,0x780008,0x780008,0x780008,0x2C480001,0x2C480001,0x2A480001,0xF40008,0xF40008,0x2A3C0000,0x28000008,0xF40008,0xF40008,0x2A3C0000,0x28000008,0x28000008,0x52480000,0x50500001,0x50000A,0x42440000,0x34480000,0x30480000,0x2E480000,0x2C480001,0x54400000,0x3C440000,0xAC0008,0x2A3C0000, -0xAC0008,0x5C0012,0x3A580001,0x32580001,0x30540001,0x880012,0x344C0001,0x30500000,0x1140012,0x303C0000,0x2C000014,0x880012,0x344C0001,0x30500000,0x1140012,0x303C0000,0x2C000014,0x1140012,0x303C0000,0x2C000014,0x2C000014,0x880012,0x344C0001,0x30500000,0x1140012,0x303C0000,0x2C000014,0x1140012,0x303C0000,0x2C000014,0x2C000014,0x1140012, -0x303C0000,0x2C000014,0x2C000014,0x2C000014,0x7A3C0000,0x8600012,0x74580001,0x48400000,0x38440001,0x34400000,0x30440000,0x30340000,0x6E340000,0x463C0000,0x30480001,0x2C000014,0xC00012,0x480012,0x480012,0x480012,0x480012,0x480012,0x480012,0x480012,0x480012,0x480012,0x480012,0x30440001,0x30440001,0x30440001,0x30440001,0x30440001, -0x30440001,0x28440001,0x28440001,0x28440001,0x26400001,0x2680012,0x2680012,0x2680012,0x2680012,0x2680012,0x2680012,0x2A380001,0x2A380001,0x2A380001,0x263C0000,0xD80012,0xD80012,0xD80012,0x26280000,0x22000014,0x6A440001,0x480012,0x480012,0x42440001,0x38440001,0x32440001,0x32440001,0x2E440001,0x66380000,0x483C0000,0x2C400000,0x2A380001, -0x980012,}; -static const uint32_t g_etc1_to_bc7_m6_table6[] = { -0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0xF40000, -0xF40000,0xF40000,0xF40000,0x28000000,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x580000,0x580000,0x580000,0x780000,0xAC0000,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000, -0x8C0000,0x1180000,0x1180000,0x1180000,0x2E000000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x1180000,0x1180000,0x1180000,0x2E000000,0x1180000,0x1180000,0x1180000,0x2E000000,0x2E000000,0x640000,0x5C0001,0x5C0001,0x6C0000,0x2740000,0x800000,0x800000,0x9C0000,0x6C0000,0x2740000,0xC80000,0x1180000, -0xC80000,0x640001,0x640001,0x640001,0x640001,0x980000,0x980000,0x980000,0x1300000,0x1300000,0x32000000,0x980000,0x980000,0x980000,0x1300000,0x1300000,0x32000000,0x1300000,0x1300000,0x32000000,0x32000000,0x980000,0x980000,0x980000,0x1300000,0x1300000,0x32000000,0x1300000,0x1300000,0x32000000,0x32000000,0x1300000, -0x1300000,0x32000000,0x32000000,0x32000000,0x4740000,0x6C0000,0x640001,0x2880000,0xAC0000,0xD80000,0xF80000,0x1780000,0x800000,0x980000,0xD80000,0x32000000,0xD80000,0x700001,0xA80000,0x1580000,0x38000000,0xA80000,0x1580000,0x38000000,0x1580000,0x38000000,0x38000000,0xA80000,0x1580000,0x38000000,0x1580000,0x38000000, -0x38000000,0x1580000,0x38000000,0x38000000,0x38000000,0xA80000,0x1580000,0x38000000,0x1580000,0x38000000,0x38000000,0x1580000,0x38000000,0x38000000,0x38000000,0x1580000,0x38000000,0x38000000,0x38000000,0x38000000,0x48C0000,0x4780000,0x4780000,0xC00000,0x1140000,0x7F80000,0x38000000,0x38000000,0x2980000,0xD40000,0x2DCC0000,0x38000000, -0xF00000,0x60008C,0x4E5C0033,0x3E5C0033,0x385C0033,0x48580024,0x3E580013,0x3A58001A,0x3A580024,0x36580016,0x34580026,0x46540034,0x3E54000A,0x3A54000F,0x3C500013,0x38500002,0x34540016,0x38540034,0x3650000F,0x34500019,0x32500035,0x900088,0x42480033,0x38500033,0x3C480026,0x384C0012,0x34500026,0x3E3C0033,0x38400009,0x34440015,0x32480034,0x1240088, -0x382C0033,0x34340026,0x32280034,0x30000088,0x72540002,0xC4580024,0xD65C0033,0x4C540002,0x40540002,0x3A540002,0x38540002,0x38500002,0x6E480001,0x4E4C0001,0x38500009,0x34440015,0xD00088,0x680033,0x4664000A,0x3C60000A,0x3860000A,0x40600013,0x3C5C0002,0x38600001,0x38600013,0x385C0005,0x345C0015,0x2980033,0x3E540009,0x3858000A,0x3C500012,0x38500001, -0x34580014,0x1380033,0x38400008,0x34440014,0x32000034,0x2980033,0x3E540009,0x3858000A,0x3C500012,0x38500001,0x34580014,0x1380033,0x38400008,0x34440014,0x32000034,0x1380033,0x38400008,0x34440014,0x32000034,0x32000034,0x72540001,0x88600013,0x9A64000A,0x4C540001,0x40540001,0x3A540001,0x38540001,0x38500001,0x6E480001,0x4A500001,0x38500009,0x34440014, -0xDC0033,0x5C0033,0x5C0033,0x5C0033,0x5C0033,0x40580012,0x40580012,0x40580012,0x36580012,0x36580012,0x32580012,0x3E540009,0x3E540009,0x3E540009,0x36540002,0x36540002,0x32540005,0x34500009,0x34500009,0x32500001,0x30500009,0x880033,0x880033,0x880033,0x384C0012,0x384C0012,0x32500012,0x36480009,0x36480009,0x32480001,0x30480009,0x1140033, -0x1140033,0x323C0012,0x30300008,0x2E000034,0x66540002,0x94580012,0x5C0033,0x4A540001,0x40540001,0x3A540001,0x38540002,0x36500002,0x6E480000,0x48500001,0x38500008,0x32480001,0xC00033,0x60000A,0x60000A,0x60000A,0x60000A,0x38600001,0x38600001,0x38600001,0x345C0001,0x345C0001,0x325C0001,0x900008,0x900008,0x900008,0x34580001,0x34580001, -0x32580001,0x1240008,0x1240008,0x324C0000,0x30000008,0x900008,0x900008,0x900008,0x34580001,0x34580001,0x32580001,0x1240008,0x1240008,0x324C0000,0x30000008,0x1240008,0x1240008,0x324C0000,0x30000008,0x30000008,0x5A580000,0x58600001,0x60000A,0x4A540000,0x3C580000,0x38580000,0x36580000,0x34580001,0x5C500000,0x44540000,0xD00008,0x324C0000, -0xD00008,0x6C0012,0x42680001,0x3A680001,0x38640001,0xA00012,0x3C5C0001,0x38600000,0x1440012,0x384C0000,0x34000014,0xA00012,0x3C5C0001,0x38600000,0x1440012,0x384C0000,0x34000014,0x1440012,0x384C0000,0x34000014,0x34000014,0xA00012,0x3C5C0001,0x38600000,0x1440012,0x384C0000,0x34000014,0x1440012,0x384C0000,0x34000014,0x34000014,0x1440012, -0x384C0000,0x34000014,0x34000014,0x34000014,0x824C0000,0x740012,0x7C680001,0x50500000,0x40540001,0x3C500000,0x38540000,0x38440000,0x76440000,0x4E4C0000,0x38580001,0x34000014,0xE40012,0x580012,0x580012,0x580012,0x580012,0x580012,0x580012,0x580012,0x580012,0x580012,0x580012,0x38540001,0x38540001,0x38540001,0x38540001,0x38540001, -0x38540001,0x30540001,0x30540001,0x30540001,0x2E500001,0x2800012,0x2800012,0x2800012,0x2800012,0x2800012,0x2800012,0x32480001,0x32480001,0x32480001,0x2E4C0000,0x1080012,0x1080012,0x1080012,0x2E380000,0x2A000014,0x72540001,0x580012,0x580012,0x4A540001,0x40540001,0x3A540001,0x3A540001,0x36540001,0x6E480000,0x504C0000,0x34500000,0x32480001, -0xB80012,}; -static const uint32_t g_etc1_to_bc7_m6_table7[] = { -0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x1240000, -0x1240000,0x1240000,0x1240000,0x30000000,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x680000,0x680000,0x680000,0x900000,0xD00000,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000, -0xA40000,0x14C0000,0x14C0000,0x14C0000,0x36000000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0x14C0000,0x14C0000,0x14C0000,0x36000000,0x14C0000,0x14C0000,0x14C0000,0x36000000,0x36000000,0x2740000,0x6C0001,0x6C0001,0x800000,0x2880000,0x940000,0x940000,0xB80000,0x800000,0x2880000,0xE80000,0x14C0000, -0xE80000,0x740001,0x740001,0x740001,0x740001,0xB00000,0xB00000,0xB00000,0x1640000,0x1640000,0x3A000000,0xB00000,0xB00000,0xB00000,0x1640000,0x1640000,0x3A000000,0x1640000,0x1640000,0x3A000000,0x3A000000,0xB00000,0xB00000,0xB00000,0x1640000,0x1640000,0x3A000000,0x1640000,0x1640000,0x3A000000,0x3A000000,0x1640000, -0x1640000,0x3A000000,0x3A000000,0x3A000000,0x880000,0x67C0000,0x740001,0xA00000,0xC40000,0xF80000,0x1200000,0x1B40000,0x940000,0xB00000,0xF80000,0x3A000000,0xF80000,0x800001,0xC00000,0x1880000,0x40000000,0xC00000,0x1880000,0x40000000,0x1880000,0x40000000,0x40000000,0xC00000,0x1880000,0x40000000,0x1880000,0x40000000, -0x40000000,0x1880000,0x40000000,0x40000000,0x40000000,0xC00000,0x1880000,0x40000000,0x1880000,0x40000000,0x40000000,0x1880000,0x40000000,0x40000000,0x40000000,0x1880000,0x40000000,0x40000000,0x40000000,0x40000000,0x4A00000,0xC880000,0xC880000,0xD80000,0x13C0000,0x11F80000,0x40000000,0x40000000,0xB00000,0xF40000,0x35DC0000,0x40000000, -0x1140000,0x70008C,0x566C0033,0x466C0033,0x406C0033,0x50680024,0x46680013,0x4268001A,0x42680024,0x3E680016,0x3C680026,0x4E640034,0x4664000A,0x4264000F,0x44600013,0x40600002,0x3C640016,0x40640034,0x3E60000F,0x3C600019,0x3A600035,0xA80088,0x4A580033,0x40600033,0x44580026,0x405C0012,0x3C600026,0x464C0033,0x40500009,0x3C540015,0x3A580034,0x1580088, -0x403C0033,0x3C440026,0x3A380034,0x38000088,0x7A640002,0xCC680024,0xDE6C0033,0x54640002,0x48640002,0x42640002,0x40640002,0x40600002,0x76580001,0x565C0001,0x40600009,0x3C540015,0xF00088,0x780033,0x4E74000A,0x4470000A,0x4070000A,0x48700013,0x446C0002,0x40700001,0x40700013,0x406C0005,0x3C6C0015,0x2B00033,0x46640009,0x4068000A,0x44600012,0x40600001, -0x3C680014,0x1680033,0x40500008,0x3C540014,0x3A000034,0x2B00033,0x46640009,0x4068000A,0x44600012,0x40600001,0x3C680014,0x1680033,0x40500008,0x3C540014,0x3A000034,0x1680033,0x40500008,0x3C540014,0x3A000034,0x3A000034,0x7A640001,0x90700013,0xA274000A,0x54640001,0x48640001,0x42640001,0x40640001,0x40600001,0x76580001,0x52600001,0x40600009,0x3C540014, -0xFC0033,0x6C0033,0x6C0033,0x6C0033,0x6C0033,0x48680012,0x48680012,0x48680012,0x3E680012,0x3E680012,0x3A680012,0x46640009,0x46640009,0x46640009,0x3E640002,0x3E640002,0x3A640005,0x3C600009,0x3C600009,0x3A600001,0x38600009,0xA00033,0xA00033,0xA00033,0x405C0012,0x405C0012,0x3A600012,0x3E580009,0x3E580009,0x3A580001,0x38580009,0x1440033, -0x1440033,0x3A4C0012,0x38400008,0x36000034,0x6E640002,0x9C680012,0x6C0033,0x52640001,0x48640001,0x42640001,0x40640002,0x3E600002,0x76580000,0x50600001,0x40600008,0x3A580001,0xE40033,0x70000A,0x70000A,0x70000A,0x70000A,0x40700001,0x40700001,0x40700001,0x3C6C0001,0x3C6C0001,0x3A6C0001,0xA80008,0xA80008,0xA80008,0x3C680001,0x3C680001, -0x3A680001,0x1580008,0x1580008,0x3A5C0000,0x38000008,0xA80008,0xA80008,0xA80008,0x3C680001,0x3C680001,0x3A680001,0x1580008,0x1580008,0x3A5C0000,0x38000008,0x1580008,0x1580008,0x3A5C0000,0x38000008,0x38000008,0x62680000,0x60700001,0x70000A,0x52640000,0x44680000,0x40680000,0x3E680000,0x3C680001,0x64600000,0x4C640000,0xF00008,0x3A5C0000, -0xF00008,0x7C0012,0x4A780001,0x42780001,0x40740001,0xB80012,0x446C0001,0x40700000,0x1740012,0x405C0000,0x3C000014,0xB80012,0x446C0001,0x40700000,0x1740012,0x405C0000,0x3C000014,0x1740012,0x405C0000,0x3C000014,0x3C000014,0xB80012,0x446C0001,0x40700000,0x1740012,0x405C0000,0x3C000014,0x1740012,0x405C0000,0x3C000014,0x3C000014,0x1740012, -0x405C0000,0x3C000014,0x3C000014,0x3C000014,0x8A5C0000,0x840012,0x84780001,0x58600000,0x48640001,0x44600000,0x40640000,0x40540000,0x7E540000,0x565C0000,0x40680001,0x3C000014,0x1080012,0x680012,0x680012,0x680012,0x680012,0x680012,0x680012,0x680012,0x680012,0x680012,0x680012,0x40640001,0x40640001,0x40640001,0x40640001,0x40640001, -0x40640001,0x38640001,0x38640001,0x38640001,0x36600001,0x2980012,0x2980012,0x2980012,0x2980012,0x2980012,0x2980012,0x3A580001,0x3A580001,0x3A580001,0x365C0000,0x1380012,0x1380012,0x1380012,0x36480000,0x32000014,0x7A640001,0x680012,0x680012,0x52640001,0x48640001,0x42640001,0x42640001,0x3E640001,0x76580000,0x585C0000,0x3C600000,0x3A580001, -0xDC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table8[] = { -0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0x15C0000, -0x15C0000,0x15C0000,0x15C0000,0x38000001,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x7C0000,0x7C0000,0x7C0000,0xAC0000,0xF40000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000, -0x2BC0000,0x1800000,0x1800000,0x1800000,0x3E000001,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x1800000,0x1800000,0x1800000,0x3E000001,0x1800000,0x1800000,0x1800000,0x3E000001,0x3E000001,0x880000,0x800000,0x800000,0x940000,0xA00000,0x2AC0000,0x2AC0000,0x2D40000,0x940000,0xA00000,0x1100000,0x1800000, -0x1100000,0x880000,0x880000,0x880000,0x880000,0xC80000,0xC80000,0xC80000,0x1980000,0x1980000,0x42000001,0xC80000,0xC80000,0xC80000,0x1980000,0x1980000,0x42000001,0x1980000,0x1980000,0x42000001,0x42000001,0xC80000,0xC80000,0xC80000,0x1980000,0x1980000,0x42000001,0x1980000,0x1980000,0x42000001,0x42000001,0x1980000, -0x1980000,0x42000001,0x42000001,0x42000001,0x49C0000,0x900000,0x880000,0xB80000,0xE40000,0x1200000,0x14C0000,0x1F80000,0x2A80000,0xC80000,0x1200000,0x42000001,0x1200000,0x940000,0xDC0000,0x1BC0000,0x48000001,0xDC0000,0x1BC0000,0x48000001,0x1BC0000,0x48000001,0x48000001,0xDC0000,0x1BC0000,0x48000001,0x1BC0000,0x48000001, -0x48000001,0x1BC0000,0x48000001,0x48000001,0x48000001,0xDC0000,0x1BC0000,0x48000001,0x1BC0000,0x48000001,0x48000001,0x1BC0000,0x48000001,0x48000001,0x48000001,0x1BC0000,0x48000001,0x48000001,0x48000001,0x48000001,0xB80000,0x69C0000,0x69C0000,0xF80000,0x1680000,0x1DF40000,0x48000001,0x48000001,0xC80000,0x1140000,0x3FD00000,0x48000001, -0x1380000,0x840088,0x5C800034,0x4E800034,0x48800035,0x587C0026,0x4E7C0015,0x487C0019,0x4A7C0026,0x48780016,0x46780026,0x5A740033,0x50740009,0x4A78000F,0x4C740012,0x48740002,0x46740016,0x4A740033,0x4870000F,0x4670001A,0x44740033,0xC40088,0x54680033,0x48740034,0x4E680026,0x4A6C0013,0x46700024,0x4E600033,0x4868000A,0x46640013,0x44680033,0x18C0088, -0x48540034,0x46500024,0x44440033,0x4000008C,0x82740002,0xC67C0026,0xD8800034,0x60740001,0x52740002,0x4C740002,0x4A740002,0x48740002,0x7C6C0001,0x5C700001,0x4A700009,0x46640013,0x1180088,0x880034,0x58840008,0x4C840009,0x48840009,0x54800012,0x4C800001,0x4A800001,0x4A800012,0x48800005,0x46800012,0xCC0033,0x4E780009,0x487C0009,0x4C740012,0x48740002, -0x46780012,0x1A00033,0x48680009,0x46640012,0x44000033,0xCC0033,0x4E780009,0x487C0009,0x4C740012,0x48740002,0x46780012,0x1A00033,0x48680009,0x46640012,0x44000033,0x1A00033,0x48680009,0x46640012,0x44000033,0x44000033,0x7E780001,0xA8800012,0xBA840008,0x5A780001,0x52740002,0x4C740002,0x48780002,0x48700002,0x7C6C0001,0x58740001,0x4A700008,0x46640012, -0x1240033,0x800034,0x800034,0x800034,0x800034,0x4E7C0014,0x4E7C0014,0x4E7C0014,0x467C0014,0x467C0014,0x42780015,0x50740008,0x50740008,0x50740008,0x48740001,0x48740001,0x44740005,0x4474000A,0x4474000A,0x42740001,0x4074000A,0xBC0033,0xBC0033,0xBC0033,0x4A6C0012,0x4A6C0012,0x42740013,0x48680009,0x48680009,0x426C0002,0x406C000A,0x17C0033, -0x17C0033,0x42600013,0x4058000A,0x3E000033,0x82740001,0x967C0014,0x800034,0x5E740001,0x52740001,0x4C740001,0x4A740001,0x46740001,0x7A6C0000,0x5C700000,0x48740009,0x426C0002,0x10C0033,0x840008,0x840008,0x840008,0x840008,0x4A800000,0x4A800000,0x4A800000,0x44800001,0x44800001,0x42800001,0xC40008,0xC40008,0xC40008,0x447C0001,0x447C0001, -0x427C0001,0x18C0008,0x18C0008,0x42740001,0x4000000A,0xC40008,0xC40008,0xC40008,0x447C0001,0x447C0001,0x427C0001,0x18C0008,0x18C0008,0x42740001,0x4000000A,0x18C0008,0x18C0008,0x42740001,0x4000000A,0x4000000A,0x74780000,0x78800000,0x840008,0x58780000,0x4E780000,0x4A780000,0x467C0001,0x46780000,0x70700000,0x56740000,0x1180008,0x42740001, -0x1180008,0x8C0014,0x54880000,0x4C880000,0x48880001,0x2D00012,0x4C800001,0x48840001,0x1AC0012,0x48740001,0x46000012,0x2D00012,0x4C800001,0x48840001,0x1AC0012,0x48740001,0x46000012,0x1AC0012,0x48740001,0x46000012,0x46000012,0x2D00012,0x4C800001,0x48840001,0x1AC0012,0x48740001,0x46000012,0x1AC0012,0x48740001,0x46000012,0x46000012,0x1AC0012, -0x48740001,0x46000012,0x46000012,0x46000012,0x8E700000,0x980012,0x9C880000,0x60740000,0x52740001,0x4E700000,0x48780001,0x486C0001,0x84680000,0x606C0000,0x4A780000,0x46000012,0x12C0012,0x780014,0x780014,0x780014,0x780014,0x780014,0x780014,0x780014,0x780014,0x780014,0x780014,0x4A740000,0x4A740000,0x4A740000,0x4A740000,0x4A740000, -0x4A740000,0x42740000,0x42740000,0x42740000,0x3E740001,0xB40012,0xB40012,0xB40012,0xB40012,0xB40012,0xB40012,0x426C0001,0x426C0001,0x426C0001,0x3E700001,0x1700012,0x1700012,0x1700012,0x3E600001,0x3C000012,0x92740000,0x780014,0x780014,0x62740000,0x56740000,0x4E740000,0x4E740000,0x48740000,0x7A6C0000,0x5C700000,0x44740001,0x426C0001, -0x1000012,}; -static const uint32_t g_etc1_to_bc7_m6_table9[] = { -0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0x18C0000, -0x18C0000,0x18C0000,0x18C0000,0x40000001,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x8C0000,0x8C0000,0x8C0000,0xC40000,0x1180000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000, -0xD40000,0x1B00000,0x1B00000,0x1B00000,0x46000001,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x1B00000,0x1B00000,0x1B00000,0x46000001,0x1B00000,0x1B00000,0x1B00000,0x46000001,0x46000001,0x4980000,0x900000,0x900000,0xA80000,0xB40000,0xC40000,0xC40000,0xF00000,0xA80000,0xB40000,0x1300000,0x1B00000, -0x1300000,0x980000,0x980000,0x980000,0x980000,0xE00000,0xE00000,0xE00000,0x1CC0000,0x1CC0000,0x4A000001,0xE00000,0xE00000,0xE00000,0x1CC0000,0x1CC0000,0x4A000001,0x1CC0000,0x1CC0000,0x4A000001,0x4A000001,0xE00000,0xE00000,0xE00000,0x1CC0000,0x1CC0000,0x4A000001,0x1CC0000,0x1CC0000,0x4A000001,0x4A000001,0x1CC0000, -0x1CC0000,0x4A000001,0x4A000001,0x4A000001,0xB00000,0x8A00000,0x980000,0x2CC0000,0x1000000,0x1400000,0x1740000,0xBFC0000,0x2BC0000,0xE00000,0x1400000,0x4A000001,0x1400000,0xA40000,0xF40000,0x1F00000,0x50000001,0xF40000,0x1F00000,0x50000001,0x1F00000,0x50000001,0x50000001,0xF40000,0x1F00000,0x50000001,0x1F00000,0x50000001, -0x50000001,0x1F00000,0x50000001,0x50000001,0x50000001,0xF40000,0x1F00000,0x50000001,0x1F00000,0x50000001,0x50000001,0x1F00000,0x50000001,0x50000001,0x50000001,0x1F00000,0x50000001,0x50000001,0x50000001,0x50000001,0x2CC0000,0xEAC0000,0xEAC0000,0x1140000,0x1900000,0x27F40000,0x50000001,0x50000001,0x2DC0000,0x1340000,0x47E00000,0x50000001, -0x15C0000,0x940088,0x64900034,0x56900034,0x50900035,0x608C0026,0x568C0015,0x508C0019,0x528C0026,0x50880016,0x4E880026,0x62840033,0x58840009,0x5288000F,0x54840012,0x50840002,0x4E840016,0x52840033,0x5080000F,0x4E80001A,0x4C840033,0xDC0088,0x5C780033,0x50840034,0x56780026,0x527C0013,0x4E800024,0x56700033,0x5078000A,0x4E740013,0x4C780033,0x1BC0088, -0x50640034,0x4E600024,0x4C540033,0x4800008C,0x8A840002,0xCE8C0026,0xE0900034,0x68840001,0x5A840002,0x54840002,0x52840002,0x50840002,0x847C0001,0x64800001,0x52800009,0x4E740013,0x1380088,0x980034,0x60940008,0x54940009,0x50940009,0x5C900012,0x54900001,0x52900001,0x52900012,0x50900005,0x4E900012,0xE40033,0x56880009,0x508C0009,0x54840012,0x50840002, -0x4E880012,0x1D00033,0x50780009,0x4E740012,0x4C000033,0xE40033,0x56880009,0x508C0009,0x54840012,0x50840002,0x4E880012,0x1D00033,0x50780009,0x4E740012,0x4C000033,0x1D00033,0x50780009,0x4E740012,0x4C000033,0x4C000033,0x86880001,0xB0900012,0xC2940008,0x62880001,0x5A840002,0x54840002,0x50880002,0x50800002,0x847C0001,0x60840001,0x52800008,0x4E740012, -0x1480033,0x900034,0x900034,0x900034,0x900034,0x568C0014,0x568C0014,0x568C0014,0x4E8C0014,0x4E8C0014,0x4A880015,0x58840008,0x58840008,0x58840008,0x50840001,0x50840001,0x4C840005,0x4C84000A,0x4C84000A,0x4A840001,0x4884000A,0x2D00033,0x2D00033,0x2D00033,0x527C0012,0x527C0012,0x4A840013,0x50780009,0x50780009,0x4A7C0002,0x487C000A,0x1AC0033, -0x1AC0033,0x4A700013,0x4868000A,0x46000033,0x8A840001,0x9E8C0014,0x900034,0x66840001,0x5A840001,0x54840001,0x52840001,0x4E840001,0x827C0000,0x64800000,0x50840009,0x4A7C0002,0x12C0033,0x940008,0x940008,0x940008,0x940008,0x52900000,0x52900000,0x52900000,0x4C900001,0x4C900001,0x4A900001,0xDC0008,0xDC0008,0xDC0008,0x4C8C0001,0x4C8C0001, -0x4A8C0001,0x1BC0008,0x1BC0008,0x4A840001,0x4800000A,0xDC0008,0xDC0008,0xDC0008,0x4C8C0001,0x4C8C0001,0x4A8C0001,0x1BC0008,0x1BC0008,0x4A840001,0x4800000A,0x1BC0008,0x1BC0008,0x4A840001,0x4800000A,0x4800000A,0x7C880000,0x80900000,0x940008,0x60880000,0x56880000,0x52880000,0x4E8C0001,0x4E880000,0x78800000,0x5E840000,0x1380008,0x4A840001, -0x1380008,0x9C0014,0x5C980000,0x54980000,0x50980001,0x2E80012,0x54900001,0x50940001,0x1DC0012,0x50840001,0x4E000012,0x2E80012,0x54900001,0x50940001,0x1DC0012,0x50840001,0x4E000012,0x1DC0012,0x50840001,0x4E000012,0x4E000012,0x2E80012,0x54900001,0x50940001,0x1DC0012,0x50840001,0x4E000012,0x1DC0012,0x50840001,0x4E000012,0x4E000012,0x1DC0012, -0x50840001,0x4E000012,0x4E000012,0x4E000012,0x96800000,0xA80012,0xA4980000,0x68840000,0x5A840001,0x56800000,0x50880001,0x507C0001,0x8C780000,0x687C0000,0x52880000,0x4E000012,0x1500012,0x880014,0x880014,0x880014,0x880014,0x880014,0x880014,0x880014,0x880014,0x880014,0x880014,0x52840000,0x52840000,0x52840000,0x52840000,0x52840000, -0x52840000,0x4A840000,0x4A840000,0x4A840000,0x46840001,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0x4A7C0001,0x4A7C0001,0x4A7C0001,0x46800001,0x1A00012,0x1A00012,0x1A00012,0x46700001,0x44000012,0x9A840000,0x880014,0x880014,0x6A840000,0x5E840000,0x56840000,0x56840000,0x50840000,0x827C0000,0x64800000,0x4C840001,0x4A7C0001, -0x1240012,}; -static const uint32_t g_etc1_to_bc7_m6_table10[] = { -0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0x1BC0000, -0x1BC0000,0x1BC0000,0x1BC0000,0x48000001,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x69C0000,0x69C0000,0x69C0000,0xDC0000,0x1380000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000, -0xEC0000,0x1E40000,0x1E40000,0x1E40000,0x4E000001,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0x1E40000,0x1E40000,0x1E40000,0x4E000001,0x1E40000,0x1E40000,0x1E40000,0x4E000001,0x4E000001,0xCA80000,0xA00000,0xA00000,0x4B80000,0xC80000,0xD80000,0xD80000,0x10C0000,0x4B80000,0xC80000,0x1540000,0x1E40000, -0x1540000,0xA80000,0xA80000,0xA80000,0xA80000,0xF80000,0xF80000,0xF80000,0x1FC0000,0x1FC0000,0x52000001,0xF80000,0xF80000,0xF80000,0x1FC0000,0x1FC0000,0x52000001,0x1FC0000,0x1FC0000,0x52000001,0x52000001,0xF80000,0xF80000,0xF80000,0x1FC0000,0x1FC0000,0x52000001,0x1FC0000,0x1FC0000,0x52000001,0x52000001,0x1FC0000, -0x1FC0000,0x52000001,0x52000001,0x52000001,0xC40000,0xB40000,0xA80000,0xE40000,0x1180000,0x1640000,0x19C0000,0x17F80000,0x4D00000,0xF80000,0x1640000,0x52000001,0x1640000,0xB40000,0x10C0000,0xBF80000,0x58000001,0x10C0000,0xBF80000,0x58000001,0xBF80000,0x58000001,0x58000001,0x10C0000,0xBF80000,0x58000001,0xBF80000,0x58000001, -0x58000001,0xBF80000,0x58000001,0x58000001,0x58000001,0x10C0000,0xBF80000,0x58000001,0xBF80000,0x58000001,0x58000001,0xBF80000,0x58000001,0x58000001,0x58000001,0xBF80000,0x58000001,0x58000001,0x58000001,0x58000001,0x2E00000,0xC00000,0xC00000,0x32C0000,0x1B80000,0x31F40000,0x58000001,0x58000001,0xF40000,0x1500000,0x4FF00000,0x58000001, -0x17C0000,0xA40088,0x6CA00034,0x5EA00034,0x58A00035,0x689C0026,0x5E9C0015,0x589C0019,0x5A9C0026,0x58980016,0x56980026,0x6A940033,0x60940009,0x5A98000F,0x5C940012,0x58940002,0x56940016,0x5A940033,0x5890000F,0x5690001A,0x54940033,0xF40088,0x64880033,0x58940034,0x5E880026,0x5A8C0013,0x56900024,0x5E800033,0x5888000A,0x56840013,0x54880033,0x1F00088, -0x58740034,0x56700024,0x54640033,0x5000008C,0x92940002,0xD69C0026,0xE8A00034,0x70940001,0x62940002,0x5C940002,0x5A940002,0x58940002,0x8C8C0001,0x6C900001,0x5A900009,0x56840013,0x15C0088,0xA80034,0x68A40008,0x5CA40009,0x58A40009,0x64A00012,0x5CA00001,0x5AA00001,0x5AA00012,0x58A00005,0x56A00012,0xFC0033,0x5E980009,0x589C0009,0x5C940012,0x58940002, -0x56980012,0x3F80033,0x58880009,0x56840012,0x54000033,0xFC0033,0x5E980009,0x589C0009,0x5C940012,0x58940002,0x56980012,0x3F80033,0x58880009,0x56840012,0x54000033,0x3F80033,0x58880009,0x56840012,0x54000033,0x54000033,0x8E980001,0xB8A00012,0xCAA40008,0x6A980001,0x62940002,0x5C940002,0x58980002,0x58900002,0x8C8C0001,0x68940001,0x5A900008,0x56840012, -0x1680033,0xA00034,0xA00034,0xA00034,0xA00034,0x5E9C0014,0x5E9C0014,0x5E9C0014,0x569C0014,0x569C0014,0x52980015,0x60940008,0x60940008,0x60940008,0x58940001,0x58940001,0x54940005,0x5494000A,0x5494000A,0x52940001,0x5094000A,0x2E80033,0x2E80033,0x2E80033,0x5A8C0012,0x5A8C0012,0x52940013,0x58880009,0x58880009,0x528C0002,0x508C000A,0x1DC0033, -0x1DC0033,0x52800013,0x5078000A,0x4E000033,0x92940001,0xA69C0014,0xA00034,0x6E940001,0x62940001,0x5C940001,0x5A940001,0x56940001,0x8A8C0000,0x6C900000,0x58940009,0x528C0002,0x1500033,0xA40008,0xA40008,0xA40008,0xA40008,0x5AA00000,0x5AA00000,0x5AA00000,0x54A00001,0x54A00001,0x52A00001,0xF40008,0xF40008,0xF40008,0x549C0001,0x549C0001, -0x529C0001,0x1F00008,0x1F00008,0x52940001,0x5000000A,0xF40008,0xF40008,0xF40008,0x549C0001,0x549C0001,0x529C0001,0x1F00008,0x1F00008,0x52940001,0x5000000A,0x1F00008,0x1F00008,0x52940001,0x5000000A,0x5000000A,0x84980000,0x88A00000,0xA40008,0x68980000,0x5E980000,0x5A980000,0x569C0001,0x56980000,0x80900000,0x66940000,0x15C0008,0x52940001, -0x15C0008,0xAC0014,0x64A80000,0x5CA80000,0x58A80001,0x3000012,0x5CA00001,0x58A40001,0x5FC0012,0x58940001,0x56000012,0x3000012,0x5CA00001,0x58A40001,0x5FC0012,0x58940001,0x56000012,0x5FC0012,0x58940001,0x56000012,0x56000012,0x3000012,0x5CA00001,0x58A40001,0x5FC0012,0x58940001,0x56000012,0x5FC0012,0x58940001,0x56000012,0x56000012,0x5FC0012, -0x58940001,0x56000012,0x56000012,0x56000012,0x9E900000,0x4B80012,0xACA80000,0x70940000,0x62940001,0x5E900000,0x58980001,0x588C0001,0x94880000,0x708C0000,0x5A980000,0x56000012,0x1700012,0x980014,0x980014,0x980014,0x980014,0x980014,0x980014,0x980014,0x980014,0x980014,0x980014,0x5A940000,0x5A940000,0x5A940000,0x5A940000,0x5A940000, -0x5A940000,0x52940000,0x52940000,0x52940000,0x4E940001,0xE40012,0xE40012,0xE40012,0xE40012,0xE40012,0xE40012,0x528C0001,0x528C0001,0x528C0001,0x4E900001,0x1D00012,0x1D00012,0x1D00012,0x4E800001,0x4C000012,0xA2940000,0x980014,0x980014,0x72940000,0x66940000,0x5E940000,0x5E940000,0x58940000,0x8A8C0000,0x6C900000,0x54940001,0x528C0001, -0x1480012,}; -static const uint32_t g_etc1_to_bc7_m6_table11[] = { -0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0x1F00000, -0x1F00000,0x1F00000,0x1F00000,0x50000001,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xEAC0000,0xEAC0000,0xEAC0000,0xF40000,0x15C0000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000, -0x1040000,0x7FC0000,0x7FC0000,0x7FC0000,0x56000001,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x7FC0000,0x7FC0000,0x7FC0000,0x56000001,0x7FC0000,0x7FC0000,0x7FC0000,0x56000001,0x56000001,0xBC0000,0xB00000,0xB00000,0xCC0000,0xDC0000,0xF00000,0xF00000,0x1280000,0xCC0000,0xDC0000,0x1740000,0x7FC0000, -0x1740000,0xB80000,0xB80000,0xB80000,0xB80000,0x1100000,0x1100000,0x1100000,0xDFC0000,0xDFC0000,0x5A000001,0x1100000,0x1100000,0x1100000,0xDFC0000,0xDFC0000,0x5A000001,0xDFC0000,0xDFC0000,0x5A000001,0x5A000001,0x1100000,0x1100000,0x1100000,0xDFC0000,0xDFC0000,0x5A000001,0xDFC0000,0xDFC0000,0x5A000001,0x5A000001,0xDFC0000, -0xDFC0000,0x5A000001,0x5A000001,0x5A000001,0x2D40000,0xC40000,0xB80000,0x2F80000,0x1340000,0x1880000,0x1C00000,0x21FC0000,0x4E40000,0x1100000,0x1880000,0x5A000001,0x1880000,0xC40000,0x1240000,0x17F80000,0x60000001,0x1240000,0x17F80000,0x60000001,0x17F80000,0x60000001,0x60000001,0x1240000,0x17F80000,0x60000001,0x17F80000,0x60000001, -0x60000001,0x17F80000,0x60000001,0x60000001,0x60000001,0x1240000,0x17F80000,0x60000001,0x17F80000,0x60000001,0x60000001,0x17F80000,0x60000001,0x60000001,0x60000001,0x17F80000,0x60000001,0x60000001,0x60000001,0x60000001,0x2F40000,0xD00000,0xD00000,0x1480000,0x1E00000,0x3BF40000,0x60000001,0x60000001,0x3080000,0x1700000,0x59C40000,0x60000001, -0x1A00000,0xB40088,0x74B00034,0x66B00034,0x60B00035,0x70AC0026,0x66AC0015,0x60AC0019,0x62AC0026,0x60A80016,0x5EA80026,0x72A40033,0x68A40009,0x62A8000F,0x64A40012,0x60A40002,0x5EA40016,0x62A40033,0x60A0000F,0x5EA0001A,0x5CA40033,0x10C0088,0x6C980033,0x60A40034,0x66980026,0x629C0013,0x5EA00024,0x66900033,0x6098000A,0x5E940013,0x5C980033,0xBF80088, -0x60840034,0x5E800024,0x5C740033,0x5800008C,0x9AA40002,0xDEAC0026,0xF0B00034,0x78A40001,0x6AA40002,0x64A40002,0x62A40002,0x60A40002,0x949C0001,0x74A00001,0x62A00009,0x5E940013,0x17C0088,0xB80034,0x70B40008,0x64B40009,0x60B40009,0x6CB00012,0x64B00001,0x62B00001,0x62B00012,0x60B00005,0x5EB00012,0x1140033,0x66A80009,0x60AC0009,0x64A40012,0x60A40002, -0x5EA80012,0xFF80033,0x60980009,0x5E940012,0x5C000033,0x1140033,0x66A80009,0x60AC0009,0x64A40012,0x60A40002,0x5EA80012,0xFF80033,0x60980009,0x5E940012,0x5C000033,0xFF80033,0x60980009,0x5E940012,0x5C000033,0x5C000033,0x96A80001,0xC0B00012,0xD2B40008,0x72A80001,0x6AA40002,0x64A40002,0x60A80002,0x60A00002,0x949C0001,0x70A40001,0x62A00008,0x5E940012, -0x18C0033,0xB00034,0xB00034,0xB00034,0xB00034,0x66AC0014,0x66AC0014,0x66AC0014,0x5EAC0014,0x5EAC0014,0x5AA80015,0x68A40008,0x68A40008,0x68A40008,0x60A40001,0x60A40001,0x5CA40005,0x5CA4000A,0x5CA4000A,0x5AA40001,0x58A4000A,0x3000033,0x3000033,0x3000033,0x629C0012,0x629C0012,0x5AA40013,0x60980009,0x60980009,0x5A9C0002,0x589C000A,0x5FC0033, -0x5FC0033,0x5A900013,0x5888000A,0x56000033,0x9AA40001,0xAEAC0014,0xB00034,0x76A40001,0x6AA40001,0x64A40001,0x62A40001,0x5EA40001,0x929C0000,0x74A00000,0x60A40009,0x5A9C0002,0x1700033,0xB40008,0xB40008,0xB40008,0xB40008,0x62B00000,0x62B00000,0x62B00000,0x5CB00001,0x5CB00001,0x5AB00001,0x10C0008,0x10C0008,0x10C0008,0x5CAC0001,0x5CAC0001, -0x5AAC0001,0xBF80008,0xBF80008,0x5AA40001,0x5800000A,0x10C0008,0x10C0008,0x10C0008,0x5CAC0001,0x5CAC0001,0x5AAC0001,0xBF80008,0xBF80008,0x5AA40001,0x5800000A,0xBF80008,0xBF80008,0x5AA40001,0x5800000A,0x5800000A,0x8CA80000,0x90B00000,0xB40008,0x70A80000,0x66A80000,0x62A80000,0x5EAC0001,0x5EA80000,0x88A00000,0x6EA40000,0x17C0008,0x5AA40001, -0x17C0008,0xBC0014,0x6CB80000,0x64B80000,0x60B80001,0x3180012,0x64B00001,0x60B40001,0x11FC0012,0x60A40001,0x5E000012,0x3180012,0x64B00001,0x60B40001,0x11FC0012,0x60A40001,0x5E000012,0x11FC0012,0x60A40001,0x5E000012,0x5E000012,0x3180012,0x64B00001,0x60B40001,0x11FC0012,0x60A40001,0x5E000012,0x11FC0012,0x60A40001,0x5E000012,0x5E000012,0x11FC0012, -0x60A40001,0x5E000012,0x5E000012,0x5E000012,0xA6A00000,0xCC80012,0xB4B80000,0x78A40000,0x6AA40001,0x66A00000,0x60A80001,0x609C0001,0x9C980000,0x789C0000,0x62A80000,0x5E000012,0x1940012,0xA80014,0xA80014,0xA80014,0xA80014,0xA80014,0xA80014,0xA80014,0xA80014,0xA80014,0xA80014,0x62A40000,0x62A40000,0x62A40000,0x62A40000,0x62A40000, -0x62A40000,0x5AA40000,0x5AA40000,0x5AA40000,0x56A40001,0xFC0012,0xFC0012,0xFC0012,0xFC0012,0xFC0012,0xFC0012,0x5A9C0001,0x5A9C0001,0x5A9C0001,0x56A00001,0x3F80012,0x3F80012,0x3F80012,0x56900001,0x54000012,0xAAA40000,0xA80014,0xA80014,0x7AA40000,0x6EA40000,0x66A40000,0x66A40000,0x60A40000,0x929C0000,0x74A00000,0x5CA40001,0x5A9C0001, -0x1680012,}; -static const uint32_t g_etc1_to_bc7_m6_table12[] = { -0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0xBFC0000, -0xBFC0000,0xBFC0000,0xBFC0000,0x5A000000,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0x8C00000,0x8C00000,0x8C00000,0x30C0000,0x1800000,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000, -0x1200000,0x15F80000,0x15F80000,0x15F80000,0x60000000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x15F80000,0x15F80000,0x15F80000,0x60000000,0x15F80000,0x15F80000,0x15F80000,0x60000000,0x60000000,0xECC0000,0xC00001,0xC00001,0x2E00000,0xF40000,0x1080000,0x1080000,0x1440000,0x2E00000,0xF40000,0x19C0000,0x15F80000, -0x19C0000,0xC80001,0xC80001,0xC80001,0xC80001,0x12C0000,0x12C0000,0x12C0000,0x1BF80000,0x1BF80000,0x64000000,0x12C0000,0x12C0000,0x12C0000,0x1BF80000,0x1BF80000,0x64000000,0x1BF80000,0x1BF80000,0x64000000,0x64000000,0x12C0000,0x12C0000,0x12C0000,0x1BF80000,0x1BF80000,0x64000000,0x1BF80000,0x1BF80000,0x64000000,0x64000000,0x1BF80000, -0x1BF80000,0x64000000,0x64000000,0x64000000,0x6E80000,0xD80000,0xC80001,0x3100000,0x1540000,0x1AC0000,0x1EC0000,0x2DFC0000,0xFC0000,0x12C0000,0x1AC0000,0x64000000,0x1AC0000,0xD40001,0x33C0000,0x23FC0000,0x6A000000,0x33C0000,0x23FC0000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x33C0000,0x23FC0000,0x6A000000,0x23FC0000,0x6A000000, -0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0x33C0000,0x23FC0000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0x6A000000,0x10C0000,0xE40000,0xE40000,0x1680000,0x7F80000,0x45FC0000,0x6A000000,0x6A000000,0x1240000,0x1900000,0x61F40000,0x6A000000, -0x1C80000,0xC4008C,0x80C00033,0x70C00033,0x6AC00033,0x7ABC0024,0x70BC0013,0x6CBC001A,0x6CBC0024,0x68BC0016,0x66BC0026,0x78B80034,0x70B8000A,0x6CB8000F,0x6EB40013,0x6AB40002,0x66B80016,0x6AB80034,0x68B4000F,0x66B40019,0x64B40035,0x3240088,0x74AC0033,0x6AB40033,0x6EAC0026,0x6AB00012,0x66B40026,0x70A00033,0x6AA40009,0x66A80015,0x64AC0034,0x17FC0088, -0x6A900033,0x66980026,0x648C0034,0x62000088,0xA4B80002,0xF6BC0024,0xF8C00034,0x7EB80002,0x72B80002,0x6CB80002,0x6AB80002,0x6AB40002,0xA0AC0001,0x80B00001,0x6AB40009,0x66A80015,0x1A40088,0xCC0033,0x78C8000A,0x6EC4000A,0x6AC4000A,0x72C40013,0x6EC00002,0x6AC40001,0x6AC40013,0x6AC00005,0x66C00015,0x1300033,0x70B80009,0x6ABC000A,0x6EB40012,0x6AB40001, -0x66BC0014,0x1DF40033,0x6AA40008,0x66A80014,0x64000034,0x1300033,0x70B80009,0x6ABC000A,0x6EB40012,0x6AB40001,0x66BC0014,0x1DF40033,0x6AA40008,0x66A80014,0x64000034,0x1DF40033,0x6AA40008,0x66A80014,0x64000034,0x64000034,0xA4B80001,0xBAC40013,0xCCC8000A,0x7EB80001,0x72B80001,0x6CB80001,0x6AB80001,0x6AB40001,0xA0AC0001,0x7CB40001,0x6AB40009,0x66A80014, -0x1B00033,0xC00033,0xC00033,0xC00033,0xC00033,0x72BC0012,0x72BC0012,0x72BC0012,0x68BC0012,0x68BC0012,0x64BC0012,0x70B80009,0x70B80009,0x70B80009,0x68B80002,0x68B80002,0x64B80005,0x66B40009,0x66B40009,0x64B40001,0x62B40009,0x11C0033,0x11C0033,0x11C0033,0x6AB00012,0x6AB00012,0x64B40012,0x68AC0009,0x68AC0009,0x64AC0001,0x62AC0009,0x13FC0033, -0x13FC0033,0x64A00012,0x62940008,0x60000034,0x98B80002,0xC6BC0012,0xC00033,0x7CB80001,0x72B80001,0x6CB80001,0x6AB80002,0x68B40002,0xA0AC0000,0x7AB40001,0x6AB40008,0x64AC0001,0x1980033,0xC4000A,0xC4000A,0xC4000A,0xC4000A,0x6AC40001,0x6AC40001,0x6AC40001,0x66C00001,0x66C00001,0x64C00001,0x3240008,0x3240008,0x3240008,0x66BC0001,0x66BC0001, -0x64BC0001,0x17FC0008,0x17FC0008,0x64B00000,0x62000008,0x3240008,0x3240008,0x3240008,0x66BC0001,0x66BC0001,0x64BC0001,0x17FC0008,0x17FC0008,0x64B00000,0x62000008,0x17FC0008,0x17FC0008,0x64B00000,0x62000008,0x62000008,0x8CBC0000,0x8AC40001,0xC4000A,0x7CB80000,0x6EBC0000,0x6ABC0000,0x68BC0000,0x66BC0001,0x8EB40000,0x76B80000,0x1A40008,0x64B00000, -0x1A40008,0xD00012,0x74CC0001,0x6CCC0001,0x6AC80001,0x1340012,0x6EC00001,0x6AC40000,0x1FF80012,0x6AB00000,0x66000014,0x1340012,0x6EC00001,0x6AC40000,0x1FF80012,0x6AB00000,0x66000014,0x1FF80012,0x6AB00000,0x66000014,0x66000014,0x1340012,0x6EC00001,0x6AC40000,0x1FF80012,0x6AB00000,0x66000014,0x1FF80012,0x6AB00000,0x66000014,0x66000014,0x1FF80012, -0x6AB00000,0x66000014,0x66000014,0x66000014,0xB4B00000,0x6DC0012,0xAECC0001,0x82B40000,0x72B80001,0x6EB40000,0x6AB80000,0x6AA80000,0xA8A80000,0x80B00000,0x6ABC0001,0x66000014,0x1B80012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0xBC0012,0x6AB80001,0x6AB80001,0x6AB80001,0x6AB80001,0x6AB80001, -0x6AB80001,0x62B80001,0x62B80001,0x62B80001,0x60B40001,0x1180012,0x1180012,0x1180012,0x1180012,0x1180012,0x1180012,0x64AC0001,0x64AC0001,0x64AC0001,0x60B00000,0x11F80012,0x11F80012,0x11F80012,0x609C0000,0x5C000014,0xA4B80001,0xBC0012,0xBC0012,0x7CB80001,0x72B80001,0x6CB80001,0x6CB80001,0x68B80001,0xA0AC0000,0x82B00000,0x66B40000,0x64AC0001, -0x1900012,}; -static const uint32_t g_etc1_to_bc7_m6_table13[] = { -0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x17FC0000, -0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xD40000,0xD40000,0xD40000,0x3240000,0x1A40000,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000, -0x1380000,0x21F80000,0x21F80000,0x21F80000,0x68000000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x21F80000,0x21F80000,0x21F80000,0x68000000,0x21F80000,0x21F80000,0x21F80000,0x68000000,0x68000000,0xE00000,0xD00001,0xD00001,0xF40000,0x1080000,0x11C0000,0x11C0000,0x1600000,0xF40000,0x1080000,0x1BC0000,0x21F80000, -0x1BC0000,0xD80001,0xD80001,0xD80001,0xD80001,0x1440000,0x1440000,0x1440000,0x27F80000,0x27F80000,0x6C000000,0x1440000,0x1440000,0x1440000,0x27F80000,0x27F80000,0x6C000000,0x27F80000,0x27F80000,0x6C000000,0x6C000000,0x1440000,0x1440000,0x1440000,0x27F80000,0x27F80000,0x6C000000,0x27F80000,0x27F80000,0x6C000000,0x6C000000,0x27F80000, -0x27F80000,0x6C000000,0x6C000000,0x6C000000,0x2FC0000,0xE80000,0xD80001,0x1280000,0x36C0000,0x1D00000,0x9FC0000,0x39F80000,0x1100000,0x1440000,0x1D00000,0x6C000000,0x1D00000,0xE40001,0x1540000,0x2FFC0000,0x72000000,0x1540000,0x2FFC0000,0x72000000,0x2FFC0000,0x72000000,0x72000000,0x1540000,0x2FFC0000,0x72000000,0x2FFC0000,0x72000000, -0x72000000,0x2FFC0000,0x72000000,0x72000000,0x72000000,0x1540000,0x2FFC0000,0x72000000,0x2FFC0000,0x72000000,0x72000000,0x2FFC0000,0x72000000,0x72000000,0x72000000,0x2FFC0000,0x72000000,0x72000000,0x72000000,0x72000000,0x1200000,0x2F40000,0x2F40000,0x3800000,0x15F80000,0x4FFC0000,0x72000000,0x72000000,0x1380000,0x1B00000,0x6BC80000,0x72000000, -0x1E80000,0xD4008C,0x88D00033,0x78D00033,0x72D00033,0x82CC0024,0x78CC0013,0x74CC001A,0x74CC0024,0x70CC0016,0x6ECC0026,0x80C80034,0x78C8000A,0x74C8000F,0x76C40013,0x72C40002,0x6EC80016,0x72C80034,0x70C4000F,0x6EC40019,0x6CC40035,0x33C0088,0x7CBC0033,0x72C40033,0x76BC0026,0x72C00012,0x6EC40026,0x78B00033,0x72B40009,0x6EB80015,0x6CBC0034,0x23FC0088, -0x72A00033,0x6EA80026,0x6C9C0034,0x6A000088,0xACC80002,0xFECC0024,0xF0D00037,0x86C80002,0x7AC80002,0x74C80002,0x72C80002,0x72C40002,0xA8BC0001,0x88C00001,0x72C40009,0x6EB80015,0x1C80088,0xDC0033,0x80D8000A,0x76D4000A,0x72D4000A,0x7AD40013,0x76D00002,0x72D40001,0x72D40013,0x72D00005,0x6ED00015,0x3440033,0x78C80009,0x72CC000A,0x76C40012,0x72C40001, -0x6ECC0014,0x27FC0033,0x72B40008,0x6EB80014,0x6C000034,0x3440033,0x78C80009,0x72CC000A,0x76C40012,0x72C40001,0x6ECC0014,0x27FC0033,0x72B40008,0x6EB80014,0x6C000034,0x27FC0033,0x72B40008,0x6EB80014,0x6C000034,0x6C000034,0xACC80001,0xC2D40013,0xD4D8000A,0x86C80001,0x7AC80001,0x74C80001,0x72C80001,0x72C40001,0xA8BC0001,0x84C40001,0x72C40009,0x6EB80014, -0x1D40033,0xD00033,0xD00033,0xD00033,0xD00033,0x7ACC0012,0x7ACC0012,0x7ACC0012,0x70CC0012,0x70CC0012,0x6CCC0012,0x78C80009,0x78C80009,0x78C80009,0x70C80002,0x70C80002,0x6CC80005,0x6EC40009,0x6EC40009,0x6CC40001,0x6AC40009,0x1340033,0x1340033,0x1340033,0x72C00012,0x72C00012,0x6CC40012,0x70BC0009,0x70BC0009,0x6CBC0001,0x6ABC0009,0x1FF80033, -0x1FF80033,0x6CB00012,0x6AA40008,0x68000034,0xA0C80002,0xCECC0012,0xD00033,0x84C80001,0x7AC80001,0x74C80001,0x72C80002,0x70C40002,0xA8BC0000,0x82C40001,0x72C40008,0x6CBC0001,0x1B80033,0xD4000A,0xD4000A,0xD4000A,0xD4000A,0x72D40001,0x72D40001,0x72D40001,0x6ED00001,0x6ED00001,0x6CD00001,0x33C0008,0x33C0008,0x33C0008,0x6ECC0001,0x6ECC0001, -0x6CCC0001,0x23FC0008,0x23FC0008,0x6CC00000,0x6A000008,0x33C0008,0x33C0008,0x33C0008,0x6ECC0001,0x6ECC0001,0x6CCC0001,0x23FC0008,0x23FC0008,0x6CC00000,0x6A000008,0x23FC0008,0x23FC0008,0x6CC00000,0x6A000008,0x6A000008,0x94CC0000,0x92D40001,0xD4000A,0x84C80000,0x76CC0000,0x72CC0000,0x70CC0000,0x6ECC0001,0x96C40000,0x7EC80000,0x1C80008,0x6CC00000, -0x1C80008,0xE00012,0x7CDC0001,0x74DC0001,0x72D80001,0x14C0012,0x76D00001,0x72D40000,0x2BF80012,0x72C00000,0x6E000014,0x14C0012,0x76D00001,0x72D40000,0x2BF80012,0x72C00000,0x6E000014,0x2BF80012,0x72C00000,0x6E000014,0x6E000014,0x14C0012,0x76D00001,0x72D40000,0x2BF80012,0x72C00000,0x6E000014,0x2BF80012,0x72C00000,0x6E000014,0x6E000014,0x2BF80012, -0x72C00000,0x6E000014,0x6E000014,0x6E000014,0xBCC00000,0xEEC0012,0xB6DC0001,0x8AC40000,0x7AC80001,0x76C40000,0x72C80000,0x72B80000,0xB0B80000,0x88C00000,0x72CC0001,0x6E000014,0x1DC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0xCC0012,0x72C80001,0x72C80001,0x72C80001,0x72C80001,0x72C80001, -0x72C80001,0x6AC80001,0x6AC80001,0x6AC80001,0x68C40001,0x1300012,0x1300012,0x1300012,0x1300012,0x1300012,0x1300012,0x6CBC0001,0x6CBC0001,0x6CBC0001,0x68C00000,0x1DF40012,0x1DF40012,0x1DF40012,0x68AC0000,0x64000014,0xACC80001,0xCC0012,0xCC0012,0x84C80001,0x7AC80001,0x74C80001,0x74C80001,0x70C80001,0xA8BC0000,0x8AC00000,0x6EC40000,0x6CBC0001, -0x1B00012,}; -static const uint32_t g_etc1_to_bc7_m6_table14[] = { -0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x23FC0000, -0x23FC0000,0x23FC0000,0x23FC0000,0x6A000000,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xE40000,0xE40000,0xE40000,0x33C0000,0x1C80000,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000, -0x1500000,0x2DF80000,0x2DF80000,0x2DF80000,0x70000000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x2DF80000,0x2DF80000,0x2DF80000,0x70000000,0x2DF80000,0x2DF80000,0x2DF80000,0x70000000,0x70000000,0xF00000,0xE00001,0xE00001,0x7040000,0x11C0000,0x1340000,0x1340000,0x17C0000,0x7040000,0x11C0000,0x1E00000,0x2DF80000, -0x1E00000,0xE80001,0xE80001,0xE80001,0xE80001,0x15C0000,0x15C0000,0x15C0000,0x33F80000,0x33F80000,0x74000000,0x15C0000,0x15C0000,0x15C0000,0x33F80000,0x33F80000,0x74000000,0x33F80000,0x33F80000,0x74000000,0x74000000,0x15C0000,0x15C0000,0x15C0000,0x33F80000,0x33F80000,0x74000000,0x33F80000,0x33F80000,0x74000000,0x74000000,0x33F80000, -0x33F80000,0x74000000,0x74000000,0x74000000,0x1100000,0x4F80000,0xE80001,0x33C0000,0x1880000,0x1F00000,0x17FC0000,0x43FC0000,0x1240000,0x15C0000,0x1F00000,0x74000000,0x1F00000,0xF40001,0x16C0000,0x3BFC0000,0x7A000000,0x16C0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x7A000000,0x7A000000,0x16C0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x7A000000, -0x7A000000,0x3BFC0000,0x7A000000,0x7A000000,0x7A000000,0x16C0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x7A000000,0x7A000000,0x3BFC0000,0x7A000000,0x7A000000,0x7A000000,0x3BFC0000,0x7A000000,0x7A000000,0x7A000000,0x7A000000,0x1340000,0xB040000,0xB040000,0x19C0000,0x21FC0000,0x59FC0000,0x7A000000,0x7A000000,0x34C0000,0x1CC0000,0x73D80000,0x7A000000, -0x7FC0000,0xE4008C,0x90E00033,0x80E00033,0x7AE00033,0x8ADC0024,0x80DC0013,0x7CDC001A,0x7CDC0024,0x78DC0016,0x76DC0026,0x88D80034,0x80D8000A,0x7CD8000F,0x7ED40013,0x7AD40002,0x76D80016,0x7AD80034,0x78D4000F,0x76D40019,0x74D40035,0x1540088,0x84CC0033,0x7AD40033,0x7ECC0026,0x7AD00012,0x76D40026,0x80C00033,0x7AC40009,0x76C80015,0x74CC0034,0x2FFC0088, -0x7AB00033,0x76B80026,0x74AC0034,0x72000088,0xB4D80002,0xF6DC0026,0xF8E00037,0x8ED80002,0x82D80002,0x7CD80002,0x7AD80002,0x7AD40002,0xB0CC0001,0x90D00001,0x7AD40009,0x76C80015,0x1E80088,0xEC0033,0x88E8000A,0x7EE4000A,0x7AE4000A,0x82E40013,0x7EE00002,0x7AE40001,0x7AE40013,0x7AE00005,0x76E00015,0x35C0033,0x80D80009,0x7ADC000A,0x7ED40012,0x7AD40001, -0x76DC0014,0x33FC0033,0x7AC40008,0x76C80014,0x74000034,0x35C0033,0x80D80009,0x7ADC000A,0x7ED40012,0x7AD40001,0x76DC0014,0x33FC0033,0x7AC40008,0x76C80014,0x74000034,0x33FC0033,0x7AC40008,0x76C80014,0x74000034,0x74000034,0xB4D80001,0xCAE40013,0xDCE8000A,0x8ED80001,0x82D80001,0x7CD80001,0x7AD80001,0x7AD40001,0xB0CC0001,0x8CD40001,0x7AD40009,0x76C80014, -0x1F40033,0xE00033,0xE00033,0xE00033,0xE00033,0x82DC0012,0x82DC0012,0x82DC0012,0x78DC0012,0x78DC0012,0x74DC0012,0x80D80009,0x80D80009,0x80D80009,0x78D80002,0x78D80002,0x74D80005,0x76D40009,0x76D40009,0x74D40001,0x72D40009,0x14C0033,0x14C0033,0x14C0033,0x7AD00012,0x7AD00012,0x74D40012,0x78CC0009,0x78CC0009,0x74CC0001,0x72CC0009,0x2BF80033, -0x2BF80033,0x74C00012,0x72B40008,0x70000034,0xA8D80002,0xD6DC0012,0xE00033,0x8CD80001,0x82D80001,0x7CD80001,0x7AD80002,0x78D40002,0xB0CC0000,0x8AD40001,0x7AD40008,0x74CC0001,0x1DC0033,0xE4000A,0xE4000A,0xE4000A,0xE4000A,0x7AE40001,0x7AE40001,0x7AE40001,0x76E00001,0x76E00001,0x74E00001,0x1540008,0x1540008,0x1540008,0x76DC0001,0x76DC0001, -0x74DC0001,0x2FFC0008,0x2FFC0008,0x74D00000,0x72000008,0x1540008,0x1540008,0x1540008,0x76DC0001,0x76DC0001,0x74DC0001,0x2FFC0008,0x2FFC0008,0x74D00000,0x72000008,0x2FFC0008,0x2FFC0008,0x74D00000,0x72000008,0x72000008,0x9CDC0000,0x9AE40001,0xE4000A,0x8CD80000,0x7EDC0000,0x7ADC0000,0x78DC0000,0x76DC0001,0x9ED40000,0x86D80000,0x1E80008,0x74D00000, -0x1E80008,0xF00012,0x84EC0001,0x7CEC0001,0x7AE80001,0x1640012,0x7EE00001,0x7AE40000,0x37F80012,0x7AD00000,0x76000014,0x1640012,0x7EE00001,0x7AE40000,0x37F80012,0x7AD00000,0x76000014,0x37F80012,0x7AD00000,0x76000014,0x76000014,0x1640012,0x7EE00001,0x7AE40000,0x37F80012,0x7AD00000,0x76000014,0x37F80012,0x7AD00000,0x76000014,0x76000014,0x37F80012, -0x7AD00000,0x76000014,0x76000014,0x76000014,0xC4D00000,0x1000012,0xBEEC0001,0x92D40000,0x82D80001,0x7ED40000,0x7AD80000,0x7AC80000,0xB8C80000,0x90D00000,0x7ADC0001,0x76000014,0x1FC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0xDC0012,0x7AD80001,0x7AD80001,0x7AD80001,0x7AD80001,0x7AD80001, -0x7AD80001,0x72D80001,0x72D80001,0x72D80001,0x70D40001,0x3440012,0x3440012,0x3440012,0x3440012,0x3440012,0x3440012,0x74CC0001,0x74CC0001,0x74CC0001,0x70D00000,0x27FC0012,0x27FC0012,0x27FC0012,0x70BC0000,0x6C000014,0xB4D80001,0xDC0012,0xDC0012,0x8CD80001,0x82D80001,0x7CD80001,0x7CD80001,0x78D80001,0xB0CC0000,0x92D00000,0x76D40000,0x74CC0001, -0x1D40012,}; -static const uint32_t g_etc1_to_bc7_m6_table15[] = { -0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x2FFC0000, -0x2FFC0000,0x2FFC0000,0x2FFC0000,0x72000000,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0x2F40000,0x2F40000,0x2F40000,0x1540000,0x1E80000,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000, -0x1680000,0x39F80000,0x39F80000,0x39F80000,0x78000000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x39F80000,0x39F80000,0x39F80000,0x78000000,0x39F80000,0x39F80000,0x39F80000,0x78000000,0x78000000,0x9000000,0xF00001,0xF00001,0x3180000,0x1300000,0x1480000,0x1480000,0x3940000,0x3180000,0x1300000,0x3FC0000,0x39F80000, -0x3FC0000,0xF80001,0xF80001,0xF80001,0xF80001,0x1740000,0x1740000,0x1740000,0x3FF80000,0x3FF80000,0x7C000000,0x1740000,0x1740000,0x1740000,0x3FF80000,0x3FF80000,0x7C000000,0x3FF80000,0x3FF80000,0x7C000000,0x7C000000,0x1740000,0x1740000,0x1740000,0x3FF80000,0x3FF80000,0x7C000000,0x3FF80000,0x3FF80000,0x7C000000,0x7C000000,0x3FF80000, -0x3FF80000,0x7C000000,0x7C000000,0x7C000000,0x7200000,0xD080000,0xF80001,0x1540000,0x1A40000,0xBFC0000,0x25FC0000,0x4FF80000,0x1380000,0x1740000,0xBFC0000,0x7C000000,0xBFC0000,0x1040001,0x1840000,0x47FC0000,0x82000000,0x1840000,0x47FC0000,0x82000000,0x47FC0000,0x82000000,0x82000000,0x1840000,0x47FC0000,0x82000000,0x47FC0000,0x82000000, -0x82000000,0x47FC0000,0x82000000,0x82000000,0x82000000,0x1840000,0x47FC0000,0x82000000,0x47FC0000,0x82000000,0x82000000,0x47FC0000,0x82000000,0x82000000,0x82000000,0x47FC0000,0x82000000,0x82000000,0x82000000,0x82000000,0x1480000,0x1180000,0x1180000,0x1B80000,0x2FFC0000,0x65F00000,0x82000000,0x82000000,0x1640000,0x1EC0000,0x7BE80000,0x82000000, -0x17FC0000,0xF4008C,0x98F00033,0x88F00033,0x82F00033,0x92EC0024,0x88EC0013,0x84EC001A,0x84EC0024,0x80EC0016,0x7EEC0026,0x90E80034,0x88E8000A,0x84E8000F,0x86E40013,0x82E40002,0x7EE80016,0x82E80034,0x80E4000F,0x7EE40019,0x7CE40035,0x16C0088,0x8CDC0033,0x82E40033,0x86DC0026,0x82E00012,0x7EE40026,0x88D00033,0x82D40009,0x7ED80015,0x7CDC0034,0x3BFC0088, -0x82C00033,0x7EC80026,0x7CBC0034,0x7A000088,0xBCE80002,0xFEEC0026,0xF0F0003C,0x96E80002,0x8AE80002,0x84E80002,0x82E80002,0x82E40002,0xB8DC0001,0x98E00001,0x82E40009,0x7ED80015,0x7FC0088,0xFC0033,0x90F8000A,0x86F4000A,0x82F4000A,0x8AF40013,0x86F00002,0x82F40001,0x82F40013,0x82F00005,0x7EF00015,0x3740033,0x88E80009,0x82EC000A,0x86E40012,0x82E40001, -0x7EEC0014,0x3FFC0033,0x82D40008,0x7ED80014,0x7C000034,0x3740033,0x88E80009,0x82EC000A,0x86E40012,0x82E40001,0x7EEC0014,0x3FFC0033,0x82D40008,0x7ED80014,0x7C000034,0x3FFC0033,0x82D40008,0x7ED80014,0x7C000034,0x7C000034,0xBCE80001,0xD2F40013,0xE4F8000A,0x96E80001,0x8AE80001,0x84E80001,0x82E80001,0x82E40001,0xB8DC0001,0x94E40001,0x82E40009,0x7ED80014, -0xDFC0033,0xF00033,0xF00033,0xF00033,0xF00033,0x8AEC0012,0x8AEC0012,0x8AEC0012,0x80EC0012,0x80EC0012,0x7CEC0012,0x88E80009,0x88E80009,0x88E80009,0x80E80002,0x80E80002,0x7CE80005,0x7EE40009,0x7EE40009,0x7CE40001,0x7AE40009,0x1640033,0x1640033,0x1640033,0x82E00012,0x82E00012,0x7CE40012,0x80DC0009,0x80DC0009,0x7CDC0001,0x7ADC0009,0x37F80033, -0x37F80033,0x7CD00012,0x7AC40008,0x78000034,0xB0E80002,0xDEEC0012,0xF00033,0x94E80001,0x8AE80001,0x84E80001,0x82E80002,0x80E40002,0xB8DC0000,0x92E40001,0x82E40008,0x7CDC0001,0x1FC0033,0xF4000A,0xF4000A,0xF4000A,0xF4000A,0x82F40001,0x82F40001,0x82F40001,0x7EF00001,0x7EF00001,0x7CF00001,0x16C0008,0x16C0008,0x16C0008,0x7EEC0001,0x7EEC0001, -0x7CEC0001,0x3BFC0008,0x3BFC0008,0x7CE00000,0x7A000008,0x16C0008,0x16C0008,0x16C0008,0x7EEC0001,0x7EEC0001,0x7CEC0001,0x3BFC0008,0x3BFC0008,0x7CE00000,0x7A000008,0x3BFC0008,0x3BFC0008,0x7CE00000,0x7A000008,0x7A000008,0xA4EC0000,0xA2F40001,0xF4000A,0x94E80000,0x86EC0000,0x82EC0000,0x80EC0000,0x7EEC0001,0xA6E40000,0x8EE80000,0x7FC0008,0x7CE00000, -0x7FC0008,0x1000012,0x8CFC0001,0x84FC0001,0x82F80001,0x17C0012,0x86F00001,0x82F40000,0x43F80012,0x82E00000,0x7E000014,0x17C0012,0x86F00001,0x82F40000,0x43F80012,0x82E00000,0x7E000014,0x43F80012,0x82E00000,0x7E000014,0x7E000014,0x17C0012,0x86F00001,0x82F40000,0x43F80012,0x82E00000,0x7E000014,0x43F80012,0x82E00000,0x7E000014,0x7E000014,0x43F80012, -0x82E00000,0x7E000014,0x7E000014,0x7E000014,0xCCE00000,0x1100012,0xC6FC0001,0x9AE40000,0x8AE80001,0x86E40000,0x82E80000,0x82D80000,0xC0D80000,0x98E00000,0x82EC0001,0x7E000014,0x11FC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0xEC0012,0x82E80001,0x82E80001,0x82E80001,0x82E80001,0x82E80001, -0x82E80001,0x7AE80001,0x7AE80001,0x7AE80001,0x78E40001,0x35C0012,0x35C0012,0x35C0012,0x35C0012,0x35C0012,0x35C0012,0x7CDC0001,0x7CDC0001,0x7CDC0001,0x78E00000,0x33FC0012,0x33FC0012,0x33FC0012,0x78CC0000,0x74000014,0xBCE80001,0xEC0012,0xEC0012,0x94E80001,0x8AE80001,0x84E80001,0x84E80001,0x80E80001,0xB8DC0000,0x9AE00000,0x7EE40000,0x7CDC0001, -0x1F40012,}; -static const uint32_t g_etc1_to_bc7_m6_table16[] = { -0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x3DF80000, -0x3DF80000,0x3DF80000,0x3DF80000,0x7A000001,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0x1080000,0x1080000,0x1080000,0x1700000,0x9FC0000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x3800000,0x3800000,0x3800000,0x3800000,0x3800000, -0x3800000,0x45FC0000,0x45FC0000,0x45FC0000,0x80000001,0x3800000,0x3800000,0x3800000,0x3800000,0x3800000,0x3800000,0x45FC0000,0x45FC0000,0x45FC0000,0x80000001,0x45FC0000,0x45FC0000,0x45FC0000,0x80000001,0x80000001,0x3140000,0x1040000,0x1040000,0x52C0000,0x3440000,0x1600000,0x1600000,0x1B40000,0x52C0000,0x3440000,0x15FC0000,0x45FC0000, -0x15FC0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x38C0000,0x38C0000,0x38C0000,0x4BFC0000,0x4BFC0000,0x84000001,0x38C0000,0x38C0000,0x38C0000,0x4BFC0000,0x4BFC0000,0x84000001,0x4BFC0000,0x4BFC0000,0x84000001,0x84000001,0x38C0000,0x38C0000,0x38C0000,0x4BFC0000,0x4BFC0000,0x84000001,0x4BFC0000,0x4BFC0000,0x84000001,0x84000001,0x4BFC0000, -0x4BFC0000,0x84000001,0x84000001,0x84000001,0x1380000,0x71C0000,0x10C0000,0x16C0000,0x3C00000,0x1DF80000,0x35F80000,0x5BF80000,0x1500000,0x38C0000,0x1DF80000,0x84000001,0x1DF80000,0x1180000,0x1A00000,0x55F80000,0x8A000001,0x1A00000,0x55F80000,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x1A00000,0x55F80000,0x8A000001,0x55F80000,0x8A000001, -0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x1A00000,0x55F80000,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x8A000001,0x1600000,0xD280000,0xD280000,0x3D40000,0x3FF80000,0x6FF80000,0x8A000001,0x8A000001,0x17C0000,0xBFC0000,0x85DC0000,0x8A000001, -0x27FC0000,0x1080088,0x9F040034,0x91040034,0x8B040035,0x9B000026,0x91000015,0x8B000019,0x8D000026,0x8AFC0016,0x88FC0026,0x9CF80033,0x92F80009,0x8CFC000F,0x8EF80012,0x8AF80002,0x88F80016,0x8CF80033,0x8AF4000F,0x88F4001A,0x86F80033,0x1880088,0x96EC0033,0x8AF80034,0x90EC0026,0x8CF00013,0x88F40024,0x90E40033,0x8AEC000A,0x88E80013,0x86EC0033,0x49F80088, -0x8AD80034,0x88D40024,0x86C80033,0x8200008C,0xC4F80002,0xF9000028,0xFB040038,0xA2F80001,0x94F80002,0x8EF80002,0x8CF80002,0x8AF80002,0xBEF00001,0x9EF40001,0x8CF40009,0x88E80013,0x19FC0088,0x10C0034,0x9B080008,0x8F080009,0x8B080009,0x97040012,0x8F040001,0x8D040001,0x8D040012,0x8B040005,0x89040012,0x1900033,0x90FC0009,0x8B000009,0x8EF80012,0x8AF80002, -0x88FC0012,0x4DFC0033,0x8AEC0009,0x88E80012,0x86000033,0x1900033,0x90FC0009,0x8B000009,0x8EF80012,0x8AF80002,0x88FC0012,0x4DFC0033,0x8AEC0009,0x88E80012,0x86000033,0x4DFC0033,0x8AEC0009,0x88E80012,0x86000033,0x86000033,0xC0FC0001,0xEB040012,0xFD080008,0x9CFC0001,0x94F80002,0x8EF80002,0x8AFC0002,0x8AF40002,0xBEF00001,0x9AF80001,0x8CF40008,0x88E80012, -0x1FF80033,0x1040034,0x1040034,0x1040034,0x1040034,0x91000014,0x91000014,0x91000014,0x89000014,0x89000014,0x84FC0015,0x92F80008,0x92F80008,0x92F80008,0x8AF80001,0x8AF80001,0x86F80005,0x86F8000A,0x86F8000A,0x84F80001,0x82F8000A,0x1800033,0x1800033,0x1800033,0x8CF00012,0x8CF00012,0x84F80013,0x8AEC0009,0x8AEC0009,0x84F00002,0x82F0000A,0x45F80033, -0x45F80033,0x84E40013,0x82DC000A,0x80000033,0xC4F80001,0xD9000014,0x1040034,0xA0F80001,0x94F80001,0x8EF80001,0x8CF80001,0x88F80001,0xBCF00000,0x9EF40000,0x8AF80009,0x84F00002,0x13FC0033,0x1080008,0x1080008,0x1080008,0x1080008,0x8D040000,0x8D040000,0x8D040000,0x87040001,0x87040001,0x85040001,0x1880008,0x1880008,0x1880008,0x87000001,0x87000001, -0x85000001,0x49F80008,0x49F80008,0x84F80001,0x8200000A,0x1880008,0x1880008,0x1880008,0x87000001,0x87000001,0x85000001,0x49F80008,0x49F80008,0x84F80001,0x8200000A,0x49F80008,0x49F80008,0x84F80001,0x8200000A,0x8200000A,0xB6FC0000,0xBB040000,0x1080008,0x9AFC0000,0x90FC0000,0x8CFC0000,0x89000001,0x88FC0000,0xB2F40000,0x98F80000,0x19FC0008,0x84F80001, -0x19FC0008,0x1100014,0x970C0000,0x8F0C0000,0x8B0C0001,0x1980012,0x8F040001,0x8B080001,0x51F80012,0x8AF80001,0x88000012,0x1980012,0x8F040001,0x8B080001,0x51F80012,0x8AF80001,0x88000012,0x51F80012,0x8AF80001,0x88000012,0x88000012,0x1980012,0x8F040001,0x8B080001,0x51F80012,0x8AF80001,0x88000012,0x51F80012,0x8AF80001,0x88000012,0x88000012,0x51F80012, -0x8AF80001,0x88000012,0x88000012,0x88000012,0xD0F40000,0x1240012,0xDF0C0000,0xA2F80000,0x94F80001,0x90F40000,0x8AFC0001,0x8AF00001,0xC6EC0000,0xA2F00000,0x8CFC0000,0x88000012,0x21FC0012,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0xFC0014,0x8CF80000,0x8CF80000,0x8CF80000,0x8CF80000,0x8CF80000, -0x8CF80000,0x84F80000,0x84F80000,0x84F80000,0x80F80001,0x1780012,0x1780012,0x1780012,0x1780012,0x1780012,0x1780012,0x84F00001,0x84F00001,0x84F00001,0x80F40001,0x41FC0012,0x41FC0012,0x41FC0012,0x80E40001,0x7E000012,0xD4F80000,0xFC0014,0xFC0014,0xA4F80000,0x98F80000,0x90F80000,0x90F80000,0x8AF80000,0xBCF00000,0x9EF40000,0x86F80001,0x84F00001, -0xFFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table17[] = { -0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x49F80000, -0x49F80000,0x49F80000,0x49F80000,0x82000001,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x5180000,0x5180000,0x5180000,0x1880000,0x19FC0000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000, -0x3980000,0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x88000001,0xB240000,0x1140000,0x1140000,0x1400000,0x3580000,0x1780000,0x1780000,0x1D00000,0x1400000,0x3580000,0x23FC0000,0x51FC0000, -0x23FC0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x3A40000,0x3A40000,0x3A40000,0x57FC0000,0x57FC0000,0x8C000001,0x3A40000,0x3A40000,0x3A40000,0x57FC0000,0x57FC0000,0x8C000001,0x57FC0000,0x57FC0000,0x8C000001,0x8C000001,0x3A40000,0x3A40000,0x3A40000,0x57FC0000,0x57FC0000,0x8C000001,0x57FC0000,0x57FC0000,0x8C000001,0x8C000001,0x57FC0000, -0x57FC0000,0x8C000001,0x8C000001,0x8C000001,0x5480000,0xF2C0000,0x11C0000,0x3800000,0x1DC0000,0x2BFC0000,0x41FC0000,0x65FC0000,0x1640000,0x3A40000,0x2BFC0000,0x8C000001,0x2BFC0000,0x1280000,0x1B80000,0x61F80000,0x92000001,0x1B80000,0x61F80000,0x92000001,0x61F80000,0x92000001,0x92000001,0x1B80000,0x61F80000,0x92000001,0x61F80000,0x92000001, -0x92000001,0x61F80000,0x92000001,0x92000001,0x92000001,0x1B80000,0x61F80000,0x92000001,0x61F80000,0x92000001,0x92000001,0x61F80000,0x92000001,0x92000001,0x92000001,0x61F80000,0x92000001,0x92000001,0x92000001,0x92000001,0x1740000,0x13C0000,0x13C0000,0x1F00000,0x4BFC0000,0x79F80000,0x92000001,0x92000001,0x3900000,0x1BFC0000,0x8DEC0000,0x92000001, -0x37FC0000,0x1180088,0xA7140034,0x99140034,0x93140035,0xA3100026,0x99100015,0x93100019,0x95100026,0x930C0016,0x910C0026,0xA5080033,0x9B080009,0x950C000F,0x97080012,0x93080002,0x91080016,0x95080033,0x9304000F,0x9104001A,0x8F080033,0x1A00088,0x9EFC0033,0x93080034,0x98FC0026,0x95000013,0x91040024,0x98F40033,0x92FC000A,0x90F80013,0x8EFC0033,0x55F80088, -0x92E80034,0x90E40024,0x8ED80033,0x8A00008C,0xCD080002,0xF110002E,0xF314003D,0xAB080001,0x9D080002,0x97080002,0x95080002,0x93080002,0xC7000001,0xA7040001,0x95040009,0x90F80013,0x27FC0088,0x11C0034,0xA3180008,0x97180009,0x93180009,0x9F140012,0x97140001,0x95140001,0x95140012,0x93140005,0x91140012,0x1A80033,0x990C0009,0x93100009,0x97080012,0x93080002, -0x910C0012,0x59FC0033,0x92FC0009,0x90F80012,0x8E000033,0x1A80033,0x990C0009,0x93100009,0x97080012,0x93080002,0x910C0012,0x59FC0033,0x92FC0009,0x90F80012,0x8E000033,0x59FC0033,0x92FC0009,0x90F80012,0x8E000033,0x8E000033,0xC90C0001,0xF3140012,0xF5180009,0xA50C0001,0x9D080002,0x97080002,0x930C0002,0x93040002,0xC7000001,0xA3080001,0x95040008,0x90F80012, -0x2DFC0033,0x1140034,0x1140034,0x1140034,0x1140034,0x99100014,0x99100014,0x99100014,0x91100014,0x91100014,0x8D0C0015,0x9B080008,0x9B080008,0x9B080008,0x93080001,0x93080001,0x8F080005,0x8F08000A,0x8F08000A,0x8D080001,0x8B08000A,0x1980033,0x1980033,0x1980033,0x95000012,0x95000012,0x8D080013,0x92FC0009,0x92FC0009,0x8D000002,0x8B00000A,0x51F80033, -0x51F80033,0x8CF40013,0x8AEC000A,0x88000033,0xCD080001,0xE1100014,0x1140034,0xA9080001,0x9D080001,0x97080001,0x95080001,0x91080001,0xC5000000,0xA7040000,0x93080009,0x8D000002,0x21FC0033,0x1180008,0x1180008,0x1180008,0x1180008,0x95140000,0x95140000,0x95140000,0x8F140001,0x8F140001,0x8D140001,0x1A00008,0x1A00008,0x1A00008,0x8F100001,0x8F100001, -0x8D100001,0x55F80008,0x55F80008,0x8D080001,0x8A00000A,0x1A00008,0x1A00008,0x1A00008,0x8F100001,0x8F100001,0x8D100001,0x55F80008,0x55F80008,0x8D080001,0x8A00000A,0x55F80008,0x55F80008,0x8D080001,0x8A00000A,0x8A00000A,0xBF0C0000,0xC3140000,0x1180008,0xA30C0000,0x990C0000,0x950C0000,0x91100001,0x910C0000,0xBB040000,0xA1080000,0x27FC0008,0x8D080001, -0x27FC0008,0x1200014,0x9F1C0000,0x971C0000,0x931C0001,0x1B00012,0x97140001,0x93180001,0x5DF40012,0x93080001,0x90000012,0x1B00012,0x97140001,0x93180001,0x5DF40012,0x93080001,0x90000012,0x5DF40012,0x93080001,0x90000012,0x90000012,0x1B00012,0x97140001,0x93180001,0x5DF40012,0x93080001,0x90000012,0x5DF40012,0x93080001,0x90000012,0x90000012,0x5DF40012, -0x93080001,0x90000012,0x90000012,0x90000012,0xD9040000,0x3340012,0xE71C0000,0xAB080000,0x9D080001,0x99040000,0x930C0001,0x93000001,0xCEFC0000,0xAB000000,0x950C0000,0x90000012,0x31FC0012,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x10C0014,0x95080000,0x95080000,0x95080000,0x95080000,0x95080000, -0x95080000,0x8D080000,0x8D080000,0x8D080000,0x89080001,0x1900012,0x1900012,0x1900012,0x1900012,0x1900012,0x1900012,0x8D000001,0x8D000001,0x8D000001,0x89040001,0x4DFC0012,0x4DFC0012,0x4DFC0012,0x88F40001,0x86000012,0xDD080000,0x10C0014,0x10C0014,0xAD080000,0xA1080000,0x99080000,0x99080000,0x93080000,0xC5000000,0xA7040000,0x8F080001,0x8D000001, -0x1FF80012,}; -static const uint32_t g_etc1_to_bc7_m6_table18[] = { -0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x55F80000, -0x55F80000,0x55F80000,0x55F80000,0x8A000001,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0xD280000,0xD280000,0xD280000,0x1A00000,0x27FC0000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000, -0x3B00000,0x5DFC0000,0x5DFC0000,0x5DFC0000,0x90000001,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x5DFC0000,0x5DFC0000,0x5DFC0000,0x90000001,0x5DFC0000,0x5DFC0000,0x5DFC0000,0x90000001,0x90000001,0x1380000,0x1240000,0x1240000,0x1540000,0x36C0000,0x18C0000,0x18C0000,0x1EC0000,0x1540000,0x36C0000,0x33FC0000,0x5DFC0000, -0x33FC0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x3BC0000,0x3BC0000,0x3BC0000,0x63FC0000,0x63FC0000,0x94000001,0x3BC0000,0x3BC0000,0x3BC0000,0x63FC0000,0x63FC0000,0x94000001,0x63FC0000,0x63FC0000,0x94000001,0x94000001,0x3BC0000,0x3BC0000,0x3BC0000,0x63FC0000,0x63FC0000,0x94000001,0x63FC0000,0x63FC0000,0x94000001,0x94000001,0x63FC0000, -0x63FC0000,0x94000001,0x94000001,0x94000001,0x15C0000,0x1400000,0x12C0000,0x1980000,0x1F80000,0x3BFC0000,0x4FFC0000,0x71F80000,0x1780000,0x3BC0000,0x3BFC0000,0x94000001,0x3BFC0000,0x1380000,0x1D00000,0x6DF80000,0x9A000001,0x1D00000,0x6DF80000,0x9A000001,0x6DF80000,0x9A000001,0x9A000001,0x1D00000,0x6DF80000,0x9A000001,0x6DF80000,0x9A000001, -0x9A000001,0x6DF80000,0x9A000001,0x9A000001,0x9A000001,0x1D00000,0x6DF80000,0x9A000001,0x6DF80000,0x9A000001,0x9A000001,0x6DF80000,0x9A000001,0x9A000001,0x9A000001,0x6DF80000,0x9A000001,0x9A000001,0x9A000001,0x9A000001,0x1880000,0x14C0000,0x14C0000,0xDFC0000,0x59FC0000,0x83F80000,0x9A000001,0x9A000001,0x1A80000,0x2DFC0000,0x95FC0000,0x9A000001, -0x45FC0000,0x1280088,0xAF240034,0xA1240034,0x9B240035,0xAB200026,0xA1200015,0x9B200019,0x9D200026,0x9B1C0016,0x991C0026,0xAD180033,0xA3180009,0x9D1C000F,0x9F180012,0x9B180002,0x99180016,0x9D180033,0x9B14000F,0x9914001A,0x97180033,0x1B80088,0xA70C0033,0x9B180034,0xA10C0026,0x9D100013,0x99140024,0xA1040033,0x9B0C000A,0x99080013,0x970C0033,0x61F80088, -0x9AF80034,0x98F40024,0x96E80033,0x9200008C,0xD5180002,0xF920002E,0xFB24003D,0xB3180001,0xA5180002,0x9F180002,0x9D180002,0x9B180002,0xCF100001,0xAF140001,0x9D140009,0x99080013,0x37FC0088,0x12C0034,0xAB280008,0x9F280009,0x9B280009,0xA7240012,0x9F240001,0x9D240001,0x9D240012,0x9B240005,0x99240012,0x1C00033,0xA11C0009,0x9B200009,0x9F180012,0x9B180002, -0x991C0012,0x65F80033,0x9B0C0009,0x99080012,0x96000033,0x1C00033,0xA11C0009,0x9B200009,0x9F180012,0x9B180002,0x991C0012,0x65F80033,0x9B0C0009,0x99080012,0x96000033,0x65F80033,0x9B0C0009,0x99080012,0x96000033,0x96000033,0xD11C0001,0xFB240012,0xFD280009,0xAD1C0001,0xA5180002,0x9F180002,0x9B1C0002,0x9B140002,0xCF100001,0xAB180001,0x9D140008,0x99080012, -0x3DF80033,0x1240034,0x1240034,0x1240034,0x1240034,0xA1200014,0xA1200014,0xA1200014,0x99200014,0x99200014,0x951C0015,0xA3180008,0xA3180008,0xA3180008,0x9B180001,0x9B180001,0x97180005,0x9718000A,0x9718000A,0x95180001,0x9318000A,0x1B00033,0x1B00033,0x1B00033,0x9D100012,0x9D100012,0x95180013,0x9B0C0009,0x9B0C0009,0x95100002,0x9310000A,0x5DF40033, -0x5DF40033,0x95040013,0x92FC000A,0x90000033,0xD5180001,0xE9200014,0x1240034,0xB1180001,0xA5180001,0x9F180001,0x9D180001,0x99180001,0xCD100000,0xAF140000,0x9B180009,0x95100002,0x31FC0033,0x1280008,0x1280008,0x1280008,0x1280008,0x9D240000,0x9D240000,0x9D240000,0x97240001,0x97240001,0x95240001,0x1B80008,0x1B80008,0x1B80008,0x97200001,0x97200001, -0x95200001,0x61F80008,0x61F80008,0x95180001,0x9200000A,0x1B80008,0x1B80008,0x1B80008,0x97200001,0x97200001,0x95200001,0x61F80008,0x61F80008,0x95180001,0x9200000A,0x61F80008,0x61F80008,0x95180001,0x9200000A,0x9200000A,0xC71C0000,0xCB240000,0x1280008,0xAB1C0000,0xA11C0000,0x9D1C0000,0x99200001,0x991C0000,0xC3140000,0xA9180000,0x37FC0008,0x95180001, -0x37FC0008,0x1300014,0xA72C0000,0x9F2C0000,0x9B2C0001,0x3C40012,0x9F240001,0x9B280001,0x67FC0012,0x9B180001,0x98000012,0x3C40012,0x9F240001,0x9B280001,0x67FC0012,0x9B180001,0x98000012,0x67FC0012,0x9B180001,0x98000012,0x98000012,0x3C40012,0x9F240001,0x9B280001,0x67FC0012,0x9B180001,0x98000012,0x67FC0012,0x9B180001,0x98000012,0x98000012,0x67FC0012, -0x9B180001,0x98000012,0x98000012,0x98000012,0xE1140000,0xB440012,0xEF2C0000,0xB3180000,0xA5180001,0xA1140000,0x9B1C0001,0x9B100001,0xD70C0000,0xB3100000,0x9D1C0000,0x98000012,0x3FFC0012,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x11C0014,0x9D180000,0x9D180000,0x9D180000,0x9D180000,0x9D180000, -0x9D180000,0x95180000,0x95180000,0x95180000,0x91180001,0x1A80012,0x1A80012,0x1A80012,0x1A80012,0x1A80012,0x1A80012,0x95100001,0x95100001,0x95100001,0x91140001,0x59FC0012,0x59FC0012,0x59FC0012,0x91040001,0x8E000012,0xE5180000,0x11C0014,0x11C0014,0xB5180000,0xA9180000,0xA1180000,0xA1180000,0x9B180000,0xCD100000,0xAF140000,0x97180001,0x95100001, -0x2DFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table19[] = { -0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x61F80000, -0x61F80000,0x61F80000,0x61F80000,0x92000001,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x13C0000,0x13C0000,0x13C0000,0x1B80000,0x37FC0000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000, -0x1C80000,0x69FC0000,0x69FC0000,0x69FC0000,0x98000001,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x69FC0000,0x69FC0000,0x69FC0000,0x98000001,0x69FC0000,0x69FC0000,0x69FC0000,0x98000001,0x98000001,0x1480000,0x1340000,0x1340000,0x5640000,0x3800000,0x1A40000,0x1A40000,0x7FC0000,0x5640000,0x3800000,0x41FC0000,0x69FC0000, -0x41FC0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x1D40000,0x1D40000,0x1D40000,0x6FFC0000,0x6FFC0000,0x9C000001,0x1D40000,0x1D40000,0x1D40000,0x6FFC0000,0x6FFC0000,0x9C000001,0x6FFC0000,0x6FFC0000,0x9C000001,0x9C000001,0x1D40000,0x1D40000,0x1D40000,0x6FFC0000,0x6FFC0000,0x9C000001,0x6FFC0000,0x6FFC0000,0x9C000001,0x9C000001,0x6FFC0000, -0x6FFC0000,0x9C000001,0x9C000001,0x9C000001,0x1700000,0x1500000,0x13C0000,0x3AC0000,0x11FC0000,0x49FC0000,0x5DF80000,0x7BFC0000,0x18C0000,0x1D40000,0x49FC0000,0x9C000001,0x49FC0000,0x1480000,0x1E80000,0x79F80000,0xA2000001,0x1E80000,0x79F80000,0xA2000001,0x79F80000,0xA2000001,0xA2000001,0x1E80000,0x79F80000,0xA2000001,0x79F80000,0xA2000001, -0xA2000001,0x79F80000,0xA2000001,0xA2000001,0xA2000001,0x1E80000,0x79F80000,0xA2000001,0x79F80000,0xA2000001,0xA2000001,0x79F80000,0xA2000001,0xA2000001,0xA2000001,0x79F80000,0xA2000001,0xA2000001,0xA2000001,0xA2000001,0x19C0000,0x75C0000,0x75C0000,0x21FC0000,0x67F80000,0x8DF80000,0xA2000001,0xA2000001,0x3BC0000,0x3DFC0000,0x9FD00000,0xA2000001, -0x55FC0000,0x1380088,0xB7340034,0xA9340034,0xA3340035,0xB3300026,0xA9300015,0xA3300019,0xA5300026,0xA32C0016,0xA12C0026,0xB5280033,0xAB280009,0xA52C000F,0xA7280012,0xA3280002,0xA1280016,0xA5280033,0xA324000F,0xA124001A,0x9F280033,0x1D00088,0xAF1C0033,0xA3280034,0xA91C0026,0xA5200013,0xA1240024,0xA9140033,0xA31C000A,0xA1180013,0x9F1C0033,0x6DF80088, -0xA3080034,0xA1040024,0x9EF80033,0x9A00008C,0xDD280002,0xF1300038,0xF3340044,0xBB280001,0xAD280002,0xA7280002,0xA5280002,0xA3280002,0xD7200001,0xB7240001,0xA5240009,0xA1180013,0x45FC0088,0x13C0034,0xB3380008,0xA7380009,0xA3380009,0xAF340012,0xA7340001,0xA5340001,0xA5340012,0xA3340005,0xA1340012,0x1D80033,0xA92C0009,0xA3300009,0xA7280012,0xA3280002, -0xA12C0012,0x71F80033,0xA31C0009,0xA1180012,0x9E000033,0x1D80033,0xA92C0009,0xA3300009,0xA7280012,0xA3280002,0xA12C0012,0x71F80033,0xA31C0009,0xA1180012,0x9E000033,0x71F80033,0xA31C0009,0xA1180012,0x9E000033,0x9E000033,0xD92C0001,0xF3340014,0xF538000C,0xB52C0001,0xAD280002,0xA7280002,0xA32C0002,0xA3240002,0xD7200001,0xB3280001,0xA5240008,0xA1180012, -0x4BFC0033,0x1340034,0x1340034,0x1340034,0x1340034,0xA9300014,0xA9300014,0xA9300014,0xA1300014,0xA1300014,0x9D2C0015,0xAB280008,0xAB280008,0xAB280008,0xA3280001,0xA3280001,0x9F280005,0x9F28000A,0x9F28000A,0x9D280001,0x9B28000A,0x3C40033,0x3C40033,0x3C40033,0xA5200012,0xA5200012,0x9D280013,0xA31C0009,0xA31C0009,0x9D200002,0x9B20000A,0x67FC0033, -0x67FC0033,0x9D140013,0x9B0C000A,0x98000033,0xDD280001,0xF1300014,0x1340034,0xB9280001,0xAD280001,0xA7280001,0xA5280001,0xA1280001,0xD5200000,0xB7240000,0xA3280009,0x9D200002,0x3FFC0033,0x1380008,0x1380008,0x1380008,0x1380008,0xA5340000,0xA5340000,0xA5340000,0x9F340001,0x9F340001,0x9D340001,0x1D00008,0x1D00008,0x1D00008,0x9F300001,0x9F300001, -0x9D300001,0x6DF80008,0x6DF80008,0x9D280001,0x9A00000A,0x1D00008,0x1D00008,0x1D00008,0x9F300001,0x9F300001,0x9D300001,0x6DF80008,0x6DF80008,0x9D280001,0x9A00000A,0x6DF80008,0x6DF80008,0x9D280001,0x9A00000A,0x9A00000A,0xCF2C0000,0xD3340000,0x1380008,0xB32C0000,0xA92C0000,0xA52C0000,0xA1300001,0xA12C0000,0xCB240000,0xB1280000,0x45FC0008,0x9D280001, -0x45FC0008,0x1400014,0xAF3C0000,0xA73C0000,0xA33C0001,0x3DC0012,0xA7340001,0xA3380001,0x73FC0012,0xA3280001,0xA0000012,0x3DC0012,0xA7340001,0xA3380001,0x73FC0012,0xA3280001,0xA0000012,0x73FC0012,0xA3280001,0xA0000012,0xA0000012,0x3DC0012,0xA7340001,0xA3380001,0x73FC0012,0xA3280001,0xA0000012,0x73FC0012,0xA3280001,0xA0000012,0xA0000012,0x73FC0012, -0xA3280001,0xA0000012,0xA0000012,0xA0000012,0xE9240000,0x1580012,0xF73C0000,0xBB280000,0xAD280001,0xA9240000,0xA32C0001,0xA3200001,0xDF1C0000,0xBB200000,0xA52C0000,0xA0000012,0x4FFC0012,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0x12C0014,0xA5280000,0xA5280000,0xA5280000,0xA5280000,0xA5280000, -0xA5280000,0x9D280000,0x9D280000,0x9D280000,0x99280001,0x1C00012,0x1C00012,0x1C00012,0x1C00012,0x1C00012,0x1C00012,0x9D200001,0x9D200001,0x9D200001,0x99240001,0x65F80012,0x65F80012,0x65F80012,0x99140001,0x96000012,0xED280000,0x12C0014,0x12C0014,0xBD280000,0xB1280000,0xA9280000,0xA9280000,0xA3280000,0xD5200000,0xB7240000,0x9F280001,0x9D200001, -0x3DF80012,}; -static const uint32_t g_etc1_to_bc7_m6_table20[] = { -0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x6DFC0000, -0x6DFC0000,0x6DFC0000,0x6DFC0000,0x9C000000,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0xF4C0000,0xF4C0000,0xF4C0000,0x3D00000,0x47FC0000,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000, -0x1E40000,0x77F80000,0x77F80000,0x77F80000,0xA2000000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x77F80000,0x77F80000,0x77F80000,0xA2000000,0x77F80000,0x77F80000,0x77F80000,0xA2000000,0xA2000000,0x15C0000,0x1440001,0x1440001,0x17C0000,0x1980000,0x1BC0000,0x1BC0000,0x1DFC0000,0x17C0000,0x1980000,0x53FC0000,0x77F80000, -0x53FC0000,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x1F00000,0x1F00000,0x1F00000,0x7DF80000,0x7DF80000,0xA6000000,0x1F00000,0x1F00000,0x1F00000,0x7DF80000,0x7DF80000,0xA6000000,0x7DF80000,0x7DF80000,0xA6000000,0xA6000000,0x1F00000,0x1F00000,0x1F00000,0x7DF80000,0x7DF80000,0xA6000000,0x7DF80000,0x7DF80000,0xA6000000,0xA6000000,0x7DF80000, -0x7DF80000,0xA6000000,0xA6000000,0xA6000000,0x1840000,0x1640000,0x14C0001,0x3C40000,0x27FC0000,0x5BFC0000,0x6BFC0000,0x87FC0000,0x5A00000,0x1F00000,0x5BFC0000,0xA6000000,0x5BFC0000,0x1580001,0x7FC0000,0x85FC0000,0xAC000000,0x7FC0000,0x85FC0000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0x7FC0000,0x85FC0000,0xAC000000,0x85FC0000,0xAC000000, -0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0x7FC0000,0x85FC0000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0xAC000000,0x3B00000,0x1700000,0x1700000,0x37FC0000,0x75FC0000,0x99F40000,0xAC000000,0xAC000000,0x3D40000,0x51FC0000,0xA9C40000,0xAC000000, -0x65FC0000,0x148008C,0xC3440033,0xB3440033,0xAD440033,0xBD400024,0xB3400013,0xAF40001A,0xAF400024,0xAB400016,0xA9400026,0xBB3C0034,0xB33C000A,0xAF3C000F,0xB1380013,0xAD380002,0xA93C0016,0xAD3C0034,0xAB38000F,0xA9380019,0xA7380035,0x3E80088,0xB7300033,0xAD380033,0xB1300026,0xAD340012,0xA9380026,0xB3240033,0xAD280009,0xA92C0015,0xA7300034,0x79FC0088, -0xAD140033,0xA91C0026,0xA7100034,0xA4000088,0xE73C0002,0xFB440034,0xFD480044,0xC13C0002,0xB53C0002,0xAF3C0002,0xAD3C0002,0xAD380002,0xE3300001,0xC3340001,0xAD380009,0xA92C0015,0x57FC0088,0x1500033,0xBB4C000A,0xB148000A,0xAD48000A,0xB5480013,0xB1440002,0xAD480001,0xAD480013,0xAD440005,0xA9440015,0x1F40033,0xB33C0009,0xAD40000A,0xB1380012,0xAD380001, -0xA9400014,0x7FF80033,0xAD280008,0xA92C0014,0xA6000034,0x1F40033,0xB33C0009,0xAD40000A,0xB1380012,0xAD380001,0xA9400014,0x7FF80033,0xAD280008,0xA92C0014,0xA6000034,0x7FF80033,0xAD280008,0xA92C0014,0xA6000034,0xA6000034,0xE73C0001,0xFD480013,0xFF4C000B,0xC13C0001,0xB53C0001,0xAF3C0001,0xAD3C0001,0xAD380001,0xE3300001,0xBF380001,0xAD380009,0xA92C0014, -0x5DF80033,0x1440033,0x1440033,0x1440033,0x1440033,0xB5400012,0xB5400012,0xB5400012,0xAB400012,0xAB400012,0xA7400012,0xB33C0009,0xB33C0009,0xB33C0009,0xAB3C0002,0xAB3C0002,0xA73C0005,0xA9380009,0xA9380009,0xA7380001,0xA5380009,0x1E00033,0x1E00033,0x1E00033,0xAD340012,0xAD340012,0xA7380012,0xAB300009,0xAB300009,0xA7300001,0xA5300009,0x75FC0033, -0x75FC0033,0xA7240012,0xA5180008,0xA2000034,0xDB3C0002,0xF9400013,0x1440033,0xBF3C0001,0xB53C0001,0xAF3C0001,0xAD3C0002,0xAB380002,0xE3300000,0xBD380001,0xAD380008,0xA7300001,0x51FC0033,0x148000A,0x148000A,0x148000A,0x148000A,0xAD480001,0xAD480001,0xAD480001,0xA9440001,0xA9440001,0xA7440001,0x3E80008,0x3E80008,0x3E80008,0xA9400001,0xA9400001, -0xA7400001,0x79FC0008,0x79FC0008,0xA7340000,0xA4000008,0x3E80008,0x3E80008,0x3E80008,0xA9400001,0xA9400001,0xA7400001,0x79FC0008,0x79FC0008,0xA7340000,0xA4000008,0x79FC0008,0x79FC0008,0xA7340000,0xA4000008,0xA4000008,0xCF400000,0xCD480001,0x148000A,0xBF3C0000,0xB1400000,0xAD400000,0xAB400000,0xA9400001,0xD1380000,0xB93C0000,0x57FC0008,0xA7340000, -0x57FC0008,0x1540012,0xB7500001,0xAF500001,0xAD4C0001,0x1F80012,0xB1440001,0xAD480000,0x81FC0012,0xAD340000,0xA8000014,0x1F80012,0xB1440001,0xAD480000,0x81FC0012,0xAD340000,0xA8000014,0x81FC0012,0xAD340000,0xA8000014,0xA8000014,0x1F80012,0xB1440001,0xAD480000,0x81FC0012,0xAD340000,0xA8000014,0x81FC0012,0xAD340000,0xA8000014,0xA8000014,0x81FC0012, -0xAD340000,0xA8000014,0xA8000014,0xA8000014,0xF7340000,0xD680012,0xF1500001,0xC5380000,0xB53C0001,0xB1380000,0xAD3C0000,0xAD2C0000,0xEB2C0000,0xC3340000,0xAD400001,0xA8000014,0x5FFC0012,0x1400012,0x1400012,0x1400012,0x1400012,0x1400012,0x1400012,0x1400012,0x1400012,0x1400012,0x1400012,0xAD3C0001,0xAD3C0001,0xAD3C0001,0xAD3C0001,0xAD3C0001, -0xAD3C0001,0xA53C0001,0xA53C0001,0xA53C0001,0xA3380001,0x1DC0012,0x1DC0012,0x1DC0012,0x1DC0012,0x1DC0012,0x1DC0012,0xA7300001,0xA7300001,0xA7300001,0xA3340000,0x73F80012,0x73F80012,0x73F80012,0xA3200000,0x9E000014,0xE73C0001,0x1400012,0x1400012,0xBF3C0001,0xB53C0001,0xAF3C0001,0xAF3C0001,0xAB3C0001,0xE3300000,0xC5340000,0xA9380000,0xA7300001, -0x4DFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table21[] = { -0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000, -0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1600000,0x1600000,0x1600000,0x3E80000,0x57FC0000,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000, -0x1FC0000,0x83F80000,0x83F80000,0x83F80000,0xAA000000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x83F80000,0x83F80000,0x83F80000,0xAA000000,0x83F80000,0x83F80000,0x83F80000,0xAA000000,0xAA000000,0x16C0000,0x1540001,0x1540001,0x38C0000,0x1AC0000,0x1D00000,0x1D00000,0x31FC0000,0x38C0000,0x1AC0000,0x61FC0000,0x83F80000, -0x61FC0000,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0xDFC0000,0xDFC0000,0xDFC0000,0x89F80000,0x89F80000,0xAE000000,0xDFC0000,0xDFC0000,0xDFC0000,0x89F80000,0x89F80000,0xAE000000,0x89F80000,0x89F80000,0xAE000000,0xAE000000,0xDFC0000,0xDFC0000,0xDFC0000,0x89F80000,0x89F80000,0xAE000000,0x89F80000,0x89F80000,0xAE000000,0xAE000000,0x89F80000, -0x89F80000,0xAE000000,0xAE000000,0xAE000000,0x1980000,0x3740000,0x15C0001,0x1DC0000,0x3BFC0000,0x69FC0000,0x79FC0000,0x93F80000,0x5B40000,0xDFC0000,0x69FC0000,0xAE000000,0x69FC0000,0x1680001,0x1FFC0000,0x91FC0000,0xB4000000,0x1FFC0000,0x91FC0000,0xB4000000,0x91FC0000,0xB4000000,0xB4000000,0x1FFC0000,0x91FC0000,0xB4000000,0x91FC0000,0xB4000000, -0xB4000000,0x91FC0000,0xB4000000,0xB4000000,0xB4000000,0x1FFC0000,0x91FC0000,0xB4000000,0x91FC0000,0xB4000000,0xB4000000,0x91FC0000,0xB4000000,0xB4000000,0xB4000000,0x91FC0000,0xB4000000,0xB4000000,0xB4000000,0xB4000000,0x3C40000,0x9800000,0x9800000,0x4BFC0000,0x83FC0000,0xA3F40000,0xB4000000,0xB4000000,0x1EC0000,0x63FC0000,0xB1D40000,0xB4000000, -0x75FC0000,0x158008C,0xCB540033,0xBB540033,0xB5540033,0xC5500024,0xBB500013,0xB750001A,0xB7500024,0xB3500016,0xB1500026,0xC34C0034,0xBB4C000A,0xB74C000F,0xB9480013,0xB5480002,0xB14C0016,0xB54C0034,0xB348000F,0xB1480019,0xAF480035,0x7FC0088,0xBF400033,0xB5480033,0xB9400026,0xB5440012,0xB1480026,0xBB340033,0xB5380009,0xB13C0015,0xAF400034,0x85FC0088, -0xB5240033,0xB12C0026,0xAF200034,0xAC000088,0xEF4C0002,0xF354003E,0xF558004B,0xC94C0002,0xBD4C0002,0xB74C0002,0xB54C0002,0xB5480002,0xEB400001,0xCB440001,0xB5480009,0xB13C0015,0x65FC0088,0x1600033,0xC35C000A,0xB958000A,0xB558000A,0xBD580013,0xB9540002,0xB5580001,0xB5580013,0xB5540005,0xB1540015,0xFFC0033,0xBB4C0009,0xB550000A,0xB9480012,0xB5480001, -0xB1500014,0x8BF80033,0xB5380008,0xB13C0014,0xAE000034,0xFFC0033,0xBB4C0009,0xB550000A,0xB9480012,0xB5480001,0xB1500014,0x8BF80033,0xB5380008,0xB13C0014,0xAE000034,0x8BF80033,0xB5380008,0xB13C0014,0xAE000034,0xAE000034,0xEF4C0001,0xF5580015,0xF75C000E,0xC94C0001,0xBD4C0001,0xB74C0001,0xB54C0001,0xB5480001,0xEB400001,0xC7480001,0xB5480009,0xB13C0014, -0x6BFC0033,0x1540033,0x1540033,0x1540033,0x1540033,0xBD500012,0xBD500012,0xBD500012,0xB3500012,0xB3500012,0xAF500012,0xBB4C0009,0xBB4C0009,0xBB4C0009,0xB34C0002,0xB34C0002,0xAF4C0005,0xB1480009,0xB1480009,0xAF480001,0xAD480009,0x1F80033,0x1F80033,0x1F80033,0xB5440012,0xB5440012,0xAF480012,0xB3400009,0xB3400009,0xAF400001,0xAD400009,0x81FC0033, -0x81FC0033,0xAF340012,0xAD280008,0xAA000034,0xE34C0002,0xF1500016,0x1540033,0xC74C0001,0xBD4C0001,0xB74C0001,0xB54C0002,0xB3480002,0xEB400000,0xC5480001,0xB5480008,0xAF400001,0x5FFC0033,0x158000A,0x158000A,0x158000A,0x158000A,0xB5580001,0xB5580001,0xB5580001,0xB1540001,0xB1540001,0xAF540001,0x7FC0008,0x7FC0008,0x7FC0008,0xB1500001,0xB1500001, -0xAF500001,0x85FC0008,0x85FC0008,0xAF440000,0xAC000008,0x7FC0008,0x7FC0008,0x7FC0008,0xB1500001,0xB1500001,0xAF500001,0x85FC0008,0x85FC0008,0xAF440000,0xAC000008,0x85FC0008,0x85FC0008,0xAF440000,0xAC000008,0xAC000008,0xD7500000,0xD5580001,0x158000A,0xC74C0000,0xB9500000,0xB5500000,0xB3500000,0xB1500001,0xD9480000,0xC14C0000,0x65FC0008,0xAF440000, -0x65FC0008,0x1640012,0xBF600001,0xB7600001,0xB55C0001,0x15FC0012,0xB9540001,0xB5580000,0x8DFC0012,0xB5440000,0xB0000014,0x15FC0012,0xB9540001,0xB5580000,0x8DFC0012,0xB5440000,0xB0000014,0x8DFC0012,0xB5440000,0xB0000014,0xB0000014,0x15FC0012,0xB9540001,0xB5580000,0x8DFC0012,0xB5440000,0xB0000014,0x8DFC0012,0xB5440000,0xB0000014,0xB0000014,0x8DFC0012, -0xB5440000,0xB0000014,0xB0000014,0xB0000014,0xFF440000,0x17C0012,0xF9600001,0xCD480000,0xBD4C0001,0xB9480000,0xB54C0000,0xB53C0000,0xF33C0000,0xCB440000,0xB5500001,0xB0000014,0x6FFC0012,0x1500012,0x1500012,0x1500012,0x1500012,0x1500012,0x1500012,0x1500012,0x1500012,0x1500012,0x1500012,0xB54C0001,0xB54C0001,0xB54C0001,0xB54C0001,0xB54C0001, -0xB54C0001,0xAD4C0001,0xAD4C0001,0xAD4C0001,0xAB480001,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0xAF400001,0xAF400001,0xAF400001,0xAB440000,0x7FF80012,0x7FF80012,0x7FF80012,0xAB300000,0xA6000014,0xEF4C0001,0x1500012,0x1500012,0xC74C0001,0xBD4C0001,0xB74C0001,0xB74C0001,0xB34C0001,0xEB400000,0xCD440000,0xB1480000,0xAF400001, -0x5DF80012,}; -static const uint32_t g_etc1_to_bc7_m6_table22[] = { -0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x85FC0000, -0x85FC0000,0x85FC0000,0x85FC0000,0xAC000000,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1700000,0x1700000,0x1700000,0x7FC0000,0x65FC0000,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000, -0x19FC0000,0x8FF80000,0x8FF80000,0x8FF80000,0xB2000000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x8FF80000,0x8FF80000,0x8FF80000,0xB2000000,0x8FF80000,0x8FF80000,0x8FF80000,0xB2000000,0xB2000000,0x77C0000,0x1640001,0x1640001,0x1A00000,0x1C00000,0x1E80000,0x1E80000,0x45FC0000,0x1A00000,0x1C00000,0x71FC0000,0x8FF80000, -0x71FC0000,0x16C0001,0x16C0001,0x16C0001,0x16C0001,0x25FC0000,0x25FC0000,0x25FC0000,0x95F80000,0x95F80000,0xB6000000,0x25FC0000,0x25FC0000,0x25FC0000,0x95F80000,0x95F80000,0xB6000000,0x95F80000,0x95F80000,0xB6000000,0xB6000000,0x25FC0000,0x25FC0000,0x25FC0000,0x95F80000,0x95F80000,0xB6000000,0x95F80000,0x95F80000,0xB6000000,0xB6000000,0x95F80000, -0x95F80000,0xB6000000,0xB6000000,0xB6000000,0x3A80000,0xB840000,0x16C0001,0x3F00000,0x4FFC0000,0x79FC0000,0x87F80000,0x9DFC0000,0x1CC0000,0x25FC0000,0x79FC0000,0xB6000000,0x79FC0000,0x1780001,0x37FC0000,0x9DFC0000,0xBC000000,0x37FC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0xBC000000,0xBC000000,0x37FC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0xBC000000, -0xBC000000,0x9DFC0000,0xBC000000,0xBC000000,0xBC000000,0x37FC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0xBC000000,0xBC000000,0x9DFC0000,0xBC000000,0xBC000000,0xBC000000,0x9DFC0000,0xBC000000,0xBC000000,0xBC000000,0xBC000000,0x3D80000,0x1940000,0x1940000,0x5DFC0000,0x91FC0000,0xADF40000,0xBC000000,0xBC000000,0x9FC0000,0x73FC0000,0xB9E40000,0xBC000000, -0x83FC0000,0x168008C,0xD3640033,0xC3640033,0xBD640033,0xCD600024,0xC3600013,0xBF60001A,0xBF600024,0xBB600016,0xB9600026,0xCB5C0034,0xC35C000A,0xBF5C000F,0xC1580013,0xBD580002,0xB95C0016,0xBD5C0034,0xBB58000F,0xB9580019,0xB7580035,0x1FFC0088,0xC7500033,0xBD580033,0xC1500026,0xBD540012,0xB9580026,0xC3440033,0xBD480009,0xB94C0015,0xB7500034,0x91FC0088, -0xBD340033,0xB93C0026,0xB7300034,0xB4000088,0xF75C0002,0xFB64003E,0xFD68004B,0xD15C0002,0xC55C0002,0xBF5C0002,0xBD5C0002,0xBD580002,0xF3500001,0xD3540001,0xBD580009,0xB94C0015,0x75FC0088,0x1700033,0xCB6C000A,0xC168000A,0xBD68000A,0xC5680013,0xC1640002,0xBD680001,0xBD680013,0xBD640005,0xB9640015,0x29FC0033,0xC35C0009,0xBD60000A,0xC1580012,0xBD580001, -0xB9600014,0x97F80033,0xBD480008,0xB94C0014,0xB6000034,0x29FC0033,0xC35C0009,0xBD60000A,0xC1580012,0xBD580001,0xB9600014,0x97F80033,0xBD480008,0xB94C0014,0xB6000034,0x97F80033,0xBD480008,0xB94C0014,0xB6000034,0xB6000034,0xF75C0001,0xFD680015,0xFF6C000E,0xD15C0001,0xC55C0001,0xBF5C0001,0xBD5C0001,0xBD580001,0xF3500001,0xCF580001,0xBD580009,0xB94C0014, -0x7BFC0033,0x1640033,0x1640033,0x1640033,0x1640033,0xC5600012,0xC5600012,0xC5600012,0xBB600012,0xBB600012,0xB7600012,0xC35C0009,0xC35C0009,0xC35C0009,0xBB5C0002,0xBB5C0002,0xB75C0005,0xB9580009,0xB9580009,0xB7580001,0xB5580009,0x15FC0033,0x15FC0033,0x15FC0033,0xBD540012,0xBD540012,0xB7580012,0xBB500009,0xBB500009,0xB7500001,0xB5500009,0x8DFC0033, -0x8DFC0033,0xB7440012,0xB5380008,0xB2000034,0xEB5C0002,0xF9600016,0x1640033,0xCF5C0001,0xC55C0001,0xBF5C0001,0xBD5C0002,0xBB580002,0xF3500000,0xCD580001,0xBD580008,0xB7500001,0x6FFC0033,0x168000A,0x168000A,0x168000A,0x168000A,0xBD680001,0xBD680001,0xBD680001,0xB9640001,0xB9640001,0xB7640001,0x1FFC0008,0x1FFC0008,0x1FFC0008,0xB9600001,0xB9600001, -0xB7600001,0x91FC0008,0x91FC0008,0xB7540000,0xB4000008,0x1FFC0008,0x1FFC0008,0x1FFC0008,0xB9600001,0xB9600001,0xB7600001,0x91FC0008,0x91FC0008,0xB7540000,0xB4000008,0x91FC0008,0x91FC0008,0xB7540000,0xB4000008,0xB4000008,0xDF600000,0xDD680001,0x168000A,0xCF5C0000,0xC1600000,0xBD600000,0xBB600000,0xB9600001,0xE1580000,0xC95C0000,0x75FC0008,0xB7540000, -0x75FC0008,0x1740012,0xC7700001,0xBF700001,0xBD6C0001,0x2FFC0012,0xC1640001,0xBD680000,0x99FC0012,0xBD540000,0xB8000014,0x2FFC0012,0xC1640001,0xBD680000,0x99FC0012,0xBD540000,0xB8000014,0x99FC0012,0xBD540000,0xB8000014,0xB8000014,0x2FFC0012,0xC1640001,0xBD680000,0x99FC0012,0xBD540000,0xB8000014,0x99FC0012,0xBD540000,0xB8000014,0xB8000014,0x99FC0012, -0xBD540000,0xB8000014,0xB8000014,0xB8000014,0xF15C0001,0x18C0012,0xF1700002,0xD5580000,0xC55C0001,0xC1580000,0xBD5C0000,0xBD4C0000,0xFB4C0000,0xD3540000,0xBD600001,0xB8000014,0x7FF80012,0x1600012,0x1600012,0x1600012,0x1600012,0x1600012,0x1600012,0x1600012,0x1600012,0x1600012,0x1600012,0xBD5C0001,0xBD5C0001,0xBD5C0001,0xBD5C0001,0xBD5C0001, -0xBD5C0001,0xB55C0001,0xB55C0001,0xB55C0001,0xB3580001,0xFFC0012,0xFFC0012,0xFFC0012,0xFFC0012,0xFFC0012,0xFFC0012,0xB7500001,0xB7500001,0xB7500001,0xB3540000,0x8BF80012,0x8BF80012,0x8BF80012,0xB3400000,0xAE000014,0xF75C0001,0x1600012,0x1600012,0xCF5C0001,0xC55C0001,0xBF5C0001,0xBF5C0001,0xBB5C0001,0xF3500000,0xD5540000,0xB9580000,0xB7500001, -0x6BFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table23[] = { -0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x91FC0000, -0x91FC0000,0x91FC0000,0x91FC0000,0xB4000000,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x9800000,0x9800000,0x9800000,0x1FFC0000,0x75FC0000,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000, -0x31FC0000,0x9BF80000,0x9BF80000,0x9BF80000,0xBA000000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x9BF80000,0x9BF80000,0x9BF80000,0xBA000000,0x9BF80000,0x9BF80000,0x9BF80000,0xBA000000,0xBA000000,0xF8C0000,0x1740001,0x1740001,0x1B40000,0x1D40000,0x1FC0000,0x1FC0000,0x59FC0000,0x1B40000,0x1D40000,0x7FFC0000,0x9BF80000, -0x7FFC0000,0x17C0001,0x17C0001,0x17C0001,0x17C0001,0x3DFC0000,0x3DFC0000,0x3DFC0000,0xA1F80000,0xA1F80000,0xBE000000,0x3DFC0000,0x3DFC0000,0x3DFC0000,0xA1F80000,0xA1F80000,0xBE000000,0xA1F80000,0xA1F80000,0xBE000000,0xBE000000,0x3DFC0000,0x3DFC0000,0x3DFC0000,0xA1F80000,0xA1F80000,0xBE000000,0xA1F80000,0xA1F80000,0xBE000000,0xBE000000,0xA1F80000, -0xA1F80000,0xBE000000,0xBE000000,0xBE000000,0x1BC0000,0x1980000,0x17C0001,0xFFC0000,0x63FC0000,0x87FC0000,0x95F80000,0xA9F40000,0x1E00000,0x3DFC0000,0x87FC0000,0xBE000000,0x87FC0000,0x1880001,0x4FFC0000,0xA9FC0000,0xC4000000,0x4FFC0000,0xA9FC0000,0xC4000000,0xA9FC0000,0xC4000000,0xC4000000,0x4FFC0000,0xA9FC0000,0xC4000000,0xA9FC0000,0xC4000000, -0xC4000000,0xA9FC0000,0xC4000000,0xC4000000,0xC4000000,0x4FFC0000,0xA9FC0000,0xC4000000,0xA9FC0000,0xC4000000,0xC4000000,0xA9FC0000,0xC4000000,0xC4000000,0xC4000000,0xA9FC0000,0xC4000000,0xC4000000,0xC4000000,0xC4000000,0x3EC0000,0x1A40000,0x1A40000,0x71FC0000,0x9FF80000,0xB7F40000,0xC4000000,0xC4000000,0x27FC0000,0x85FC0000,0xC1F40000,0xC4000000, -0x93FC0000,0x178008C,0xDB740033,0xCB740033,0xC5740033,0xD5700024,0xCB700013,0xC770001A,0xC7700024,0xC3700016,0xC1700026,0xD36C0034,0xCB6C000A,0xC76C000F,0xC9680013,0xC5680002,0xC16C0016,0xC56C0034,0xC368000F,0xC1680019,0xBF680035,0x37FC0088,0xCF600033,0xC5680033,0xC9600026,0xC5640012,0xC1680026,0xCB540033,0xC5580009,0xC15C0015,0xBF600034,0x9DFC0088, -0xC5440033,0xC14C0026,0xBF400034,0xBC000088,0xFF6C0002,0xF374004C,0xF5780054,0xD96C0002,0xCD6C0002,0xC76C0002,0xC56C0002,0xC5680002,0xFB600001,0xDB640001,0xC5680009,0xC15C0015,0x83FC0088,0x1800033,0xD37C000A,0xC978000A,0xC578000A,0xCD780013,0xC9740002,0xC5780001,0xC5780013,0xC5740005,0xC1740015,0x41FC0033,0xCB6C0009,0xC570000A,0xC9680012,0xC5680001, -0xC1700014,0xA1FC0033,0xC5580008,0xC15C0014,0xBE000034,0x41FC0033,0xCB6C0009,0xC570000A,0xC9680012,0xC5680001,0xC1700014,0xA1FC0033,0xC5580008,0xC15C0014,0xBE000034,0xA1FC0033,0xC5580008,0xC15C0014,0xBE000034,0xBE000034,0xFF6C0001,0xF578001B,0xF77C0013,0xD96C0001,0xCD6C0001,0xC76C0001,0xC56C0001,0xC5680001,0xFB600001,0xD7680001,0xC5680009,0xC15C0014, -0x89FC0033,0x1740033,0x1740033,0x1740033,0x1740033,0xCD700012,0xCD700012,0xCD700012,0xC3700012,0xC3700012,0xBF700012,0xCB6C0009,0xCB6C0009,0xCB6C0009,0xC36C0002,0xC36C0002,0xBF6C0005,0xC1680009,0xC1680009,0xBF680001,0xBD680009,0x2FFC0033,0x2FFC0033,0x2FFC0033,0xC5640012,0xC5640012,0xBF680012,0xC3600009,0xC3600009,0xBF600001,0xBD600009,0x99FC0033, -0x99FC0033,0xBF540012,0xBD480008,0xBA000034,0xF36C0002,0xF170001B,0x1740033,0xD76C0001,0xCD6C0001,0xC76C0001,0xC56C0002,0xC3680002,0xFB600000,0xD5680001,0xC5680008,0xBF600001,0x7FF80033,0x178000A,0x178000A,0x178000A,0x178000A,0xC5780001,0xC5780001,0xC5780001,0xC1740001,0xC1740001,0xBF740001,0x37FC0008,0x37FC0008,0x37FC0008,0xC1700001,0xC1700001, -0xBF700001,0x9DFC0008,0x9DFC0008,0xBF640000,0xBC000008,0x37FC0008,0x37FC0008,0x37FC0008,0xC1700001,0xC1700001,0xBF700001,0x9DFC0008,0x9DFC0008,0xBF640000,0xBC000008,0x9DFC0008,0x9DFC0008,0xBF640000,0xBC000008,0xBC000008,0xE7700000,0xE5780001,0x178000A,0xD76C0000,0xC9700000,0xC5700000,0xC3700000,0xC1700001,0xE9680000,0xD16C0000,0x83FC0008,0xBF640000, -0x83FC0008,0x1840012,0xCF800001,0xC7800001,0xC57C0001,0x47FC0012,0xC9740001,0xC5780000,0xA5F80012,0xC5640000,0xC0000014,0x47FC0012,0xC9740001,0xC5780000,0xA5F80012,0xC5640000,0xC0000014,0xA5F80012,0xC5640000,0xC0000014,0xC0000014,0x47FC0012,0xC9740001,0xC5780000,0xA5F80012,0xC5640000,0xC0000014,0xA5F80012,0xC5640000,0xC0000014,0xC0000014,0xA5F80012, -0xC5640000,0xC0000014,0xC0000014,0xC0000014,0xF96C0001,0x79C0012,0xF9800002,0xDD680000,0xCD6C0001,0xC9680000,0xC56C0000,0xC55C0000,0xF9640001,0xDB640000,0xC5700001,0xC0000014,0x8DFC0012,0x1700012,0x1700012,0x1700012,0x1700012,0x1700012,0x1700012,0x1700012,0x1700012,0x1700012,0x1700012,0xC56C0001,0xC56C0001,0xC56C0001,0xC56C0001,0xC56C0001, -0xC56C0001,0xBD6C0001,0xBD6C0001,0xBD6C0001,0xBB680001,0x29FC0012,0x29FC0012,0x29FC0012,0x29FC0012,0x29FC0012,0x29FC0012,0xBF600001,0xBF600001,0xBF600001,0xBB640000,0x97F80012,0x97F80012,0x97F80012,0xBB500000,0xB6000014,0xFF6C0001,0x1700012,0x1700012,0xD76C0001,0xCD6C0001,0xC76C0001,0xC76C0001,0xC36C0001,0xFB600000,0xDD640000,0xC1680000,0xBF600001, -0x7BFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table24[] = { -0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x9FF80000, -0x9FF80000,0x9FF80000,0x9FF80000,0xBC000001,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x3940000,0x3940000,0x3940000,0x3BFC0000,0x85FC0000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000, -0x4DFC0000,0xA7FC0000,0xA7FC0000,0xA7FC0000,0xC2000001,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0xA7FC0000,0xA7FC0000,0xA7FC0000,0xC2000001,0xA7FC0000,0xA7FC0000,0xA7FC0000,0xC2000001,0xC2000001,0x9A00000,0x1880000,0x1880000,0x1C80000,0x1EC0000,0x23FC0000,0x23FC0000,0x6FFC0000,0x1C80000,0x1EC0000,0x91FC0000,0xA7FC0000, -0x91FC0000,0x1900000,0x1900000,0x1900000,0x1900000,0x59FC0000,0x59FC0000,0x59FC0000,0xADFC0000,0xADFC0000,0xC6000001,0x59FC0000,0x59FC0000,0x59FC0000,0xADFC0000,0xADFC0000,0xC6000001,0xADFC0000,0xADFC0000,0xC6000001,0xC6000001,0x59FC0000,0x59FC0000,0x59FC0000,0xADFC0000,0xADFC0000,0xC6000001,0xADFC0000,0xADFC0000,0xC6000001,0xC6000001,0xADFC0000, -0xADFC0000,0xC6000001,0xC6000001,0xC6000001,0x1D00000,0xDA80000,0x1900000,0x31FC0000,0x79FC0000,0x99FC0000,0xA3FC0000,0xB5F80000,0x3F40000,0x59FC0000,0x99FC0000,0xC6000001,0x99FC0000,0x19C0000,0x6BFC0000,0xB7F80000,0xCC000001,0x6BFC0000,0xB7F80000,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0x6BFC0000,0xB7F80000,0xCC000001,0xB7F80000,0xCC000001, -0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0x6BFC0000,0xB7F80000,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0xCC000001,0x11FC0000,0x1B80000,0x1B80000,0x87FC0000,0xADFC0000,0xC3F00000,0xCC000001,0xCC000001,0x49FC0000,0x97FC0000,0xCBE80000,0xCC000001, -0xA3FC0000,0x18C0088,0xE1880034,0xD3880034,0xCD880035,0xDD840026,0xD3840015,0xCD840019,0xCF840026,0xCD800016,0xCB800026,0xDF7C0033,0xD57C0009,0xCF80000F,0xD17C0012,0xCD7C0002,0xCB7C0016,0xCF7C0033,0xCD78000F,0xCB78001A,0xC97C0033,0x53FC0088,0xD9700033,0xCD7C0034,0xD3700026,0xCF740013,0xCB780024,0xD3680033,0xCD70000A,0xCB6C0013,0xC9700033,0xABF80088, -0xCD5C0034,0xCB580024,0xC94C0033,0xC400008C,0xFF800006,0xFD880048,0xFD880058,0xE57C0001,0xD77C0002,0xD17C0002,0xCF7C0002,0xCD7C0002,0xFF740004,0xE1780001,0xCF780009,0xCB6C0013,0x95FC0088,0x1900034,0xDD8C0008,0xD18C0009,0xCD8C0009,0xD9880012,0xD1880001,0xCF880001,0xCF880012,0xCD880005,0xCB880012,0x5BFC0033,0xD3800009,0xCD840009,0xD17C0012,0xCD7C0002, -0xCB800012,0xAFFC0033,0xCD700009,0xCB6C0012,0xC8000033,0x5BFC0033,0xD3800009,0xCD840009,0xD17C0012,0xCD7C0002,0xCB800012,0xAFFC0033,0xCD700009,0xCB6C0012,0xC8000033,0xAFFC0033,0xCD700009,0xCB6C0012,0xC8000033,0xC8000033,0xFF800002,0xFF8C0018,0xFF8C0018,0xDF800001,0xD77C0002,0xD17C0002,0xCD800002,0xCD780002,0xFB780003,0xDD7C0001,0xCF780008,0xCB6C0012, -0x9BFC0033,0x1880034,0x1880034,0x1880034,0x1880034,0xD3840014,0xD3840014,0xD3840014,0xCB840014,0xCB840014,0xC7800015,0xD57C0008,0xD57C0008,0xD57C0008,0xCD7C0001,0xCD7C0001,0xC97C0005,0xC97C000A,0xC97C000A,0xC77C0001,0xC57C000A,0x49FC0033,0x49FC0033,0x49FC0033,0xCF740012,0xCF740012,0xC77C0013,0xCD700009,0xCD700009,0xC7740002,0xC574000A,0xA7F80033, -0xA7F80033,0xC7680013,0xC560000A,0xC2000033,0xF9800004,0xFB840018,0x1880034,0xE37C0001,0xD77C0001,0xD17C0001,0xCF7C0001,0xCB7C0001,0xFF740000,0xE1780000,0xCD7C0009,0xC7740002,0x8FFC0033,0x18C0008,0x18C0008,0x18C0008,0x18C0008,0xCF880000,0xCF880000,0xCF880000,0xC9880001,0xC9880001,0xC7880001,0x53FC0008,0x53FC0008,0x53FC0008,0xC9840001,0xC9840001, -0xC7840001,0xABF80008,0xABF80008,0xC77C0001,0xC400000A,0x53FC0008,0x53FC0008,0x53FC0008,0xC9840001,0xC9840001,0xC7840001,0xABF80008,0xABF80008,0xC77C0001,0xC400000A,0xABF80008,0xABF80008,0xC77C0001,0xC400000A,0xC400000A,0xF9800000,0xFD880000,0x18C0008,0xDD800000,0xD3800000,0xCF800000,0xCB840001,0xCB800000,0xF5780000,0xDB7C0000,0x95FC0008,0xC77C0001, -0x95FC0008,0x1940014,0xD9900000,0xD1900000,0xCD900001,0x63FC0012,0xD1880001,0xCD8C0001,0xB3F80012,0xCD7C0001,0xCA000012,0x63FC0012,0xD1880001,0xCD8C0001,0xB3F80012,0xCD7C0001,0xCA000012,0xB3F80012,0xCD7C0001,0xCA000012,0xCA000012,0x63FC0012,0xD1880001,0xCD8C0001,0xB3F80012,0xCD7C0001,0xCA000012,0xB3F80012,0xCD7C0001,0xCA000012,0xCA000012,0xB3F80012, -0xCD7C0001,0xCA000012,0xCA000012,0xCA000012,0xF7840002,0x1B00012,0xF3940005,0xE57C0000,0xD77C0001,0xD3780000,0xCD800001,0xCD740001,0xF57C0002,0xE5740000,0xCF800000,0xCA000012,0x9FF80012,0x1800014,0x1800014,0x1800014,0x1800014,0x1800014,0x1800014,0x1800014,0x1800014,0x1800014,0x1800014,0xCF7C0000,0xCF7C0000,0xCF7C0000,0xCF7C0000,0xCF7C0000, -0xCF7C0000,0xC77C0000,0xC77C0000,0xC77C0000,0xC37C0001,0x43FC0012,0x43FC0012,0x43FC0012,0x43FC0012,0x43FC0012,0x43FC0012,0xC7740001,0xC7740001,0xC7740001,0xC3780001,0xA3FC0012,0xA3FC0012,0xA3FC0012,0xC3680001,0xC0000012,0xF77C0004,0x1800014,0x1800014,0xE77C0000,0xDB7C0000,0xD37C0000,0xD37C0000,0xCD7C0000,0xFF740000,0xE1780000,0xC97C0001,0xC7740001, -0x8BFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table25[] = { -0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000, -0xABF80000,0xABF80000,0xABF80000,0xC4000001,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0xBA40000,0xBA40000,0xBA40000,0x53FC0000,0x95FC0000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000, -0x65FC0000,0xB3FC0000,0xB3FC0000,0xB3FC0000,0xCA000001,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0xB3FC0000,0xB3FC0000,0xB3FC0000,0xCA000001,0xB3FC0000,0xB3FC0000,0xB3FC0000,0xCA000001,0xCA000001,0x1B40000,0x1980000,0x1980000,0x5D80000,0x7FC0000,0x41FC0000,0x41FC0000,0x83FC0000,0x5D80000,0x7FC0000,0x9FFC0000,0xB3FC0000, -0x9FFC0000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x71FC0000,0x71FC0000,0x71FC0000,0xB9FC0000,0xB9FC0000,0xCE000001,0x71FC0000,0x71FC0000,0x71FC0000,0xB9FC0000,0xB9FC0000,0xCE000001,0xB9FC0000,0xB9FC0000,0xCE000001,0xCE000001,0x71FC0000,0x71FC0000,0x71FC0000,0xB9FC0000,0xB9FC0000,0xCE000001,0xB9FC0000,0xB9FC0000,0xCE000001,0xCE000001,0xB9FC0000, -0xB9FC0000,0xCE000001,0xCE000001,0xCE000001,0x1E40000,0x1BC0000,0x1A00000,0x4FFC0000,0x8DFC0000,0xA7FC0000,0xB1FC0000,0xBFFC0000,0x1BFC0000,0x71FC0000,0xA7FC0000,0xCE000001,0xA7FC0000,0x1AC0000,0x83FC0000,0xC3F80000,0xD4000001,0x83FC0000,0xC3F80000,0xD4000001,0xC3F80000,0xD4000001,0xD4000001,0x83FC0000,0xC3F80000,0xD4000001,0xC3F80000,0xD4000001, -0xD4000001,0xC3F80000,0xD4000001,0xD4000001,0xD4000001,0x83FC0000,0xC3F80000,0xD4000001,0xC3F80000,0xD4000001,0xD4000001,0xC3F80000,0xD4000001,0xD4000001,0xD4000001,0xC3F80000,0xD4000001,0xD4000001,0xD4000001,0xD4000001,0x37FC0000,0x1C80000,0x1C80000,0x9BFC0000,0xBBFC0000,0xCDF00000,0xD4000001,0xD4000001,0x67FC0000,0xA9FC0000,0xD3F80000,0xD4000001, -0xB3FC0000,0x19C0088,0xE9980034,0xDB980034,0xD5980035,0xE5940026,0xDB940015,0xD5940019,0xD7940026,0xD5900016,0xD3900026,0xE78C0033,0xDD8C0009,0xD790000F,0xD98C0012,0xD58C0002,0xD38C0016,0xD78C0033,0xD588000F,0xD388001A,0xD18C0033,0x6BFC0088,0xE1800033,0xD58C0034,0xDB800026,0xD7840013,0xD3880024,0xDB780033,0xD580000A,0xD37C0013,0xD1800033,0xB7F80088, -0xD56C0034,0xD3680024,0xD15C0033,0xCC00008C,0xFF90000E,0xF5980056,0xF79C0061,0xED8C0001,0xDF8C0002,0xD98C0002,0xD78C0002,0xD58C0002,0xFF880008,0xE9880001,0xD7880009,0xD37C0013,0xA3FC0088,0x1A00034,0xE59C0008,0xD99C0009,0xD59C0009,0xE1980012,0xD9980001,0xD7980001,0xD7980012,0xD5980005,0xD3980012,0x75FC0033,0xDB900009,0xD5940009,0xD98C0012,0xD58C0002, -0xD3900012,0xBBFC0033,0xD5800009,0xD37C0012,0xD0000033,0x75FC0033,0xDB900009,0xD5940009,0xD98C0012,0xD58C0002,0xD3900012,0xBBFC0033,0xD5800009,0xD37C0012,0xD0000033,0xBBFC0033,0xD5800009,0xD37C0012,0xD0000033,0xD0000033,0xFF940003,0xF79C001E,0xF9A00019,0xE7900001,0xDF8C0002,0xD98C0002,0xD5900002,0xD5880002,0xFF880004,0xE58C0001,0xD7880008,0xD37C0012, -0xA9FC0033,0x1980034,0x1980034,0x1980034,0x1980034,0xDB940014,0xDB940014,0xDB940014,0xD3940014,0xD3940014,0xCF900015,0xDD8C0008,0xDD8C0008,0xDD8C0008,0xD58C0001,0xD58C0001,0xD18C0005,0xD18C000A,0xD18C000A,0xCF8C0001,0xCD8C000A,0x63FC0033,0x63FC0033,0x63FC0033,0xD7840012,0xD7840012,0xCF8C0013,0xD5800009,0xD5800009,0xCF840002,0xCD84000A,0xB3F80033, -0xB3F80033,0xCF780013,0xCD70000A,0xCA000033,0xFF900005,0xF394001D,0x1980034,0xEB8C0001,0xDF8C0001,0xD98C0001,0xD78C0001,0xD38C0001,0xFB880002,0xE9880000,0xD58C0009,0xCF840002,0x9FF80033,0x19C0008,0x19C0008,0x19C0008,0x19C0008,0xD7980000,0xD7980000,0xD7980000,0xD1980001,0xD1980001,0xCF980001,0x6BFC0008,0x6BFC0008,0x6BFC0008,0xD1940001,0xD1940001, -0xCF940001,0xB7F80008,0xB7F80008,0xCF8C0001,0xCC00000A,0x6BFC0008,0x6BFC0008,0x6BFC0008,0xD1940001,0xD1940001,0xCF940001,0xB7F80008,0xB7F80008,0xCF8C0001,0xCC00000A,0xB7F80008,0xB7F80008,0xCF8C0001,0xCC00000A,0xCC00000A,0xEB940001,0xF5980001,0x19C0008,0xE5900000,0xDB900000,0xD7900000,0xD3940001,0xD3900000,0xFD880000,0xE38C0000,0xA3FC0008,0xCF8C0001, -0xA3FC0008,0x1A40014,0xE1A00000,0xD9A00000,0xD5A00001,0x7BFC0012,0xD9980001,0xD59C0001,0xBFF80012,0xD58C0001,0xD2000012,0x7BFC0012,0xD9980001,0xD59C0001,0xBFF80012,0xD58C0001,0xD2000012,0xBFF80012,0xD58C0001,0xD2000012,0xD2000012,0x7BFC0012,0xD9980001,0xD59C0001,0xBFF80012,0xD58C0001,0xD2000012,0xBFF80012,0xD58C0001,0xD2000012,0xD2000012,0xBFF80012, -0xD58C0001,0xD2000012,0xD2000012,0xD2000012,0xFF940002,0x9C00012,0xFBA40005,0xED8C0000,0xDF8C0001,0xDB880000,0xD5900001,0xD5840001,0xFD8C0002,0xED840000,0xD7900000,0xD2000012,0xADFC0012,0x1900014,0x1900014,0x1900014,0x1900014,0x1900014,0x1900014,0x1900014,0x1900014,0x1900014,0x1900014,0xD78C0000,0xD78C0000,0xD78C0000,0xD78C0000,0xD78C0000, -0xD78C0000,0xCF8C0000,0xCF8C0000,0xCF8C0000,0xCB8C0001,0x5BFC0012,0x5BFC0012,0x5BFC0012,0x5BFC0012,0x5BFC0012,0x5BFC0012,0xCF840001,0xCF840001,0xCF840001,0xCB880001,0xAFFC0012,0xAFFC0012,0xAFFC0012,0xCB780001,0xC8000012,0xFF8C0004,0x1900014,0x1900014,0xEF8C0000,0xE38C0000,0xDB8C0000,0xDB8C0000,0xD58C0000,0xFB880001,0xE9880000,0xD18C0001,0xCF840001, -0x9BFC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table26[] = { -0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0xB7F80000, -0xB7F80000,0xB7F80000,0xB7F80000,0xCC000001,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x1B80000,0x1B80000,0x1B80000,0x6BFC0000,0xA3FC0000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x7DFC0000,0x7DFC0000,0x7DFC0000,0x7DFC0000,0x7DFC0000, -0x7DFC0000,0xBFFC0000,0xBFFC0000,0xBFFC0000,0xD2000001,0x7DFC0000,0x7DFC0000,0x7DFC0000,0x7DFC0000,0x7DFC0000,0x7DFC0000,0xBFFC0000,0xBFFC0000,0xBFFC0000,0xD2000001,0xBFFC0000,0xBFFC0000,0xBFFC0000,0xD2000001,0xD2000001,0x1C40000,0x1A80000,0x1A80000,0x1EC0000,0x2DFC0000,0x5FFC0000,0x5FFC0000,0x97FC0000,0x1EC0000,0x2DFC0000,0xAFFC0000,0xBFFC0000, -0xAFFC0000,0x1B00000,0x1B00000,0x1B00000,0x1B00000,0x89FC0000,0x89FC0000,0x89FC0000,0xC5FC0000,0xC5FC0000,0xD6000001,0x89FC0000,0x89FC0000,0x89FC0000,0xC5FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xC5FC0000,0xD6000001,0xD6000001,0x89FC0000,0x89FC0000,0x89FC0000,0xC5FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xC5FC0000,0xD6000001,0xD6000001,0xC5FC0000, -0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0x5F40000,0x1CC0000,0x1B00000,0x6FFC0000,0xA1FC0000,0xB7FC0000,0xBFF80000,0xCBF80000,0x41FC0000,0x89FC0000,0xB7FC0000,0xD6000001,0xB7FC0000,0x1BC0000,0x9BFC0000,0xCFF80000,0xDC000001,0x9BFC0000,0xCFF80000,0xDC000001,0xCFF80000,0xDC000001,0xDC000001,0x9BFC0000,0xCFF80000,0xDC000001,0xCFF80000,0xDC000001, -0xDC000001,0xCFF80000,0xDC000001,0xDC000001,0xDC000001,0x9BFC0000,0xCFF80000,0xDC000001,0xCFF80000,0xDC000001,0xDC000001,0xCFF80000,0xDC000001,0xDC000001,0xDC000001,0xCFF80000,0xDC000001,0xDC000001,0xDC000001,0xDC000001,0x5FFC0000,0x5D80000,0x5D80000,0xAFFC0000,0xC9F80000,0xD7F00000,0xDC000001,0xDC000001,0x85FC0000,0xB9FC0000,0xDDCC0000,0xDC000001, -0xC1FC0000,0x1AC0088,0xF1A80034,0xE3A80034,0xDDA80035,0xEDA40026,0xE3A40015,0xDDA40019,0xDFA40026,0xDDA00016,0xDBA00026,0xEF9C0033,0xE59C0009,0xDFA0000F,0xE19C0012,0xDD9C0002,0xDB9C0016,0xDF9C0033,0xDD98000F,0xDB98001A,0xD99C0033,0x83FC0088,0xE9900033,0xDD9C0034,0xE3900026,0xDF940013,0xDB980024,0xE3880033,0xDD90000A,0xDB8C0013,0xD9900033,0xC3F80088, -0xDD7C0034,0xDB780024,0xD96C0033,0xD400008C,0xFFA00016,0xFDA80056,0xFFAC0061,0xF59C0001,0xE79C0002,0xE19C0002,0xDF9C0002,0xDD9C0002,0xFF9C0013,0xF1980001,0xDF980009,0xDB8C0013,0xB3FC0088,0x1B00034,0xEDAC0008,0xE1AC0009,0xDDAC0009,0xE9A80012,0xE1A80001,0xDFA80001,0xDFA80012,0xDDA80005,0xDBA80012,0x8DFC0033,0xE3A00009,0xDDA40009,0xE19C0012,0xDD9C0002, -0xDBA00012,0xC7FC0033,0xDD900009,0xDB8C0012,0xD8000033,0x8DFC0033,0xE3A00009,0xDDA40009,0xE19C0012,0xDD9C0002,0xDBA00012,0xC7FC0033,0xDD900009,0xDB8C0012,0xD8000033,0xC7FC0033,0xDD900009,0xDB8C0012,0xD8000033,0xD8000033,0xF9A80009,0xFFAC001E,0xF1B00020,0xEFA00001,0xE79C0002,0xE19C0002,0xDDA00002,0xDD980002,0xFDA00009,0xED9C0001,0xDF980008,0xDB8C0012, -0xB9FC0033,0x1A80034,0x1A80034,0x1A80034,0x1A80034,0xE3A40014,0xE3A40014,0xE3A40014,0xDBA40014,0xDBA40014,0xD7A00015,0xE59C0008,0xE59C0008,0xE59C0008,0xDD9C0001,0xDD9C0001,0xD99C0005,0xD99C000A,0xD99C000A,0xD79C0001,0xD59C000A,0x7BFC0033,0x7BFC0033,0x7BFC0033,0xDF940012,0xDF940012,0xD79C0013,0xDD900009,0xDD900009,0xD7940002,0xD594000A,0xBFF80033, -0xBFF80033,0xD7880013,0xD580000A,0xD2000033,0xFFA00006,0xFBA4001D,0x1A80034,0xF39C0001,0xE79C0001,0xE19C0001,0xDF9C0001,0xDB9C0001,0xFB980006,0xF1980000,0xDD9C0009,0xD7940002,0xADFC0033,0x1AC0008,0x1AC0008,0x1AC0008,0x1AC0008,0xDFA80000,0xDFA80000,0xDFA80000,0xD9A80001,0xD9A80001,0xD7A80001,0x83FC0008,0x83FC0008,0x83FC0008,0xD9A40001,0xD9A40001, -0xD7A40001,0xC3F80008,0xC3F80008,0xD79C0001,0xD400000A,0x83FC0008,0x83FC0008,0x83FC0008,0xD9A40001,0xD9A40001,0xD7A40001,0xC3F80008,0xC3F80008,0xD79C0001,0xD400000A,0xC3F80008,0xC3F80008,0xD79C0001,0xD400000A,0xD400000A,0xF3A40001,0xFDA80001,0x1AC0008,0xEDA00000,0xE3A00000,0xDFA00000,0xDBA40001,0xDBA00000,0xF5A00001,0xEB9C0000,0xB3FC0008,0xD79C0001, -0xB3FC0008,0x1B40014,0xE9B00000,0xE1B00000,0xDDB00001,0x93FC0012,0xE1A80001,0xDDAC0001,0xCBF80012,0xDD9C0001,0xDA000012,0x93FC0012,0xE1A80001,0xDDAC0001,0xCBF80012,0xDD9C0001,0xDA000012,0xCBF80012,0xDD9C0001,0xDA000012,0xDA000012,0x93FC0012,0xE1A80001,0xDDAC0001,0xCBF80012,0xDD9C0001,0xDA000012,0xCBF80012,0xDD9C0001,0xDA000012,0xDA000012,0xCBF80012, -0xDD9C0001,0xDA000012,0xDA000012,0xDA000012,0xF9A80005,0x1D40012,0xF3B40008,0xF59C0000,0xE79C0001,0xE3980000,0xDDA00001,0xDD940001,0xFBA40005,0xF5940000,0xDFA00000,0xDA000012,0xBDF80012,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0x1A00014,0xDF9C0000,0xDF9C0000,0xDF9C0000,0xDF9C0000,0xDF9C0000, -0xDF9C0000,0xD79C0000,0xD79C0000,0xD79C0000,0xD39C0001,0x75FC0012,0x75FC0012,0x75FC0012,0x75FC0012,0x75FC0012,0x75FC0012,0xD7940001,0xD7940001,0xD7940001,0xD3980001,0xBBFC0012,0xBBFC0012,0xBBFC0012,0xD3880001,0xD0000012,0xF9A00005,0x1A00014,0x1A00014,0xF79C0000,0xEB9C0000,0xE39C0000,0xE39C0000,0xDD9C0000,0xFB980002,0xF1980000,0xD99C0001,0xD7940001, -0xA9FC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table27[] = { -0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0xC3F80000, -0xC3F80000,0xC3F80000,0xC3F80000,0xD4000001,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1C80000,0x1C80000,0x1C80000,0x83FC0000,0xB3FC0000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x95FC0000,0x95FC0000,0x95FC0000,0x95FC0000,0x95FC0000, -0x95FC0000,0xCBFC0000,0xCBFC0000,0xCBFC0000,0xDA000001,0x95FC0000,0x95FC0000,0x95FC0000,0x95FC0000,0x95FC0000,0x95FC0000,0xCBFC0000,0xCBFC0000,0xCBFC0000,0xDA000001,0xCBFC0000,0xCBFC0000,0xCBFC0000,0xDA000001,0xDA000001,0x3D40000,0x1B80000,0x1B80000,0x9FC0000,0x55FC0000,0x7DFC0000,0x7DFC0000,0xABFC0000,0x9FC0000,0x55FC0000,0xBFF80000,0xCBFC0000, -0xBFF80000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xD1FC0000,0xD1FC0000,0xDE000001,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xD1FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xD1FC0000,0xDE000001,0xDE000001,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xD1FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xD1FC0000,0xDE000001,0xDE000001,0xD1FC0000, -0xD1FC0000,0xDE000001,0xDE000001,0xDE000001,0x27FC0000,0x7DC0000,0x1C00000,0x8DFC0000,0xB3FC0000,0xC5FC0000,0xCBFC0000,0xD5FC0000,0x69FC0000,0xA3FC0000,0xC5FC0000,0xDE000001,0xC5FC0000,0x1CC0000,0xB5FC0000,0xDBF80000,0xE4000001,0xB5FC0000,0xDBF80000,0xE4000001,0xDBF80000,0xE4000001,0xE4000001,0xB5FC0000,0xDBF80000,0xE4000001,0xDBF80000,0xE4000001, -0xE4000001,0xDBF80000,0xE4000001,0xE4000001,0xE4000001,0xB5FC0000,0xDBF80000,0xE4000001,0xDBF80000,0xE4000001,0xE4000001,0xDBF80000,0xE4000001,0xE4000001,0xE4000001,0xDBF80000,0xE4000001,0xE4000001,0xE4000001,0xE4000001,0x87FC0000,0xDE80000,0xDE80000,0xC3FC0000,0xD5FC0000,0xE1F00000,0xE4000001,0xE4000001,0xA3FC0000,0xCBFC0000,0xE5DC0000,0xE4000001, -0xD1FC0000,0x1BC0088,0xF9B80034,0xEBB80034,0xE5B80035,0xF5B40026,0xEBB40015,0xE5B40019,0xE7B40026,0xE5B00016,0xE3B00026,0xF7AC0033,0xEDAC0009,0xE7B0000F,0xE9AC0012,0xE5AC0002,0xE3AC0016,0xE7AC0033,0xE5A8000F,0xE3A8001A,0xE1AC0033,0x9BFC0088,0xF1A00033,0xE5AC0034,0xEBA00026,0xE7A40013,0xE3A80024,0xEB980033,0xE5A0000A,0xE39C0013,0xE1A00033,0xCFF80088, -0xE58C0034,0xE3880024,0xE17C0033,0xDC00008C,0xFFB40021,0xF5B80068,0xF7BC006C,0xFDAC0001,0xEFAC0002,0xE9AC0002,0xE7AC0002,0xE5AC0002,0xFDB00021,0xF9A80001,0xE7A80009,0xE39C0013,0xC1FC0088,0x1C00034,0xF5BC0008,0xE9BC0009,0xE5BC0009,0xF1B80012,0xE9B80001,0xE7B80001,0xE7B80012,0xE5B80005,0xE3B80012,0xA5FC0033,0xEBB00009,0xE5B40009,0xE9AC0012,0xE5AC0002, -0xE3B00012,0xD3FC0033,0xE5A00009,0xE39C0012,0xE0000033,0xA5FC0033,0xEBB00009,0xE5B40009,0xE9AC0012,0xE5AC0002,0xE3B00012,0xD3FC0033,0xE5A00009,0xE39C0012,0xE0000033,0xD3FC0033,0xE5A00009,0xE39C0012,0xE0000033,0xE0000033,0xFDB8000E,0xF9C00024,0xF9C00020,0xF7B00001,0xEFAC0002,0xE9AC0002,0xE5B00002,0xE5A80002,0xFDB00011,0xF5AC0001,0xE7A80008,0xE39C0012, -0xC7FC0033,0x1B80034,0x1B80034,0x1B80034,0x1B80034,0xEBB40014,0xEBB40014,0xEBB40014,0xE3B40014,0xE3B40014,0xDFB00015,0xEDAC0008,0xEDAC0008,0xEDAC0008,0xE5AC0001,0xE5AC0001,0xE1AC0005,0xE1AC000A,0xE1AC000A,0xDFAC0001,0xDDAC000A,0x93FC0033,0x93FC0033,0x93FC0033,0xE7A40012,0xE7A40012,0xDFAC0013,0xE5A00009,0xE5A00009,0xDFA40002,0xDDA4000A,0xCBF80033, -0xCBF80033,0xDF980013,0xDD90000A,0xDA000033,0xF9B00011,0xF3B40024,0x1B80034,0xFBAC0001,0xEFAC0001,0xE9AC0001,0xE7AC0001,0xE3AC0001,0xFBAC0009,0xF9A80000,0xE5AC0009,0xDFA40002,0xBDF80033,0x1BC0008,0x1BC0008,0x1BC0008,0x1BC0008,0xE7B80000,0xE7B80000,0xE7B80000,0xE1B80001,0xE1B80001,0xDFB80001,0x9BFC0008,0x9BFC0008,0x9BFC0008,0xE1B40001,0xE1B40001, -0xDFB40001,0xCFF80008,0xCFF80008,0xDFAC0001,0xDC00000A,0x9BFC0008,0x9BFC0008,0x9BFC0008,0xE1B40001,0xE1B40001,0xDFB40001,0xCFF80008,0xCFF80008,0xDFAC0001,0xDC00000A,0xCFF80008,0xCFF80008,0xDFAC0001,0xDC00000A,0xDC00000A,0xFBB40001,0xF5B80004,0x1BC0008,0xF5B00000,0xEBB00000,0xE7B00000,0xE3B40001,0xE3B00000,0xFDB00001,0xF3AC0000,0xC1FC0008,0xDFAC0001, -0xC1FC0008,0x1C40014,0xF1C00000,0xE9C00000,0xE5C00001,0xABFC0012,0xE9B80001,0xE5BC0001,0xD7F80012,0xE5AC0001,0xE2000012,0xABFC0012,0xE9B80001,0xE5BC0001,0xD7F80012,0xE5AC0001,0xE2000012,0xD7F80012,0xE5AC0001,0xE2000012,0xE2000012,0xABFC0012,0xE9B80001,0xE5BC0001,0xD7F80012,0xE5AC0001,0xE2000012,0xD7F80012,0xE5AC0001,0xE2000012,0xE2000012,0xD7F80012, -0xE5AC0001,0xE2000012,0xE2000012,0xE2000012,0xFBBC0008,0x1E40012,0xFBC40008,0xFDAC0000,0xEFAC0001,0xEBA80000,0xE5B00001,0xE5A40001,0xF3BC0008,0xFDA40000,0xE7B00000,0xE2000012,0xCBFC0012,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0x1B00014,0xE7AC0000,0xE7AC0000,0xE7AC0000,0xE7AC0000,0xE7AC0000, -0xE7AC0000,0xDFAC0000,0xDFAC0000,0xDFAC0000,0xDBAC0001,0x8DFC0012,0x8DFC0012,0x8DFC0012,0x8DFC0012,0x8DFC0012,0x8DFC0012,0xDFA40001,0xDFA40001,0xDFA40001,0xDBA80001,0xC7FC0012,0xC7FC0012,0xC7FC0012,0xDB980001,0xD8000012,0xF1B00008,0x1B00014,0x1B00014,0xFFAC0000,0xF3AC0000,0xEBAC0000,0xEBAC0000,0xE5AC0000,0xF7AC0005,0xF9A80000,0xE1AC0001,0xDFA40001, -0xB9FC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table28[] = { -0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0xD1F80000, -0xD1F80000,0xD1F80000,0xD1F80000,0xDE000000,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1DC0000,0x1DC0000,0x1DC0000,0x9FFC0000,0xC3FC0000,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000, -0xB1FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xE4000000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xE4000000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xE4000000,0xE4000000,0x1E80000,0x1C80001,0x1C80001,0x49FC0000,0x81FC0000,0x9FFC0000,0x9FFC0000,0xC1FC0000,0x49FC0000,0x81FC0000,0xCFFC0000,0xD9FC0000, -0xCFFC0000,0x1D00001,0x1D00001,0x1D00001,0x1D00001,0xBDFC0000,0xBDFC0000,0xBDFC0000,0xDFF80000,0xDFF80000,0xE8000000,0xBDFC0000,0xBDFC0000,0xBDFC0000,0xDFF80000,0xDFF80000,0xE8000000,0xDFF80000,0xDFF80000,0xE8000000,0xE8000000,0xBDFC0000,0xBDFC0000,0xBDFC0000,0xDFF80000,0xDFF80000,0xE8000000,0xDFF80000,0xDFF80000,0xE8000000,0xE8000000,0xDFF80000, -0xDFF80000,0xE8000000,0xE8000000,0xE8000000,0x67FC0000,0x1F00000,0x1D00001,0xAFFC0000,0xCBFC0000,0xD7FC0000,0xDBFC0000,0xE1FC0000,0x95FC0000,0xBDFC0000,0xD7FC0000,0xE8000000,0xD7FC0000,0x1DC0001,0xCFFC0000,0xE7FC0000,0xEE000000,0xCFFC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xCFFC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xEE000000, -0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0xCFFC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0xEE000000,0xB3FC0000,0x7FC0000,0x7FC0000,0xD9FC0000,0xE5FC0000,0xEBFC0000,0xEE000000,0xEE000000,0xC5FC0000,0xDDFC0000,0xEFD00000,0xEE000000, -0xE1FC0000,0x1CC008C,0xFFC80037,0xF5C80033,0xEFC80033,0xFFC40024,0xF5C40013,0xF1C4001A,0xF1C40024,0xEDC40016,0xEBC40026,0xFDC00034,0xF5C0000A,0xF1C0000F,0xF3BC0013,0xEFBC0002,0xEBC00016,0xEFC00034,0xEDBC000F,0xEBBC0019,0xE9BC0035,0xB7FC0088,0xF9B40033,0xEFBC0033,0xF3B40026,0xEFB80012,0xEBBC0026,0xF5A80033,0xEFAC0009,0xEBB00015,0xE9B40034,0xDDF40088, -0xEF980033,0xEBA00026,0xE9940034,0xE6000088,0xFDC8003F,0xFFCC0064,0xFFCC006C,0xFFC00007,0xF7C00002,0xF1C00002,0xEFC00002,0xEFBC0002,0xFFC40034,0xFFBC0005,0xEFBC0009,0xEBB00015,0xD3FC0088,0x1D40033,0xFDD0000A,0xF3CC000A,0xEFCC000A,0xF7CC0013,0xF3C80002,0xEFCC0001,0xEFCC0013,0xEFC80005,0xEBC80015,0xC1FC0033,0xF5C00009,0xEFC4000A,0xF3BC0012,0xEFBC0001, -0xEBC40014,0xE1F80033,0xEFAC0008,0xEBB00014,0xE8000034,0xC1FC0033,0xF5C00009,0xEFC4000A,0xF3BC0012,0xEFBC0001,0xEBC40014,0xE1F80033,0xEFAC0008,0xEBB00014,0xE8000034,0xE1F80033,0xEFAC0008,0xEBB00014,0xE8000034,0xE8000034,0xFDCC0014,0xF1D0002D,0xF3D4002A,0xFFC40003,0xF7C00001,0xF1C00001,0xEFC00001,0xEFBC0001,0xF7CC0019,0xFFC00002,0xEFBC0009,0xEBB00014, -0xD9FC0033,0x1C80033,0x1C80033,0x1C80033,0x1C80033,0xF7C40012,0xF7C40012,0xF7C40012,0xEDC40012,0xEDC40012,0xE9C40012,0xF5C00009,0xF5C00009,0xF5C00009,0xEDC00002,0xEDC00002,0xE9C00005,0xEBBC0009,0xEBBC0009,0xE9BC0001,0xE7BC0009,0xAFFC0033,0xAFFC0033,0xAFFC0033,0xEFB80012,0xEFB80012,0xE9BC0012,0xEDB40009,0xEDB40009,0xE9B40001,0xE7B40009,0xD7FC0033, -0xD7FC0033,0xE9A80012,0xE79C0008,0xE4000034,0xFBC40013,0xFDC80023,0x1C80033,0xFDC00003,0xF7C00001,0xF1C00001,0xEFC00002,0xEDBC0002,0xFDC00013,0xFFBC0001,0xEFBC0008,0xE9B40001,0xCDFC0033,0x1CC000A,0x1CC000A,0x1CC000A,0x1CC000A,0xEFCC0001,0xEFCC0001,0xEFCC0001,0xEBC80001,0xEBC80001,0xE9C80001,0xB7FC0008,0xB7FC0008,0xB7FC0008,0xEBC40001,0xEBC40001, -0xE9C40001,0xDDF40008,0xDDF40008,0xE9B80000,0xE6000008,0xB7FC0008,0xB7FC0008,0xB7FC0008,0xEBC40001,0xEBC40001,0xE9C40001,0xDDF40008,0xDDF40008,0xE9B80000,0xE6000008,0xDDF40008,0xDDF40008,0xE9B80000,0xE6000008,0xE6000008,0xFDC80002,0xFFCC0002,0x1CC000A,0xF5C80001,0xF3C40000,0xEFC40000,0xEDC40000,0xEBC40001,0xFFC40002,0xFBC00000,0xD3FC0008,0xE9B80000, -0xD3FC0008,0x1D80012,0xF9D40001,0xF1D40001,0xEFD00001,0xC7FC0012,0xF3C80001,0xEFCC0000,0xE3FC0012,0xEFB80000,0xEA000014,0xC7FC0012,0xF3C80001,0xEFCC0000,0xE3FC0012,0xEFB80000,0xEA000014,0xE3FC0012,0xEFB80000,0xEA000014,0xEA000014,0xC7FC0012,0xF3C80001,0xEFCC0000,0xE3FC0012,0xEFB80000,0xEA000014,0xE3FC0012,0xEFB80000,0xEA000014,0xEA000014,0xE3FC0012, -0xEFB80000,0xEA000014,0xEA000014,0xEA000014,0xFFD00008,0x1F80012,0xF5D8000D,0xFFC40002,0xF7C00001,0xF3BC0000,0xEFC00000,0xEFB00000,0xF9D00008,0xFFC00001,0xEFC40001,0xEA000014,0xDDF80012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0x1C40012,0xEFC00001,0xEFC00001,0xEFC00001,0xEFC00001,0xEFC00001, -0xEFC00001,0xE7C00001,0xE7C00001,0xE7C00001,0xE5BC0001,0xA9FC0012,0xA9FC0012,0xA9FC0012,0xA9FC0012,0xA9FC0012,0xA9FC0012,0xE9B40001,0xE9B40001,0xE9B40001,0xE5B80000,0xD5F80012,0xD5F80012,0xD5F80012,0xE5A40000,0xE0000014,0xF9C0000A,0x1C40012,0x1C40012,0xF9C00002,0xF7C00001,0xF1C00001,0xF1C00001,0xEDC00001,0xFFBC0005,0xFFBC0001,0xEBBC0000,0xE9B40001, -0xC9FC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table29[] = { -0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000, -0xDDF40000,0xDDF40000,0xDDF40000,0xE6000000,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1EC0000,0x1EC0000,0x1EC0000,0xB7FC0000,0xD3FC0000,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000, -0xC9FC0000,0xE5F80000,0xE5F80000,0xE5F80000,0xEC000000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xE5F80000,0xE5F80000,0xE5F80000,0xEC000000,0xE5F80000,0xE5F80000,0xE5F80000,0xEC000000,0xEC000000,0x5F80000,0x1D80001,0x1D80001,0x83FC0000,0xA9FC0000,0xBDFC0000,0xBDFC0000,0xD3FC0000,0x83FC0000,0xA9FC0000,0xDFF80000,0xE5F80000, -0xDFF80000,0x1E00001,0x1E00001,0x1E00001,0x1E00001,0xD5FC0000,0xD5FC0000,0xD5FC0000,0xEBF80000,0xEBF80000,0xF0000000,0xD5FC0000,0xD5FC0000,0xD5FC0000,0xEBF80000,0xEBF80000,0xF0000000,0xEBF80000,0xEBF80000,0xF0000000,0xF0000000,0xD5FC0000,0xD5FC0000,0xD5FC0000,0xEBF80000,0xEBF80000,0xF0000000,0xEBF80000,0xEBF80000,0xF0000000,0xF0000000,0xEBF80000, -0xEBF80000,0xF0000000,0xF0000000,0xF0000000,0x9FFC0000,0x27FC0000,0x1E00001,0xCDFC0000,0xDDFC0000,0xE5FC0000,0xE9F80000,0xEDF80000,0xBDFC0000,0xD5FC0000,0xE5FC0000,0xF0000000,0xE5FC0000,0x1EC0001,0xE9FC0000,0xF3FC0000,0xF6000000,0xE9FC0000,0xF3FC0000,0xF6000000,0xF3FC0000,0xF6000000,0xF6000000,0xE9FC0000,0xF3FC0000,0xF6000000,0xF3FC0000,0xF6000000, -0xF6000000,0xF3FC0000,0xF6000000,0xF6000000,0xF6000000,0xE9FC0000,0xF3FC0000,0xF6000000,0xF3FC0000,0xF6000000,0xF6000000,0xF3FC0000,0xF6000000,0xF6000000,0xF6000000,0xF3FC0000,0xF6000000,0xF6000000,0xF6000000,0xF6000000,0xDBFC0000,0x87FC0000,0x87FC0000,0xEDFC0000,0xF3F80000,0xF5FC0000,0xF6000000,0xF6000000,0xE3FC0000,0xEFFC0000,0xF7E00000,0xF6000000, -0xF1FC0000,0x1DC008C,0xFFDC0044,0xFDD80033,0xF7D80033,0xFDD80034,0xFDD40013,0xF9D4001A,0xF9D40024,0xF5D40016,0xF3D40026,0xFFD0003C,0xFDD0000A,0xF9D0000F,0xFBCC0013,0xF7CC0002,0xF3D00016,0xF7D00034,0xF5CC000F,0xF3CC0019,0xF1CC0035,0xCFFC0088,0xFFC80035,0xF7CC0033,0xFBC40026,0xF7C80012,0xF3CC0026,0xFDB80033,0xF7BC0009,0xF3C00015,0xF1C40034,0xE7FC0088, -0xF7A80033,0xF3B00026,0xF1A40034,0xEE000088,0xFFD8004E,0xF7DC0076,0xF7DC007B,0xFFD4001A,0xFFD00002,0xF9D00002,0xF7D00002,0xF7CC0002,0xFFD4004A,0xFFD00015,0xF7CC0009,0xF3C00015,0xE1FC0088,0x1E40033,0xFFE0000E,0xFBDC000A,0xF7DC000A,0xFFDC0013,0xFBD80002,0xF7DC0001,0xF7DC0013,0xF7D80005,0xF3D80015,0xD9FC0033,0xFDD00009,0xF7D4000A,0xFBCC0012,0xF7CC0001, -0xF3D40014,0xEDF80033,0xF7BC0008,0xF3C00014,0xF0000034,0xD9FC0033,0xFDD00009,0xF7D4000A,0xFBCC0012,0xF7CC0001,0xF3D40014,0xEDF80033,0xF7BC0008,0xF3C00014,0xF0000034,0xEDF80033,0xF7BC0008,0xF3C00014,0xF0000034,0xF0000034,0xFDE00021,0xF9E0002D,0xFBE4002A,0xFFD8000D,0xFFD00001,0xF9D00001,0xF7D00001,0xF7CC0001,0xFFDC0019,0xFFD4000A,0xF7CC0009,0xF3C00014, -0xE7FC0033,0x1D80033,0x1D80033,0x1D80033,0x1D80033,0xFFD40012,0xFFD40012,0xFFD40012,0xF5D40012,0xF5D40012,0xF1D40012,0xFDD00009,0xFDD00009,0xFDD00009,0xF5D00002,0xF5D00002,0xF1D00005,0xF3CC0009,0xF3CC0009,0xF1CC0001,0xEFCC0009,0xC7FC0033,0xC7FC0033,0xC7FC0033,0xF7C80012,0xF7C80012,0xF1CC0012,0xF5C40009,0xF5C40009,0xF1C40001,0xEFC40009,0xE3FC0033, -0xE3FC0033,0xF1B80012,0xEFAC0008,0xEC000034,0xFDD4001D,0xF5D8002A,0x1D80033,0xFFD4000A,0xFFD00001,0xF9D00001,0xF7D00002,0xF5CC0002,0xFFD00018,0xFFD00005,0xF7CC0008,0xF1C40001,0xDDF80033,0x1DC000A,0x1DC000A,0x1DC000A,0x1DC000A,0xF7DC0001,0xF7DC0001,0xF7DC0001,0xF3D80001,0xF3D80001,0xF1D80001,0xCFFC0008,0xCFFC0008,0xCFFC0008,0xF3D40001,0xF3D40001, -0xF1D40001,0xE7FC0008,0xE7FC0008,0xF1C80000,0xEE000008,0xCFFC0008,0xCFFC0008,0xCFFC0008,0xF3D40001,0xF3D40001,0xF1D40001,0xE7FC0008,0xE7FC0008,0xF1C80000,0xEE000008,0xE7FC0008,0xE7FC0008,0xF1C80000,0xEE000008,0xEE000008,0xFFD80004,0xF7DC0005,0x1DC000A,0xFDD80001,0xFBD40000,0xF7D40000,0xF5D40000,0xF3D40001,0xF1DC0005,0xFBD40001,0xE1FC0008,0xF1C80000, -0xE1FC0008,0x1E80012,0xFDE40002,0xF9E40001,0xF7E00001,0xDFFC0012,0xFBD80001,0xF7DC0000,0xEFFC0012,0xF7C80000,0xF2000014,0xDFFC0012,0xFBD80001,0xF7DC0000,0xEFFC0012,0xF7C80000,0xF2000014,0xEFFC0012,0xF7C80000,0xF2000014,0xF2000014,0xDFFC0012,0xFBD80001,0xF7DC0000,0xEFFC0012,0xF7C80000,0xF2000014,0xEFFC0012,0xF7C80000,0xF2000014,0xF2000014,0xEFFC0012, -0xF7C80000,0xF2000014,0xF2000014,0xF2000014,0xF7E8000D,0x57FC0012,0xFDE8000D,0xFDE00008,0xFFD00001,0xFBCC0000,0xF7D00000,0xF7C00000,0xF9E4000D,0xFFD80005,0xF7D40001,0xF2000014,0xEBFC0012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0x1D40012,0xF7D00001,0xF7D00001,0xF7D00001,0xF7D00001,0xF7D00001, -0xF7D00001,0xEFD00001,0xEFD00001,0xEFD00001,0xEDCC0001,0xC1FC0012,0xC1FC0012,0xC1FC0012,0xC1FC0012,0xC1FC0012,0xC1FC0012,0xF1C40001,0xF1C40001,0xF1C40001,0xEDC80000,0xE1F80012,0xE1F80012,0xE1F80012,0xEDB40000,0xE8000014,0xF3D4000D,0x1D40012,0x1D40012,0xFBD00005,0xFFD00001,0xF9D00001,0xF9D00001,0xF5D00001,0xFBD00008,0xFDCC0004,0xF3CC0000,0xF1C40001, -0xD9FC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table30[] = { -0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xE7FC0000, -0xE7FC0000,0xE7FC0000,0xE7FC0000,0xEE000000,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0x7FC0000,0x7FC0000,0x7FC0000,0xCFFC0000,0xE1FC0000,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xE3FC0000, -0xE3FC0000,0xF1F80000,0xF1F80000,0xF1F80000,0xF4000000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xF1F80000,0xF1F80000,0xF1F80000,0xF4000000,0xF1F80000,0xF1F80000,0xF1F80000,0xF4000000,0xF4000000,0x67FC0000,0x1E80001,0x1E80001,0xBBFC0000,0xD1FC0000,0xDBFC0000,0xDBFC0000,0xE7FC0000,0xBBFC0000,0xD1FC0000,0xEDFC0000,0xF1F80000, -0xEDFC0000,0x1F00001,0x1F00001,0x1F00001,0x1F00001,0xEFFC0000,0xEFFC0000,0xEFFC0000,0xF7F80000,0xF7F80000,0xF8000000,0xEFFC0000,0xEFFC0000,0xEFFC0000,0xF7F80000,0xF7F80000,0xF8000000,0xF7F80000,0xF7F80000,0xF8000000,0xF8000000,0xEFFC0000,0xEFFC0000,0xEFFC0000,0xF7F80000,0xF7F80000,0xF8000000,0xF7F80000,0xF7F80000,0xF8000000,0xF8000000,0xF7F80000, -0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xD7FC0000,0xA7FC0000,0x1F00001,0xEBFC0000,0xF1FC0000,0xF5FC0000,0xF5FC0000,0xF7FC0000,0xE3FC0000,0xEFFC0000,0xF5FC0000,0xF8000000,0xF5FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1EC008C,0xFFEC005F,0xFFE80043,0xFFE80033,0xFFE8004C,0xFFE4002B,0xFFE4001B,0xFFE40026,0xFDE40016,0xFBE40026,0xFFE80054,0xFFE40023,0xFFE00011,0xFFE0001B,0xFFDC0002,0xFBE00016,0xFFE00034,0xFDDC000F,0xFBDC0019,0xF9DC0035,0xE9FC0088,0xFFE4004B,0xFFDC0033,0xFFD80036,0xFFD80012,0xFBDC0026,0xFFD8003D,0xFFCC0009,0xFBD00015,0xF9D40034,0xF3FC0088, -0xFFB80033,0xFBC00026,0xF9B40034,0xF6000088,0xFFEC006A,0xFFEC0076,0xFFEC007B,0xFFE8004F,0xFFE40026,0xFFE0000C,0xFFE00002,0xFFDC0002,0xFDEC006A,0xFFE4004A,0xFFDC0009,0xFBD00015,0xF1FC0088,0x1F40033,0xFDF00023,0xFFF0000E,0xFFEC000A,0xFDF00023,0xFFEC000A,0xFFEC0001,0xFFEC0013,0xFFE80005,0xFBE80015,0xF1FC0033,0xFFEC0019,0xFFE4000A,0xFFE40015,0xFFDC0001, -0xFBE40014,0xF9F80033,0xFFCC0008,0xFBD00014,0xF8000034,0xF1FC0033,0xFFEC0019,0xFFE4000A,0xFFE40015,0xFFDC0001,0xFBE40014,0xF9F80033,0xFFCC0008,0xFBD00014,0xF8000034,0xF9F80033,0xFFCC0008,0xFBD00014,0xF8000034,0xF8000034,0xFDF4002A,0xF3F40033,0xF3F40033,0xFFEC001E,0xFFE80012,0xFFE40009,0xFFE00001,0xFFDC0001,0xFFF00029,0xFFF00021,0xFFDC0009,0xFBD00014, -0xF7FC0033,0x1E80033,0x1E80033,0x1E80033,0x1E80033,0xFDE4001B,0xFDE4001B,0xFDE4001B,0xFDE40012,0xFDE40012,0xF9E40012,0xFFE00011,0xFFE00011,0xFFE00011,0xFDE00002,0xFDE00002,0xF9E00005,0xFBDC0009,0xFBDC0009,0xF9DC0001,0xF7DC0009,0xDFFC0033,0xDFFC0033,0xDFFC0033,0xFFD80012,0xFFD80012,0xF9DC0012,0xFDD40009,0xFDD40009,0xF9D40001,0xF7D40009,0xEFFC0033, -0xEFFC0033,0xF9C80012,0xF7BC0008,0xF4000034,0xFFE80022,0xFDE8002A,0x1E80033,0xFDE4001A,0xFFE4000D,0xFFE00003,0xFFE00002,0xFDDC0002,0xFFE40021,0xFFE00018,0xFFDC0008,0xF9D40001,0xEBFC0033,0x1EC000A,0x1EC000A,0x1EC000A,0x1EC000A,0xFFEC0001,0xFFEC0001,0xFFEC0001,0xFBE80001,0xFBE80001,0xF9E80001,0xE9FC0008,0xE9FC0008,0xE9FC0008,0xFBE40001,0xFBE40001, -0xF9E40001,0xF3FC0008,0xF3FC0008,0xF9D80000,0xF6000008,0xE9FC0008,0xE9FC0008,0xE9FC0008,0xFBE40001,0xFBE40001,0xF9E40001,0xF3FC0008,0xF3FC0008,0xF9D80000,0xF6000008,0xF3FC0008,0xF3FC0008,0xF9D80000,0xF6000008,0xF6000008,0xFBEC0005,0xFFEC0005,0x1EC000A,0xF9EC0005,0xFDE80002,0xFFE40000,0xFDE40000,0xFBE40001,0xF9EC0005,0xFFE80002,0xF1FC0008,0xF9D80000, -0xF1FC0008,0x1F80012,0xFFF4000A,0xFFF00005,0xFFF00001,0xF7FC0012,0xFFF00005,0xFFEC0000,0xFBFC0012,0xFFD80000,0xFA000014,0xF7FC0012,0xFFF00005,0xFFEC0000,0xFBFC0012,0xFFD80000,0xFA000014,0xFBFC0012,0xFFD80000,0xFA000014,0xFA000014,0xF7FC0012,0xFFF00005,0xFFEC0000,0xFBFC0012,0xFFD80000,0xFA000014,0xFBFC0012,0xFFD80000,0xFA000014,0xFA000014,0xFBFC0012, -0xFFD80000,0xFA000014,0xFA000014,0xFA000014,0xFFF8000D,0xD7FC0012,0xF5F80012,0xFFF4000D,0xFDF4000D,0xFFE80005,0xFFE00000,0xFFD00000,0xF5FC0012,0xFFF00011,0xFFE40001,0xFA000014,0xFBFC0012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0x1E40012,0xFFE00001,0xFFE00001,0xFFE00001,0xFFE00001,0xFFE00001, -0xFFE00001,0xF7E00001,0xF7E00001,0xF7E00001,0xF5DC0001,0xD9FC0012,0xD9FC0012,0xD9FC0012,0xD9FC0012,0xD9FC0012,0xD9FC0012,0xF9D40001,0xF9D40001,0xF9D40001,0xF5D80000,0xEDF80012,0xEDF80012,0xEDF80012,0xF5C40000,0xF0000014,0xFBE4000D,0x1E40012,0x1E40012,0xFBE0000A,0xFDE00005,0xFFE00002,0xFFE00002,0xFDE00001,0xF7E4000D,0xFDE00008,0xFBDC0000,0xF9D40001, -0xE7FC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table31[] = { -0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xF3FC0000, -0xF3FC0000,0xF3FC0000,0xF3FC0000,0xF6000000,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0x87FC0000,0x87FC0000,0x87FC0000,0xE9FC0000,0xF1FC0000,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000, -0xFBFC0000,0xFDF80000,0xFDF80000,0xFDF80000,0xFC000000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFDF80000,0xFDF80000,0xFDF80000,0xFC000000,0xFDF80000,0xFDF80000,0xFDF80000,0xFC000000,0xFC000000,0xE7FC0000,0x1F80001,0x1F80001,0xF5FC0000,0xF7FC0000,0xF9FC0000,0xF9FC0000,0xFBFC0000,0xF5FC0000,0xF7FC0000,0xFDF80000,0xFDF80000, -0xFDF80000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1F8002C,0xFFF80027,0xFFF80024,0xFFF80023,0xFFF80022,0xFFF4001F,0xFFF4001B,0xFFF4001A,0xFFF40016,0xFFF40012,0xFFF4001C,0xFFF40017,0xFFF40013,0xFFF00012,0xFFF0000E,0xFFF0000A,0xFFF00009,0xFFF00005,0xFFF00001,0xFFEC0005,0xF7FC002C,0xFDF80027,0xFFF40023,0xFFF0001A,0xFFF00016,0xFFF00012,0xFFF00011,0xFFF0000D,0xFFE40005,0xFFE40005,0xFBFC002C, -0xFFEC0023,0xFFE00012,0xFFCC0004,0xFC00002C,0xFFF80027,0xF5F8002C,0xF5F8002C,0xFFF80022,0xFFF4001A,0xFFF40012,0xFFF40011,0xFFF00009,0xFFF80022,0xFFF4001F,0xFFF0000B,0xFFE40005,0xFBFC002C,0x1FC0003,0xFDFC0003,0xFFFC0002,0xFFFC0002,0xFDFC0003,0xFFFC0002,0xFFFC0002,0xFFFC0001,0xFFFC0001,0xFFF80001,0xFDFC0003,0xFFFC0002,0xFFFC0002,0xFFFC0001,0xFFF80001, -0xFFF80000,0xFFF80003,0xFFF80002,0xFFF00000,0xFE000004,0xFDFC0003,0xFFFC0002,0xFFFC0002,0xFFFC0001,0xFFF80001,0xFFF80000,0xFFF80003,0xFFF80002,0xFFF00000,0xFE000004,0xFFF80003,0xFFF80002,0xFFF00000,0xFE000004,0xFE000004,0xFDFC0003,0xF7FC0003,0xF7FC0003,0xFDFC0003,0xFFFC0002,0xFFFC0001,0xFFF80001,0xFFF80001,0xFDFC0003,0xFDFC0003,0xFFF80002,0xFFF00000, -0xFFF80003,0x1F80023,0x1F80023,0x1F80023,0x1F80023,0xFFF4001B,0xFFF4001B,0xFFF4001B,0xFFF40016,0xFFF40016,0xFFF40012,0xFFF40013,0xFFF40013,0xFFF40013,0xFFF0000E,0xFFF0000E,0xFFF0000A,0xFFF00005,0xFFF00005,0xFFF00001,0xFDEC0005,0xF7FC0023,0xF7FC0023,0xF7FC0023,0xFFF00016,0xFFF00016,0xFFF00012,0xFFF0000D,0xFFF0000D,0xFFE40005,0xFDE80004,0xFBFC0023, -0xFBFC0023,0xFFE00012,0xFDD40004,0xFA000024,0xFFF40022,0xF5F80023,0x1F80023,0xFFF4001D,0xFFF40016,0xFFF40011,0xFFF40011,0xFFF00009,0xFFF4001D,0xFFF40016,0xFFF0000B,0xFFE40005,0xFBFC0023,0x1FC0002,0x1FC0002,0x1FC0002,0x1FC0002,0xFDFC0002,0xFDFC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFF80001,0xFDFC0002,0xFDFC0002,0xFDFC0002,0xFFF80001,0xFFF80001, -0xFFF80000,0xFFF80002,0xFFF80002,0xFFF00000,0xFC000004,0xFDFC0002,0xFDFC0002,0xFDFC0002,0xFFF80001,0xFFF80001,0xFFF80000,0xFFF80002,0xFFF80002,0xFFF00000,0xFC000004,0xFFF80002,0xFFF80002,0xFFF00000,0xFC000004,0xFC000004,0xFBFC0002,0xF7FC0002,0x1FC0002,0xFDFC0002,0xFDFC0002,0xFFF80001,0xFFF80001,0xFFF80001,0xFDFC0002,0xFDFC0002,0xFFF80002,0xFFF00000, -0xFFF80002,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0x1F40012,0xFDF0000A,0xFDF0000A,0xFDF0000A,0xFDF0000A,0xFDF0000A, -0xFDF0000A,0xFFF00001,0xFFF00001,0xFFF00001,0xFDEC0001,0xF1FC0012,0xF1FC0012,0xF1FC0012,0xF1FC0012,0xF1FC0012,0xF1FC0012,0xFDEC0005,0xFDEC0005,0xFDEC0005,0xFDE80000,0xF9F80012,0xF9F80012,0xF9F80012,0xFDD40000,0xF8000014,0xF3F40012,0x1F40012,0x1F40012,0xFFF4000D,0xFDF4000D,0xFFF0000A,0xFFF0000A,0xFFF00005,0xFFF4000D,0xFDF4000D,0xFFF00002,0xFDEC0005, -0xF7FC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table32[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x80001,0x80001,0x80001,0x80001,0x20C0000,0x20C0000,0x20C0000,0x180000,0x180000,0x4000000,0x20C0000,0x20C0000,0x20C0000,0x180000,0x180000,0x4000000,0x180000,0x180000,0x4000000,0x4000000,0x20C0000,0x20C0000,0x20C0000,0x180000,0x180000,0x4000000,0x180000,0x180000,0x4000000,0x4000000,0x180000, -0x180000,0x4000000,0x4000000,0x4000000,0xC0000,0xC080000,0x80001,0xC0000,0x100000,0x140000,0x140000,0x200000,0xC0000,0x20C0000,0x140000,0x4000000,0x140000,0x200001,0x2300000,0x640000,0x10000000,0x2300000,0x640000,0x10000000,0x640000,0x10000000,0x10000000,0x2300000,0x640000,0x10000000,0x640000,0x10000000, -0x10000000,0x640000,0x10000000,0x10000000,0x10000000,0x2300000,0x640000,0x10000000,0x640000,0x10000000,0x10000000,0x640000,0x10000000,0x10000000,0x10000000,0x640000,0x10000000,0x10000000,0x10000000,0x10000000,0x2280000,0x240000,0x240000,0x380000,0x500000,0x9C0000,0x10000000,0x10000000,0x22C0000,0x400000,0x1F40000,0x10000000, -0x480000,0xC00C2,0x2E040011,0x18040011,0x10040011,0x20000048,0x18000009,0x10000001,0x10000048,0xE00001D,0xA000048,0x14000099,0x12000035,0xE00001D,0xC000060,0xC000030,0xA000058,0xA000099,0xA000059,0x8000074,0x6000099,0x1000C2,0x12000059,0xC000031,0xC000070,0xC000040,0x8000062,0x80000A7,0xA000069,0x800007D,0x600009D,0x1800C2, -0x8000089,0x8000098,0x40000B2,0x40000C2,0x42000029,0xA8000048,0xEA040011,0x1E000029,0x16000032,0xE000032,0xC000022,0xA00003D,0x2C000056,0x1A00003D,0xC00004F,0x800007D,0x1400C2,0x10009A,0x2E04000D,0x1804000D,0x1004000D,0x20000048,0x18000009,0x10000001,0x10000048,0xE00001D,0xA000048,0x140099,0x12000035,0xE00001D,0xC000060,0xC000030, -0xA000058,0x240099,0xA000059,0x8000074,0x6000099,0x140099,0x12000035,0xE00001D,0xC000060,0xC000030,0xA000058,0x240099,0xA000059,0x8000074,0x6000099,0x240099,0xA000059,0x8000074,0x6000099,0x6000099,0x42000029,0xA8000048,0xEA04000D,0x1E000029,0x16000032,0xE000032,0xC000022,0xA00003D,0x2C00004D,0x1A000039,0xC00004E,0x8000074, -0x1C0099,0x40011,0x40011,0x40011,0x40011,0xE000000,0xE000000,0xE000000,0x6000000,0x6000000,0x4000000,0x400000D,0x400000D,0x400000D,0x6000004,0x6000004,0x4000004,0x200000D,0x200000D,0x2000008,0x200000D,0x40011,0x40011,0x40011,0x6000008,0x6000008,0x2000006,0x200000E,0x200000E,0x2000009,0x200000E,0x80011, -0x80011,0x200000C,0x2000011,0x12,0x20000004,0x48000000,0x40011,0x10000004,0x6000005,0x8000004,0x8000004,0x4000005,0xC000009,0xA000006,0x200000D,0x2000009,0x80011,0x4000D,0x4000D,0x4000D,0x4000D,0xE000000,0xE000000,0xE000000,0x6000000,0x6000000,0x4000000,0x4000D,0x4000D,0x4000D,0x6000004,0x6000004, -0x4000004,0x8000D,0x8000D,0x2000008,0x200000D,0x4000D,0x4000D,0x4000D,0x6000004,0x6000004,0x4000004,0x8000D,0x8000D,0x2000008,0x200000D,0x8000D,0x8000D,0x2000008,0x200000D,0x200000D,0x20000004,0x48000000,0x4000D,0x10000004,0x6000005,0x8000004,0x8000004,0x4000005,0x4040008,0xA000005,0x8000D,0x2000008, -0x8000D,0x14004A,0x260C0001,0x16080001,0x10080001,0x200048,0x18000009,0x10000001,0x3C0048,0xE00001D,0xA000048,0x200048,0x18000009,0x10000001,0x3C0048,0xE00001D,0xA000048,0x3C0048,0xE00001D,0xA000048,0xA000048,0x200048,0x18000009,0x10000001,0x3C0048,0xE00001D,0xA000048,0x3C0048,0xE00001D,0xA000048,0xA000048,0x3C0048, -0xE00001D,0xA000048,0xA000048,0xA000048,0x42000019,0x180048,0xAE0C0001,0x1E000019,0x16000019,0x10000019,0xE000014,0xE000028,0x32000022,0x1E00001D,0x10000011,0xA000048,0x2C0048,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table33[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x80000,0x80000,0x80000,0x80000,0x80000, -0x80000,0xC0000,0xC0000,0xC0000,0x2000000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0xC0000,0xC0000,0xC0000,0x2000000,0xC0000,0xC0000,0xC0000,0x2000000,0x2000000,0xA040000,0x40001,0x40001,0x6040000,0x80000,0x80000,0x80000,0x80000,0x6040000,0x80000,0xC0000,0xC0000, -0xC0000,0x180001,0x180001,0x180001,0x180001,0x2240000,0x2240000,0x2240000,0x4C0000,0x4C0000,0xC000000,0x2240000,0x2240000,0x2240000,0x4C0000,0x4C0000,0xC000000,0x4C0000,0x4C0000,0xC000000,0xC000000,0x2240000,0x2240000,0x2240000,0x4C0000,0x4C0000,0xC000000,0x4C0000,0x4C0000,0xC000000,0xC000000,0x4C0000, -0x4C0000,0xC000000,0xC000000,0xC000000,0x41C0000,0x1C0000,0x180001,0x240000,0x2C0000,0x340000,0x3C0000,0x5C0000,0x200000,0x2240000,0x340000,0xC000000,0x340000,0x300001,0x480000,0x940000,0x18000000,0x480000,0x940000,0x18000000,0x940000,0x18000000,0x18000000,0x480000,0x940000,0x18000000,0x940000,0x18000000, -0x18000000,0x940000,0x18000000,0x18000000,0x18000000,0x480000,0x940000,0x18000000,0x940000,0x18000000,0x18000000,0x940000,0x18000000,0x18000000,0x18000000,0x940000,0x18000000,0x18000000,0x18000000,0x18000000,0x23C0000,0x2340000,0x2340000,0x540000,0x780000,0xEC0000,0x18000000,0x18000000,0x440000,0x5C0000,0xBC80000,0x18000000, -0x680000,0x14017F,0x3E0C005E,0x220C005E,0x180C005E,0x3404004D,0x22040006,0x1804000E,0x1A04004D,0x16000011,0x1204004D,0x2A0000F3,0x22000045,0x18000032,0x18000069,0x14000021,0x12000051,0x140000F3,0x10000084,0x10000090,0xC0000F4,0x1C017F,0x1E0000AE,0x18000072,0x180000A9,0x12000051,0x12000075,0x12000118,0x100000A8,0xE0000B2,0xC000104,0x38017F, -0x100000FD,0xE0000FD,0xA000139,0xA000181,0x6400001A,0xFA04004F,0xFE0C0067,0x3400001A,0x22000021,0x1A00001A,0x1600000E,0x14000038,0x42000066,0x2C000041,0x1600005C,0xE0000B2,0x28017F,0x1C00F3,0x3A100032,0x20100032,0x18100032,0x30080049,0x22040002,0x18080005,0x1A040049,0x1604000E,0x12040049,0x2800F3,0x22000045,0x18000032,0x18000069,0x14000021, -0x12000051,0x5000F3,0x10000084,0x10000090,0xC0000F4,0x2800F3,0x22000045,0x18000032,0x18000069,0x14000021,0x12000051,0x5000F3,0x10000084,0x10000090,0xC0000F4,0x5000F3,0x10000084,0x10000090,0xC0000F4,0xC0000F4,0x6400001A,0xEC080049,0xF0100036,0x3400001A,0x22000021,0x1A00001A,0x1600000E,0x14000038,0x50000052,0x2C000038,0x1600005B,0x10000090, -0x3800F3,0xC005E,0xC005E,0xC005E,0xC005E,0x22040005,0x22040005,0x22040005,0x12040005,0x12040005,0xC040005,0x16000032,0x16000032,0x16000032,0x12000009,0x12000009,0xC000001,0xA000034,0xA000034,0xA000014,0x6000034,0x20C005D,0x20C005D,0x20C005D,0xC000021,0xC000021,0xC000011,0x8000043,0x8000043,0x8000022,0x6000038,0x18005D, -0x18005D,0x800003D,0x400004D,0x400005D,0x52000005,0xAA040005,0xC005E,0x2C00000A,0x1A000008,0x1600000A,0x12000008,0xC00000D,0x3400001D,0x1C000016,0xE000033,0x8000022,0x14005D,0x100032,0x100032,0x100032,0x100032,0x1E080001,0x1E080001,0x1E080001,0x10080001,0x10080001,0xC040001,0x180032,0x180032,0x180032,0x12000009,0x12000009, -0xC000001,0x2C0032,0x2C0032,0xA000014,0x6000034,0x180032,0x180032,0x180032,0x12000009,0x12000009,0xC000001,0x2C0032,0x2C0032,0xA000014,0x6000034,0x2C0032,0x2C0032,0xA000014,0x6000034,0x6000034,0x52000005,0x8C080001,0x100032,0x2C00000A,0x1A000008,0x1600000A,0x12000008,0xC00000D,0x34000014,0x1C000012,0x200032,0xA000014, -0x200032,0x24004A,0x2E1C0001,0x1E180001,0x18180001,0x380048,0x22040001,0x180C0001,0x700048,0x16000008,0x12000048,0x380048,0x22040001,0x180C0001,0x700048,0x16000008,0x12000048,0x700048,0x16000008,0x12000048,0x12000048,0x380048,0x22040001,0x180C0001,0x700048,0x16000008,0x12000048,0x700048,0x16000008,0x12000048,0x12000048,0x700048, -0x16000008,0x12000048,0x12000048,0x12000048,0x7400000A,0x280048,0xB61C0001,0x38000008,0x2204000D,0x1C000008,0x18000004,0x14000014,0x50000012,0x2E00000D,0x1A000001,0x12000048,0x500048,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x8000000,0x8000000,0x8000000,0x8000000,0x8000000, -0x8000000,0x4000000,0x4000000,0x4000000,0x2000000,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x2000002,0x2000002,0x2000002,0x2000001,0x5,0x5,0x5,0x2000004,0x5,0x28000000,0x40005,0x40005,0x12000000,0xC000000,0xA000000,0xA000000,0x6000000,0x12000001,0xC000001,0x4000000,0x2000002, -0x5,}; -static const uint32_t g_etc1_to_bc7_m6_table34[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x200000,0x200000,0x200000,0x200000,0x200000, -0x200000,0x3C0000,0x3C0000,0x3C0000,0xA000000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x3C0000,0x3C0000,0x3C0000,0xA000000,0x3C0000,0x3C0000,0x3C0000,0xA000000,0xA000000,0x180000,0x140001,0x140001,0x2180000,0x1C0000,0x1C0000,0x1C0000,0x240000,0x2180000,0x1C0000,0x2C0000,0x3C0000, -0x2C0000,0x280001,0x280001,0x280001,0x280001,0x23C0000,0x23C0000,0x23C0000,0x7C0000,0x7C0000,0x14000000,0x23C0000,0x23C0000,0x23C0000,0x7C0000,0x7C0000,0x14000000,0x7C0000,0x7C0000,0x14000000,0x14000000,0x23C0000,0x23C0000,0x23C0000,0x7C0000,0x7C0000,0x14000000,0x7C0000,0x7C0000,0x14000000,0x14000000,0x7C0000, -0x7C0000,0x14000000,0x14000000,0x14000000,0x300000,0x2C0000,0x280001,0x380000,0x440000,0x580000,0x640000,0x980000,0x340000,0x23C0000,0x580000,0x14000000,0x580000,0x400001,0x600000,0xC40000,0x20000000,0x600000,0xC40000,0x20000000,0xC40000,0x20000000,0x20000000,0x600000,0xC40000,0x20000000,0xC40000,0x20000000, -0x20000000,0xC40000,0x20000000,0x20000000,0x20000000,0x600000,0xC40000,0x20000000,0xC40000,0x20000000,0x20000000,0xC40000,0x20000000,0x20000000,0x20000000,0xC40000,0x20000000,0x20000000,0x20000000,0x20000000,0x4500000,0xA440000,0xA440000,0x26C0000,0xA00000,0x13C0000,0x20000000,0x20000000,0x580000,0x7C0000,0x13D80000,0x20000000, -0x8C0000,0x200253,0x4E1400DE,0x2A1400DF,0x201400DE,0x440C0085,0x2C0C0042,0x220C005A,0x240C0085,0x1E0C0045,0x1A0C0085,0x420000F3,0x30000032,0x2008004A,0x2600004E,0x20000001,0x1A00004C,0x200000F3,0x1C000054,0x18000074,0x140000F4,0x300253,0x28000106,0x200000DD,0x220000D3,0x1E000069,0x18000099,0x1E000158,0x1A0000AF,0x180000B4,0x14000125,0x5C0253, -0x16000179,0x16000159,0x100001AD,0x10000255,0x9C000003,0xF01000B1,0xF4180106,0x4E000001,0x36000001,0x28000001,0x20000005,0x1E00000C,0x64000051,0x3E00001D,0x20000042,0x180000B4,0x400253,0x2C00F3,0x42200032,0x28200032,0x20200032,0x38180049,0x2A140002,0x20180005,0x22140049,0x1E14000E,0x1A140049,0x4000F3,0x30000032,0x20100032,0x2404004E,0x20000001, -0x1A040049,0x8000F3,0x1C000054,0x18000074,0x140000F4,0x4000F3,0x30000032,0x20100032,0x2404004E,0x20000001,0x1A040049,0x8000F3,0x1C000054,0x18000074,0x140000F4,0x8000F3,0x1C000054,0x18000074,0x140000F4,0x140000F4,0x9C000003,0xF4180049,0xF8200036,0x4E000001,0x36000001,0x28000001,0x20080001,0x1E00000C,0x6C000021,0x3E00000D,0x2000003E,0x18000074, -0x5C00F3,0x1400DE,0x1400DE,0x1400DE,0x1400DE,0x320C003D,0x320C003D,0x320C003D,0x1C0C003D,0x1C0C003D,0x140C003D,0x30000032,0x30000032,0x30000032,0x1E000001,0x1E000001,0x1604000C,0x16000034,0x16000034,0x12000008,0xE000034,0x2000DD,0x2000DD,0x2000DD,0x18000059,0x18000059,0x12000041,0x12000068,0x12000068,0x1200002C,0xE00004D,0x3C00DD, -0x3C00DD,0xE000089,0xA000095,0xA0000DD,0x98000002,0xEE0C003D,0x1400DE,0x4E000000,0x32000001,0x26000001,0x24000002,0x1A000001,0x56000023,0x36000012,0x1E000036,0x1200002C,0x2C00DD,0x200032,0x200032,0x200032,0x200032,0x26180001,0x26180001,0x26180001,0x18180001,0x18180001,0x14140001,0x300032,0x300032,0x300032,0x1C040001,0x1C040001, -0x140C0000,0x5C0032,0x5C0032,0x12000008,0xE000034,0x300032,0x300032,0x300032,0x1C040001,0x1C040001,0x140C0000,0x5C0032,0x5C0032,0x12000008,0xE000034,0x5C0032,0x5C0032,0x12000008,0xE000034,0xE000034,0x7A080000,0x94180001,0x200032,0x4E000000,0x2C080001,0x24040000,0x1E080001,0x1A000001,0x5C000008,0x3C000002,0x400032,0x12000008, -0x400032,0x34004A,0x362C0001,0x26280001,0x20280001,0x500048,0x2A140001,0x201C0001,0xA00048,0x20000001,0x1A000048,0x500048,0x2A140001,0x201C0001,0xA00048,0x20000001,0x1A000048,0xA00048,0x20000001,0x1A000048,0x1A000048,0x500048,0x2A140001,0x201C0001,0xA00048,0x20000001,0x1A000048,0xA00048,0x20000001,0x1A000048,0x1A000048,0xA00048, -0x20000001,0x1A000048,0x1A000048,0x1A000048,0x9C000002,0x4380048,0xBE2C0001,0x4A040001,0x36000001,0x28000001,0x20080000,0x1E000008,0x72000008,0x46000004,0x220C0000,0x1A000048,0x700048,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000, -0x20000000,0x10000000,0x10000000,0x10000000,0xA000000,0x10003D,0x10003D,0x10003D,0x10003D,0x10003D,0x10003D,0xC000014,0xC000014,0xC000014,0x800000D,0x18003D,0x18003D,0x18003D,0x8000028,0x400003D,0xA8000000,0xC003D,0xC003D,0x4A000000,0x34000000,0x28000000,0x28000000,0x1A000000,0x44000011,0x34000009,0x14000001,0xC000014, -0x14003D,}; -static const uint32_t g_etc1_to_bc7_m6_table35[] = { -0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x240000, -0x240000,0x240000,0x240000,0x6000000,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xE0C0000,0xE0C0000,0xE0C0000,0x140000,0x1C0000,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x380000,0x380000,0x380000,0x380000,0x380000, -0x380000,0x700000,0x700000,0x700000,0x12000000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x700000,0x700000,0x700000,0x12000000,0x700000,0x700000,0x700000,0x12000000,0x12000000,0x280000,0x240001,0x240001,0x2C0000,0x300000,0x340000,0x340000,0x400000,0x2C0000,0x300000,0x500000,0x700000, -0x500000,0x380001,0x380001,0x380001,0x380001,0x540000,0x540000,0x540000,0xAC0000,0xAC0000,0x1C000000,0x540000,0x540000,0x540000,0xAC0000,0xAC0000,0x1C000000,0xAC0000,0xAC0000,0x1C000000,0x1C000000,0x540000,0x540000,0x540000,0xAC0000,0xAC0000,0x1C000000,0xAC0000,0xAC0000,0x1C000000,0x1C000000,0xAC0000, -0xAC0000,0x1C000000,0x1C000000,0x1C000000,0x440000,0x63C0000,0x380001,0x24C0000,0x600000,0x780000,0x8C0000,0xD40000,0x480000,0x540000,0x780000,0x1C000000,0x780000,0x500001,0x780000,0xF40000,0x28000000,0x780000,0xF40000,0x28000000,0xF40000,0x28000000,0x28000000,0x780000,0xF40000,0x28000000,0xF40000,0x28000000, -0x28000000,0xF40000,0x28000000,0x28000000,0x28000000,0x780000,0xF40000,0x28000000,0xF40000,0x28000000,0x28000000,0xF40000,0x28000000,0x28000000,0x28000000,0xF40000,0x28000000,0x28000000,0x28000000,0x28000000,0x4640000,0x580000,0x580000,0x880000,0xC80000,0x1880000,0x28000000,0x28000000,0x700000,0x980000,0x1BE80000,0x28000000, -0xAC0000,0x300274,0x562400F3,0x322400F4,0x282400F3,0x4C1C0092,0x341C004F,0x2A1C0067,0x2C1C0092,0x2818004E,0x22180092,0x4A1000F4,0x38100033,0x2A140051,0x300C004A,0x28100002,0x2210004D,0x280C00F4,0x24080053,0x20080069,0x1C0C00F5,0x2440274,0x3A0000F5,0x281000F4,0x2E0000AA,0x28000049,0x22040090,0x2800011F,0x2400005D,0x20000069,0x1C0000FD,0x8C0274, -0x2000015B,0x1C000120,0x1A000181,0x16000278,0xA4100004,0xF82000C2,0xFC28011F,0x56100002,0x3E100002,0x30100002,0x28100006,0x260C0006,0x90000009,0x54040000,0x2A080035,0x20000069,0x640274,0x3C00F3,0x4A300032,0x30300032,0x28300032,0x40280049,0x32240002,0x28280005,0x2A240049,0x2624000E,0x22240049,0x5800F3,0x38100032,0x28200032,0x300C0049,0x28100001, -0x22140049,0xB000F3,0x2600003E,0x20000059,0x1C0000F4,0x5800F3,0x38100032,0x28200032,0x300C0049,0x28100001,0x22140049,0xB000F3,0x2600003E,0x20000059,0x1C0000F4,0xB000F3,0x2600003E,0x20000059,0x1C0000F4,0x1C0000F4,0xA4100003,0xFC280049,0xF030003B,0x56100001,0x3E100001,0x30100001,0x28180001,0x28080004,0x90000005,0x54040000,0x2A040033,0x20000059, -0x7C00F3,0x2400F3,0x2400F3,0x2400F3,0x2400F3,0x3A1C004A,0x3A1C004A,0x3A1C004A,0x2418004A,0x2418004A,0x1C18004A,0x38100033,0x38100033,0x38100033,0x26100002,0x26100002,0x1E10000E,0x200C0033,0x200C0033,0x1C0C0005,0x160C0035,0x3400F3,0x3400F3,0x3400F3,0x28000049,0x28000049,0x1C08004A,0x22000042,0x22000042,0x1A000009,0x16000035,0x6800F3, -0x6800F3,0x16000074,0x16000074,0x120000F4,0xA0100003,0xF61C004A,0x2400F3,0x56100001,0x3A100002,0x2E100002,0x2C100003,0x240C0001,0x84000005,0x54040000,0x260C0033,0x1A000009,0x4C00F3,0x300032,0x300032,0x300032,0x300032,0x2E280001,0x2E280001,0x2E280001,0x20280001,0x20280001,0x1C240001,0x2440032,0x2440032,0x2440032,0x24140001,0x24140001, -0x1C1C0000,0x8C0032,0x8C0032,0x1C000000,0x16000034,0x2440032,0x2440032,0x2440032,0x24140001,0x24140001,0x1C1C0000,0x8C0032,0x8C0032,0x1C000000,0x16000034,0x8C0032,0x8C0032,0x1C000000,0x16000034,0x16000034,0x82180000,0x9C280001,0x300032,0x56100000,0x34180001,0x2C140000,0x26180001,0x240C0000,0x84040001,0x4E080000,0x640032,0x1C000000, -0x640032,0x44004A,0x3E3C0001,0x2E380001,0x28380001,0x680048,0x32240001,0x282C0001,0xD00048,0x28080000,0x22000048,0x680048,0x32240001,0x282C0001,0xD00048,0x28080000,0x22000048,0xD00048,0x28080000,0x22000048,0x22000048,0x680048,0x32240001,0x282C0001,0xD00048,0x28080000,0x22000048,0xD00048,0x28080000,0x22000048,0x22000048,0xD00048, -0x28080000,0x22000048,0x22000048,0x22000048,0xB8080000,0xC480048,0xC63C0001,0x5A0C0000,0x3E0C0001,0x32080000,0x28180000,0x28000001,0x94000002,0x54040000,0x2A1C0000,0x22000048,0x940048,0x18004A,0x18004A,0x18004A,0x18004A,0x18004A,0x18004A,0x18004A,0x18004A,0x18004A,0x18004A,0x28100001,0x28100001,0x28100001,0x28100001,0x28100001, -0x28100001,0x180C0001,0x180C0001,0x180C0001,0x120C0001,0x2240048,0x2240048,0x2240048,0x2240048,0x2240048,0x2240048,0x18000005,0x18000005,0x18000005,0x12000001,0x4C0048,0x4C0048,0x4C0048,0x10000014,0xC000048,0xB0100001,0x18004A,0x18004A,0x52100001,0x3C100001,0x30100001,0x30100001,0x22100001,0x84000001,0x54040000,0x1C0C0001,0x18000005, -0x340048,}; -static const uint32_t g_etc1_to_bc7_m6_table36[] = { -0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x5C0000, -0x5C0000,0x5C0000,0x5C0000,0xE000001,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x8200000,0x8200000,0x8200000,0x300000,0x400000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000, -0x2500000,0xA40000,0xA40000,0xA40000,0x1A000001,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0xA40000,0xA40000,0xA40000,0x1A000001,0xA40000,0xA40000,0xA40000,0x1A000001,0x1A000001,0x3C0000,0x380000,0x380000,0x400000,0x2440000,0x4C0000,0x4C0000,0x5C0000,0x400000,0x2440000,0x740000,0xA40000, -0x740000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x700000,0x700000,0x700000,0xE40000,0xE40000,0x24000001,0x700000,0x700000,0x700000,0xE40000,0xE40000,0x24000001,0xE40000,0xE40000,0x24000001,0x24000001,0x700000,0x700000,0x700000,0xE40000,0xE40000,0x24000001,0xE40000,0xE40000,0x24000001,0x24000001,0xE40000, -0xE40000,0x24000001,0x24000001,0x24000001,0x580000,0x500000,0x4C0000,0x680000,0x800000,0xA00000,0xB80000,0x1180000,0x600000,0x700000,0xA00000,0x24000001,0xA00000,0x640000,0x940000,0x12C0000,0x30000001,0x940000,0x12C0000,0x30000001,0x12C0000,0x30000001,0x30000001,0x940000,0x12C0000,0x30000001,0x12C0000,0x30000001, -0x30000001,0x12C0000,0x30000001,0x30000001,0x30000001,0x940000,0x12C0000,0x30000001,0x12C0000,0x30000001,0x30000001,0x12C0000,0x30000001,0x30000001,0x30000001,0x12C0000,0x30000001,0x30000001,0x30000001,0x30000001,0x7C0000,0xC680000,0xC680000,0xA80000,0xF40000,0x1E00000,0x30000001,0x30000001,0x880000,0xBC0000,0x25DC0000,0x30000001, -0xD40000,0x400278,0x5E3800F4,0x3C3800F4,0x303800F5,0x582C0090,0x402C004D,0x34300069,0x362C0090,0x302C004D,0x2A2C0092,0x542000F3,0x42200032,0x34280053,0x38200049,0x30200002,0x2C20004E,0x302000F4,0x2E1C0051,0x2A1C0067,0x262000F3,0x600274,0x460C00F3,0x302000F4,0x3E040092,0x3210004A,0x2A180092,0x360400FD,0x30000033,0x2A00004F,0x260800F4,0xC40274, -0x2C000121,0x260000E2,0x2400014C,0x20000274,0xB4200005,0xF23400D0,0xF63C012C,0x62200000,0x48200001,0x38200002,0x32240006,0x30200006,0xAC080002,0x5A180002,0x321C0035,0x2A00004F,0x8C0274,0x4C00F4,0x52440034,0x3A400034,0x30400035,0x4C380048,0x3A380001,0x32380005,0x3238004A,0x3034000E,0x2A38004A,0x7400F3,0x40240032,0x32300033,0x38200049,0x30200002, -0x2A28004A,0xE800F3,0x30000033,0x2A00004B,0x260000F3,0x7400F3,0x40240032,0x32300033,0x38200049,0x30200002,0x2A28004A,0xE800F3,0x30000033,0x2A00004B,0x260000F3,0xE800F3,0x30000033,0x2A00004B,0x260000F3,0x260000F3,0xA8240003,0xF63C004C,0xFA440038,0x62200000,0x48200001,0x38200002,0x32280001,0x30180003,0xAC080001,0x5A180001,0x34140032,0x2A00004B, -0xA400F3,0x3800F4,0x3800F4,0x3800F4,0x3800F4,0x462C0048,0x462C0048,0x462C0048,0x2C2C0049,0x2C2C0049,0x242C0049,0x42200032,0x42200032,0x42200032,0x30200001,0x30200001,0x2624000E,0x28200032,0x28200032,0x24200005,0x20200032,0x5000F3,0x5000F3,0x5000F3,0x32100049,0x32100049,0x241C0049,0x30000032,0x30000032,0x240C0002,0x20100032,0xA000F3, -0xA000F3,0x22000059,0x1E000053,0x1A0000F3,0xA8200004,0xFE2C0049,0x3800F4,0x62200000,0x44200001,0x38200001,0x34200004,0x2C200001,0xA20C0000,0x5A180001,0x301C0032,0x240C0002,0x7000F3,0x400034,0x400034,0x400034,0x400034,0x38380000,0x38380000,0x38380000,0x2A380000,0x2A380000,0x24380001,0x600032,0x600032,0x600032,0x2C280001,0x2C280001, -0x24300001,0xC40032,0xC40032,0x24140001,0x20000032,0x600032,0x600032,0x600032,0x2C280001,0x2C280001,0x24300001,0xC40032,0xC40032,0x24140001,0x20000032,0xC40032,0xC40032,0x24140001,0x20000032,0x20000032,0x90280000,0xB4380000,0x400034,0x62200000,0x42240000,0x36240000,0x32280000,0x2C200001,0xA20C0000,0x5A180000,0x8C0032,0x24140001, -0x8C0032,0x580048,0x4A4C0000,0x364C0001,0x304C0001,0x2800048,0x3A380001,0x30400001,0x1080048,0x301C0001,0x2A00004A,0x2800048,0x3A380001,0x30400001,0x1080048,0x301C0001,0x2A00004A,0x1080048,0x301C0001,0x2A00004A,0x2A00004A,0x2800048,0x3A380001,0x30400001,0x1080048,0x301C0001,0x2A00004A,0x1080048,0x301C0001,0x2A00004A,0x2A00004A,0x1080048, -0x301C0001,0x2A00004A,0x2A00004A,0x2A00004A,0xC4180000,0x65C0048,0xDE4C0000,0x62200000,0x46200001,0x3A1C0000,0x302C0001,0x300C0001,0xB4040000,0x5E140000,0x32300001,0x2A00004A,0xB80048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x2C0048,0x34200000,0x34200000,0x34200000,0x34200000,0x34200000, -0x34200000,0x20200001,0x20200001,0x20200001,0x1A200001,0x400048,0x400048,0x400048,0x400048,0x400048,0x400048,0x240C0001,0x240C0001,0x240C0001,0x1A140001,0x800048,0x800048,0x800048,0x1A000005,0x1400004A,0xC8200000,0x2C0048,0x2C0048,0x62200000,0x48200000,0x3C200000,0x3C200000,0x2C200000,0x9E0C0000,0x62140000,0x281C0000,0x240C0001, -0x5C0048,}; -static const uint32_t g_etc1_to_bc7_m6_table37[] = { -0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x8C0000, -0x8C0000,0x8C0000,0x8C0000,0x16000001,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x340000,0x340000,0x340000,0x2440000,0x640000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x2680000,0x2680000,0x2680000,0x2680000,0x2680000, -0x2680000,0xD80000,0xD80000,0xD80000,0x22000001,0x2680000,0x2680000,0x2680000,0x2680000,0x2680000,0x2680000,0xD80000,0xD80000,0xD80000,0x22000001,0xD80000,0xD80000,0xD80000,0x22000001,0x22000001,0x4C0000,0x480000,0x480000,0x540000,0x2580000,0x600000,0x600000,0x780000,0x540000,0x2580000,0x980000,0xD80000, -0x980000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x880000,0x880000,0x880000,0x1140000,0x1140000,0x2C000001,0x880000,0x880000,0x880000,0x1140000,0x1140000,0x2C000001,0x1140000,0x1140000,0x2C000001,0x2C000001,0x880000,0x880000,0x880000,0x1140000,0x1140000,0x2C000001,0x1140000,0x1140000,0x2C000001,0x2C000001,0x1140000, -0x1140000,0x2C000001,0x2C000001,0x2C000001,0x6680000,0x8600000,0x5C0000,0x7C0000,0x980000,0xC00000,0xE00000,0x1540000,0x740000,0x880000,0xC00000,0x2C000001,0xC00000,0x740000,0xAC0000,0x15C0000,0x38000001,0xAC0000,0x15C0000,0x38000001,0x15C0000,0x38000001,0x38000001,0xAC0000,0x15C0000,0x38000001,0x15C0000,0x38000001, -0x38000001,0x15C0000,0x38000001,0x38000001,0x38000001,0xAC0000,0x15C0000,0x38000001,0x15C0000,0x38000001,0x38000001,0x15C0000,0x38000001,0x38000001,0x38000001,0x15C0000,0x38000001,0x38000001,0x38000001,0x38000001,0x900000,0x7C0000,0x7C0000,0x2C00000,0x11C0000,0x9F00000,0x38000001,0x38000001,0x9C0000,0xD80000,0x2DEC0000,0x38000001, -0xF40000,0x500278,0x664800F4,0x444800F4,0x384800F5,0x603C0090,0x483C004D,0x3C400069,0x3E3C0090,0x383C004D,0x323C0092,0x5C3000F3,0x4A300032,0x3C380053,0x40300049,0x38300002,0x3430004E,0x383000F4,0x362C0051,0x322C0067,0x2E3000F3,0x780274,0x4E1C00F3,0x383000F4,0x46140092,0x3A20004A,0x32280092,0x440400F3,0x38100033,0x3210004F,0x2E1800F4,0xF40274, -0x36000104,0x320000B2,0x2C00011F,0x28000274,0xBC300005,0xFA4400D0,0xFE4C012C,0x6A300000,0x50300001,0x40300002,0x3A340006,0x38300006,0xB4180002,0x62280002,0x3A2C0035,0x3210004F,0xAC0274,0x5C00F4,0x5A540034,0x42500034,0x38500035,0x54480048,0x42480001,0x3A480005,0x3A48004A,0x3844000E,0x3248004A,0x8C00F3,0x48340032,0x3A400033,0x40300049,0x38300002, -0x3238004A,0x11800F3,0x38100033,0x3208004A,0x2E0000F3,0x8C00F3,0x48340032,0x3A400033,0x40300049,0x38300002,0x3238004A,0x11800F3,0x38100033,0x3208004A,0x2E0000F3,0x11800F3,0x38100033,0x3208004A,0x2E0000F3,0x2E0000F3,0xB0340003,0xFE4C004C,0xF254003D,0x6A300000,0x50300001,0x40300002,0x3A380001,0x38280003,0xB4180001,0x62280001,0x3C240032,0x3208004A, -0xC800F3,0x4800F4,0x4800F4,0x4800F4,0x4800F4,0x4E3C0048,0x4E3C0048,0x4E3C0048,0x343C0049,0x343C0049,0x2C3C0049,0x4A300032,0x4A300032,0x4A300032,0x38300001,0x38300001,0x2E34000E,0x30300032,0x30300032,0x2C300005,0x28300032,0x6800F3,0x6800F3,0x6800F3,0x3A200049,0x3A200049,0x2C2C0049,0x38100032,0x38100032,0x2C1C0002,0x28200032,0xD000F3, -0xD000F3,0x2C000049,0x2600003E,0x220000F3,0xB0300004,0xF63C004C,0x4800F4,0x6A300000,0x4C300001,0x40300001,0x3C300004,0x34300001,0xAA1C0000,0x62280001,0x382C0032,0x2C1C0002,0x9400F3,0x500034,0x500034,0x500034,0x500034,0x40480000,0x40480000,0x40480000,0x32480000,0x32480000,0x2C480001,0x780032,0x780032,0x780032,0x34380001,0x34380001, -0x2C400001,0xF40032,0xF40032,0x2C240001,0x28000032,0x780032,0x780032,0x780032,0x34380001,0x34380001,0x2C400001,0xF40032,0xF40032,0x2C240001,0x28000032,0xF40032,0xF40032,0x2C240001,0x28000032,0x28000032,0x98380000,0xBC480000,0x500034,0x6A300000,0x4A340000,0x3E340000,0x3A380000,0x34300001,0xAA1C0000,0x62280000,0xAC0032,0x2C240001, -0xAC0032,0x680048,0x525C0000,0x3E5C0001,0x385C0001,0x2980048,0x42480001,0x38500001,0x1380048,0x382C0001,0x3200004A,0x2980048,0x42480001,0x38500001,0x1380048,0x382C0001,0x3200004A,0x1380048,0x382C0001,0x3200004A,0x3200004A,0x2980048,0x42480001,0x38500001,0x1380048,0x382C0001,0x3200004A,0x1380048,0x382C0001,0x3200004A,0x3200004A,0x1380048, -0x382C0001,0x3200004A,0x3200004A,0x3200004A,0xCC280000,0xE6C0048,0xE65C0000,0x6A300000,0x4E300001,0x422C0000,0x383C0001,0x381C0001,0xBC140000,0x66240000,0x3A400001,0x3200004A,0xDC0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C0048,0x3C300000,0x3C300000,0x3C300000,0x3C300000,0x3C300000, -0x3C300000,0x28300001,0x28300001,0x28300001,0x22300001,0x580048,0x580048,0x580048,0x580048,0x580048,0x580048,0x2C1C0001,0x2C1C0001,0x2C1C0001,0x22240001,0xB00048,0xB00048,0xB00048,0x22000001,0x1C00004A,0xD0300000,0x3C0048,0x3C0048,0x6A300000,0x50300000,0x44300000,0x44300000,0x34300000,0xA61C0000,0x6A240000,0x302C0000,0x2C1C0001, -0x7C0048,}; -static const uint32_t g_etc1_to_bc7_m6_table38[] = { -0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0xBC0000, -0xBC0000,0xBC0000,0xBC0000,0x1E000001,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x440000,0x440000,0x440000,0x25C0000,0x880000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x2800000,0x2800000,0x2800000,0x2800000,0x2800000, -0x2800000,0x1080000,0x1080000,0x1080000,0x2A000001,0x2800000,0x2800000,0x2800000,0x2800000,0x2800000,0x2800000,0x1080000,0x1080000,0x1080000,0x2A000001,0x1080000,0x1080000,0x1080000,0x2A000001,0x2A000001,0x65C0000,0x580000,0x580000,0x4640000,0x26C0000,0x780000,0x780000,0x940000,0x4640000,0x26C0000,0xB80000,0x1080000, -0xB80000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0xA00000,0xA00000,0xA00000,0x1440000,0x1440000,0x34000001,0xA00000,0xA00000,0xA00000,0x1440000,0x1440000,0x34000001,0x1440000,0x1440000,0x34000001,0x34000001,0xA00000,0xA00000,0xA00000,0x1440000,0x1440000,0x34000001,0x1440000,0x1440000,0x34000001,0x34000001,0x1440000, -0x1440000,0x34000001,0x34000001,0x34000001,0x27C0000,0x740000,0x6C0000,0x2900000,0xB40000,0xE40000,0x1080000,0x1900000,0x880000,0xA00000,0xE40000,0x34000001,0xE40000,0x840000,0xC40000,0x18C0000,0x40000001,0xC40000,0x18C0000,0x40000001,0x18C0000,0x40000001,0x40000001,0xC40000,0x18C0000,0x40000001,0x18C0000,0x40000001, -0x40000001,0x18C0000,0x40000001,0x40000001,0x40000001,0xC40000,0x18C0000,0x40000001,0x18C0000,0x40000001,0x40000001,0x18C0000,0x40000001,0x40000001,0x40000001,0x18C0000,0x40000001,0x40000001,0x40000001,0x40000001,0xA40000,0x8C0000,0x8C0000,0xDC0000,0x1400000,0x13F00000,0x40000001,0x40000001,0xB40000,0xF80000,0x35FC0000,0x40000001, -0x1180000,0x600278,0x6E5800F4,0x4C5800F4,0x405800F5,0x684C0090,0x504C004D,0x44500069,0x464C0090,0x404C004D,0x3A4C0092,0x644000F3,0x52400032,0x44480053,0x48400049,0x40400002,0x3C40004E,0x404000F4,0x3E3C0051,0x3A3C0067,0x364000F3,0x900274,0x562C00F3,0x404000F4,0x4E240092,0x4230004A,0x3A380092,0x4C1400F3,0x40200033,0x3A20004F,0x362800F4,0x1240274, -0x400000F5,0x3A00009A,0x3400010B,0x30000274,0xC4400005,0xF25400E2,0xF65C0139,0x72400000,0x58400001,0x48400002,0x42440006,0x40400006,0xBC280002,0x6A380002,0x423C0035,0x3A20004F,0xD00274,0x6C00F4,0x62640034,0x4A600034,0x40600035,0x5C580048,0x4A580001,0x42580005,0x4258004A,0x4054000E,0x3A58004A,0xA400F3,0x50440032,0x42500033,0x48400049,0x40400002, -0x3A48004A,0x14C00F3,0x40200033,0x3A18004A,0x360000F3,0xA400F3,0x50440032,0x42500033,0x48400049,0x40400002,0x3A48004A,0x14C00F3,0x40200033,0x3A18004A,0x360000F3,0x14C00F3,0x40200033,0x3A18004A,0x360000F3,0x360000F3,0xB8440003,0xF65C004E,0xFA64003D,0x72400000,0x58400001,0x48400002,0x42480001,0x40380003,0xBC280001,0x6A380001,0x44340032,0x3A18004A, -0xE800F3,0x5800F4,0x5800F4,0x5800F4,0x5800F4,0x564C0048,0x564C0048,0x564C0048,0x3C4C0049,0x3C4C0049,0x344C0049,0x52400032,0x52400032,0x52400032,0x40400001,0x40400001,0x3644000E,0x38400032,0x38400032,0x34400005,0x30400032,0x8000F3,0x8000F3,0x8000F3,0x42300049,0x42300049,0x343C0049,0x40200032,0x40200032,0x342C0002,0x30300032,0x10000F3, -0x10000F3,0x340C0049,0x30000033,0x2A0000F3,0xB8400004,0xFE4C004C,0x5800F4,0x72400000,0x54400001,0x48400001,0x44400004,0x3C400001,0xB22C0000,0x6A380001,0x403C0032,0x342C0002,0xB400F3,0x600034,0x600034,0x600034,0x600034,0x48580000,0x48580000,0x48580000,0x3A580000,0x3A580000,0x34580001,0x900032,0x900032,0x900032,0x3C480001,0x3C480001, -0x34500001,0x1240032,0x1240032,0x34340001,0x30000032,0x900032,0x900032,0x900032,0x3C480001,0x3C480001,0x34500001,0x1240032,0x1240032,0x34340001,0x30000032,0x1240032,0x1240032,0x34340001,0x30000032,0x30000032,0xA0480000,0xC4580000,0x600034,0x72400000,0x52440000,0x46440000,0x42480000,0x3C400001,0xB22C0000,0x6A380000,0xD00032,0x34340001, -0xD00032,0x780048,0x5A6C0000,0x466C0001,0x406C0001,0x2B00048,0x4A580001,0x40600001,0x1680048,0x403C0001,0x3A00004A,0x2B00048,0x4A580001,0x40600001,0x1680048,0x403C0001,0x3A00004A,0x1680048,0x403C0001,0x3A00004A,0x3A00004A,0x2B00048,0x4A580001,0x40600001,0x1680048,0x403C0001,0x3A00004A,0x1680048,0x403C0001,0x3A00004A,0x3A00004A,0x1680048, -0x403C0001,0x3A00004A,0x3A00004A,0x3A00004A,0xD4380000,0x800048,0xEE6C0000,0x72400000,0x56400001,0x4A3C0000,0x404C0001,0x402C0001,0xC4240000,0x6E340000,0x42500001,0x3A00004A,0xFC0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x4C0048,0x44400000,0x44400000,0x44400000,0x44400000,0x44400000, -0x44400000,0x30400001,0x30400001,0x30400001,0x2A400001,0x700048,0x700048,0x700048,0x700048,0x700048,0x700048,0x342C0001,0x342C0001,0x342C0001,0x2A340001,0xE40048,0xE40048,0xE40048,0x2A100001,0x2400004A,0xD8400000,0x4C0048,0x4C0048,0x72400000,0x58400000,0x4C400000,0x4C400000,0x3C400000,0xAE2C0000,0x72340000,0x383C0000,0x342C0001, -0xA00048,}; -static const uint32_t g_etc1_to_bc7_m6_table39[] = { -0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0xF00000, -0xF00000,0xF00000,0xF00000,0x26000001,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x2540000,0x2540000,0x2540000,0x2740000,0xA80000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000, -0x2980000,0x1380000,0x1380000,0x1380000,0x32000001,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x1380000,0x1380000,0x1380000,0x32000001,0x1380000,0x1380000,0x1380000,0x32000001,0x32000001,0xE6C0000,0x680000,0x680000,0x780000,0x2800000,0x8C0000,0x8C0000,0x2AC0000,0x780000,0x2800000,0xDC0000,0x1380000, -0xDC0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0xB80000,0xB80000,0xB80000,0x1740000,0x1740000,0x3C000001,0xB80000,0xB80000,0xB80000,0x1740000,0x1740000,0x3C000001,0x1740000,0x1740000,0x3C000001,0x3C000001,0xB80000,0xB80000,0xB80000,0x1740000,0x1740000,0x3C000001,0x1740000,0x1740000,0x3C000001,0x3C000001,0x1740000, -0x1740000,0x3C000001,0x3C000001,0x3C000001,0x900000,0x840000,0x7C0000,0xA80000,0xD00000,0x1080000,0x12C0000,0x1CC0000,0x9C0000,0xB80000,0x1080000,0x3C000001,0x1080000,0x940000,0xDC0000,0x1BC0000,0x48000001,0xDC0000,0x1BC0000,0x48000001,0x1BC0000,0x48000001,0x48000001,0xDC0000,0x1BC0000,0x48000001,0x1BC0000,0x48000001, -0x48000001,0x1BC0000,0x48000001,0x48000001,0x48000001,0xDC0000,0x1BC0000,0x48000001,0x1BC0000,0x48000001,0x48000001,0x1BC0000,0x48000001,0x48000001,0x48000001,0x1BC0000,0x48000001,0x48000001,0x48000001,0x48000001,0xB80000,0x69C0000,0x69C0000,0xF80000,0x1680000,0x1DF40000,0x48000001,0x48000001,0xC80000,0x1140000,0x3FD00000,0x48000001, -0x1380000,0x700278,0x766800F4,0x546800F4,0x486800F5,0x705C0090,0x585C004D,0x4C600069,0x4E5C0090,0x485C004D,0x425C0092,0x6C5000F3,0x5A500032,0x4C580053,0x50500049,0x48500002,0x4450004E,0x485000F4,0x464C0051,0x424C0067,0x3E5000F3,0xA80274,0x5E3C00F3,0x485000F4,0x56340092,0x4A40004A,0x42480092,0x542400F3,0x48300033,0x4230004F,0x3E3800F4,0x1580274, -0x480C00F4,0x42040092,0x3E0000F7,0x38000274,0xCC500005,0xFA6400E2,0xFE6C0139,0x7A500000,0x60500001,0x50500002,0x4A540006,0x48500006,0xC4380002,0x72480002,0x4A4C0035,0x4230004F,0xF00274,0x7C00F4,0x6A740034,0x52700034,0x48700035,0x64680048,0x52680001,0x4A680005,0x4A68004A,0x4864000E,0x4268004A,0xBC00F3,0x58540032,0x4A600033,0x50500049,0x48500002, -0x4258004A,0x17C00F3,0x48300033,0x4228004A,0x3E0000F3,0xBC00F3,0x58540032,0x4A600033,0x50500049,0x48500002,0x4258004A,0x17C00F3,0x48300033,0x4228004A,0x3E0000F3,0x17C00F3,0x48300033,0x4228004A,0x3E0000F3,0x3E0000F3,0xC0540003,0xFE6C004E,0xF2740044,0x7A500000,0x60500001,0x50500002,0x4A580001,0x48480003,0xC4380001,0x72480001,0x4C440032,0x4228004A, -0x10C00F3,0x6800F4,0x6800F4,0x6800F4,0x6800F4,0x5E5C0048,0x5E5C0048,0x5E5C0048,0x445C0049,0x445C0049,0x3C5C0049,0x5A500032,0x5A500032,0x5A500032,0x48500001,0x48500001,0x3E54000E,0x40500032,0x40500032,0x3C500005,0x38500032,0x9800F3,0x9800F3,0x9800F3,0x4A400049,0x4A400049,0x3C4C0049,0x48300032,0x48300032,0x3C3C0002,0x38400032,0x13000F3, -0x13000F3,0x3C1C0049,0x38080032,0x320000F3,0xC0500004,0xF65C0051,0x6800F4,0x7A500000,0x5C500001,0x50500001,0x4C500004,0x44500001,0xBA3C0000,0x72480001,0x484C0032,0x3C3C0002,0xD800F3,0x700034,0x700034,0x700034,0x700034,0x50680000,0x50680000,0x50680000,0x42680000,0x42680000,0x3C680001,0xA80032,0xA80032,0xA80032,0x44580001,0x44580001, -0x3C600001,0x1580032,0x1580032,0x3C440001,0x38000032,0xA80032,0xA80032,0xA80032,0x44580001,0x44580001,0x3C600001,0x1580032,0x1580032,0x3C440001,0x38000032,0x1580032,0x1580032,0x3C440001,0x38000032,0x38000032,0xA8580000,0xCC680000,0x700034,0x7A500000,0x5A540000,0x4E540000,0x4A580000,0x44500001,0xBA3C0000,0x72480000,0xF00032,0x3C440001, -0xF00032,0x880048,0x627C0000,0x4E7C0001,0x487C0001,0xC80048,0x52680001,0x48700001,0x1980048,0x484C0001,0x4200004A,0xC80048,0x52680001,0x48700001,0x1980048,0x484C0001,0x4200004A,0x1980048,0x484C0001,0x4200004A,0x4200004A,0xC80048,0x52680001,0x48700001,0x1980048,0x484C0001,0x4200004A,0x1980048,0x484C0001,0x4200004A,0x4200004A,0x1980048, -0x484C0001,0x4200004A,0x4200004A,0x4200004A,0xDC480000,0x900048,0xF67C0000,0x7A500000,0x5E500001,0x524C0000,0x485C0001,0x483C0001,0xCC340000,0x76440000,0x4A600001,0x4200004A,0x1200048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x5C0048,0x4C500000,0x4C500000,0x4C500000,0x4C500000,0x4C500000, -0x4C500000,0x38500001,0x38500001,0x38500001,0x32500001,0x880048,0x880048,0x880048,0x880048,0x880048,0x880048,0x3C3C0001,0x3C3C0001,0x3C3C0001,0x32440001,0x1140048,0x1140048,0x1140048,0x32200001,0x2C00004A,0xE0500000,0x5C0048,0x5C0048,0x7A500000,0x60500000,0x54500000,0x54500000,0x44500000,0xB63C0000,0x7A440000,0x404C0000,0x3C3C0001, -0xC00048,}; -static const uint32_t g_etc1_to_bc7_m6_table40[] = { -0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x1240000, -0x1240000,0x1240000,0x1240000,0x30000000,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x680000,0x680000,0x680000,0x900000,0xD00000,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000, -0xB40000,0x1700000,0x1700000,0x1700000,0x3C000000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x1700000,0x1700000,0x1700000,0x3C000000,0x1700000,0x1700000,0x1700000,0x3C000000,0x3C000000,0x8800000,0x780001,0x780001,0x28C0000,0x980000,0x2A40000,0x2A40000,0xCC0000,0x28C0000,0x980000,0x1000000,0x1700000, -0x1000000,0x8C0001,0x8C0001,0x8C0001,0x8C0001,0x2D00000,0x2D00000,0x2D00000,0x1AC0000,0x1AC0000,0x46000000,0x2D00000,0x2D00000,0x2D00000,0x1AC0000,0x1AC0000,0x46000000,0x1AC0000,0x1AC0000,0x46000000,0x46000000,0x2D00000,0x2D00000,0x2D00000,0x1AC0000,0x1AC0000,0x46000000,0x1AC0000,0x1AC0000,0x46000000,0x46000000,0x1AC0000, -0x1AC0000,0x46000000,0x46000000,0x46000000,0xA40000,0x980000,0x8C0001,0xC00000,0x2EC0000,0x12C0000,0x15C0000,0x5F80000,0x2B00000,0x2D00000,0x12C0000,0x46000000,0x12C0000,0xA40001,0x2F40000,0x1F40000,0x52000000,0x2F40000,0x1F40000,0x52000000,0x1F40000,0x52000000,0x52000000,0x2F40000,0x1F40000,0x52000000,0x1F40000,0x52000000, -0x52000000,0x1F40000,0x52000000,0x52000000,0x52000000,0x2F40000,0x1F40000,0x52000000,0x1F40000,0x52000000,0x52000000,0x1F40000,0x52000000,0x52000000,0x52000000,0x1F40000,0x52000000,0x52000000,0x52000000,0x52000000,0xD00000,0xB00000,0xB00000,0x3140000,0x1940000,0x27FC0000,0x52000000,0x52000000,0xE00000,0x1380000,0x49C40000,0x52000000, -0x1600000,0x840274,0x807800F3,0x5C7800F4,0x527800F3,0x76700092,0x5E70004F,0x54700067,0x56700092,0x526C004E,0x4C6C0092,0x746400F4,0x62640033,0x54680051,0x5A60004A,0x52640002,0x4C64004D,0x526000F4,0x4E5C0053,0x4A5C0069,0x466000F5,0xC40274,0x684C00F3,0x526400F4,0x5E480092,0x52540049,0x4C580090,0x5C3800F3,0x52400032,0x4C40004D,0x464C00F4,0x18C0274, -0x521800F3,0x4C140090,0x460800F4,0x40000278,0xCE640004,0xF47800F4,0xF67C014C,0x80640002,0x68640002,0x5A640002,0x52640006,0x50600006,0xCE4C0002,0x7E580000,0x545C0035,0x4C40004D,0x1180274,0x9000F3,0x74840032,0x5A840032,0x52840032,0x6A7C0049,0x5C780002,0x527C0005,0x54780049,0x5078000E,0x4C780049,0xD400F3,0x62640032,0x52740032,0x5A600049,0x52640001, -0x4C680049,0x1B000F3,0x523C0032,0x4C380048,0x460000F4,0xD400F3,0x62640032,0x52740032,0x5A600049,0x52640001,0x4C680049,0x1B000F3,0x523C0032,0x4C380048,0x460000F4,0x1B000F3,0x523C0032,0x4C380048,0x460000F4,0x460000F4,0xCE640003,0xF8800053,0xFC880043,0x80640001,0x68640001,0x5A640001,0x526C0001,0x525C0004,0xD4480001,0x7E580000,0x54580033,0x4C380048, -0x13000F3,0x7800F3,0x7800F3,0x7800F3,0x7800F3,0x6470004A,0x6470004A,0x6470004A,0x4E6C004A,0x4E6C004A,0x466C004A,0x62640033,0x62640033,0x62640033,0x50640002,0x50640002,0x4864000E,0x4A600033,0x4A600033,0x46600005,0x40600035,0x2B000F3,0x2B000F3,0x2B000F3,0x52540049,0x52540049,0x465C004A,0x50440032,0x50440032,0x464C0001,0x42500034,0x16800F3, -0x16800F3,0x462C0048,0x40200034,0x3C0000F4,0xCA640003,0xF0700053,0x7800F3,0x80640001,0x64640002,0x58640002,0x56640003,0x4E600001,0xBE500000,0x7E580000,0x50600033,0x464C0001,0xFC00F3,0x840032,0x840032,0x840032,0x840032,0x587C0001,0x587C0001,0x587C0001,0x4A7C0001,0x4A7C0001,0x46780001,0xC40032,0xC40032,0xC40032,0x4E680001,0x4E680001, -0x46700000,0x18C0032,0x18C0032,0x46500000,0x40000034,0xC40032,0xC40032,0xC40032,0x4E680001,0x4E680001,0x46700000,0x18C0032,0x18C0032,0x46500000,0x40000034,0x18C0032,0x18C0032,0x46500000,0x40000034,0x40000034,0xAC6C0000,0xC67C0001,0x840032,0x80640000,0x5E6C0001,0x56680000,0x506C0001,0x4E600000,0xBE500000,0x785C0000,0x1180032,0x46500000, -0x1180032,0x98004A,0x68900001,0x588C0001,0x528C0001,0xE40048,0x5C780001,0x52800001,0x1D00048,0x525C0000,0x4C000048,0xE40048,0x5C780001,0x52800001,0x1D00048,0x525C0000,0x4C000048,0x1D00048,0x525C0000,0x4C000048,0x4C000048,0xE40048,0x5C780001,0x52800001,0x1D00048,0x525C0000,0x4C000048,0x1D00048,0x525C0000,0x4C000048,0x4C000048,0x1D00048, -0x525C0000,0x4C000048,0x4C000048,0x4C000048,0xE25C0000,0xA40048,0xF0900001,0x84600000,0x68600001,0x5C5C0000,0x526C0000,0x52480000,0xD8440000,0x7E580000,0x54700000,0x4C000048,0x1480048,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x6C004A,0x52640001,0x52640001,0x52640001,0x52640001,0x52640001, -0x52640001,0x42600001,0x42600001,0x42600001,0x3C600001,0xA40048,0xA40048,0xA40048,0xA40048,0xA40048,0xA40048,0x464C0001,0x464C0001,0x464C0001,0x3C540001,0x14C0048,0x14C0048,0x14C0048,0x3C300000,0x36000048,0xDA640001,0x6C004A,0x6C004A,0x7C640001,0x66640001,0x5A640001,0x5A640001,0x4C640001,0xBA500000,0x7E580000,0x46600001,0x464C0001, -0xE80048,}; -static const uint32_t g_etc1_to_bc7_m6_table41[] = { -0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0x1580000, -0x1580000,0x1580000,0x1580000,0x38000000,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x4780000,0x4780000,0x4780000,0xA80000,0xF00000,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000, -0xCC0000,0x1A00000,0x1A00000,0x1A00000,0x44000000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0x1A00000,0x1A00000,0x1A00000,0x44000000,0x1A00000,0x1A00000,0x1A00000,0x44000000,0x44000000,0x940000,0x880001,0x880001,0xA00000,0xAC0000,0xBC0000,0xBC0000,0xE80000,0xA00000,0xAC0000,0x1240000,0x1A00000, -0x1240000,0x9C0001,0x9C0001,0x9C0001,0x9C0001,0x2E80000,0x2E80000,0x2E80000,0x1DC0000,0x1DC0000,0x4E000000,0x2E80000,0x2E80000,0x2E80000,0x1DC0000,0x1DC0000,0x4E000000,0x1DC0000,0x1DC0000,0x4E000000,0x4E000000,0x2E80000,0x2E80000,0x2E80000,0x1DC0000,0x1DC0000,0x4E000000,0x1DC0000,0x1DC0000,0x4E000000,0x4E000000,0x1DC0000, -0x1DC0000,0x4E000000,0x4E000000,0x4E000000,0xB80000,0xA80000,0x9C0001,0x2D40000,0x1080000,0x1500000,0x1800000,0x11F40000,0x2C40000,0x2E80000,0x1500000,0x4E000000,0x1500000,0xB40001,0x30C0000,0xBFC0000,0x5A000000,0x30C0000,0xBFC0000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x30C0000,0xBFC0000,0x5A000000,0xBFC0000,0x5A000000, -0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0x30C0000,0xBFC0000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0x5A000000,0xE40000,0x8C00000,0x8C00000,0x1300000,0x1BC0000,0x31FC0000,0x5A000000,0x5A000000,0xF80000,0x1540000,0x51D40000,0x5A000000, -0x1800000,0x940274,0x888800F3,0x648800F4,0x5A8800F3,0x7E800092,0x6680004F,0x5C800067,0x5E800092,0x5A7C004E,0x547C0092,0x7C7400F4,0x6A740033,0x5C780051,0x6270004A,0x5A740002,0x5474004D,0x5A7000F4,0x566C0053,0x526C0069,0x4E7000F5,0xDC0274,0x705C00F3,0x5A7400F4,0x66580092,0x5A640049,0x54680090,0x644800F3,0x5A500032,0x5450004D,0x4E5C00F4,0x1BC0274, -0x5A2800F3,0x54240090,0x4E1800F4,0x48000278,0xD6740004,0xFC8800F4,0xFE8C014C,0x88740002,0x70740002,0x62740002,0x5A740006,0x58700006,0xD65C0002,0x86680000,0x5C6C0035,0x5450004D,0x1380274,0xA000F3,0x7C940032,0x62940032,0x5A940032,0x728C0049,0x64880002,0x5A8C0005,0x5C880049,0x5888000E,0x54880049,0xEC00F3,0x6A740032,0x5A840032,0x62700049,0x5A740001, -0x54780049,0x1E400F3,0x5A4C0032,0x54480048,0x4E0000F4,0xEC00F3,0x6A740032,0x5A840032,0x62700049,0x5A740001,0x54780049,0x1E400F3,0x5A4C0032,0x54480048,0x4E0000F4,0x1E400F3,0x5A4C0032,0x54480048,0x4E0000F4,0x4E0000F4,0xD6740003,0xF0900059,0xF498004A,0x88740001,0x70740001,0x62740001,0x5A7C0001,0x5A6C0004,0xDC580001,0x86680000,0x5C680033,0x54480048, -0x15400F3,0x8800F3,0x8800F3,0x8800F3,0x8800F3,0x6C80004A,0x6C80004A,0x6C80004A,0x567C004A,0x567C004A,0x4E7C004A,0x6A740033,0x6A740033,0x6A740033,0x58740002,0x58740002,0x5074000E,0x52700033,0x52700033,0x4E700005,0x48700035,0xC800F3,0xC800F3,0xC800F3,0x5A640049,0x5A640049,0x4E6C004A,0x58540032,0x58540032,0x4E5C0001,0x4A600034,0x19800F3, -0x19800F3,0x4E3C0048,0x48300034,0x440000F4,0xD2740003,0xF8800053,0x8800F3,0x88740001,0x6C740002,0x60740002,0x5E740003,0x56700001,0xC6600000,0x86680000,0x58700033,0x4E5C0001,0x12000F3,0x940032,0x940032,0x940032,0x940032,0x608C0001,0x608C0001,0x608C0001,0x528C0001,0x528C0001,0x4E880001,0xDC0032,0xDC0032,0xDC0032,0x56780001,0x56780001, -0x4E800000,0x1BC0032,0x1BC0032,0x4E600000,0x48000034,0xDC0032,0xDC0032,0xDC0032,0x56780001,0x56780001,0x4E800000,0x1BC0032,0x1BC0032,0x4E600000,0x48000034,0x1BC0032,0x1BC0032,0x4E600000,0x48000034,0x48000034,0xB47C0000,0xCE8C0001,0x940032,0x88740000,0x667C0001,0x5E780000,0x587C0001,0x56700000,0xC6600000,0x806C0000,0x1380032,0x4E600000, -0x1380032,0xA8004A,0x70A00001,0x609C0001,0x5A9C0001,0xFC0048,0x64880001,0x5A900001,0x3F80048,0x5A6C0000,0x54000048,0xFC0048,0x64880001,0x5A900001,0x3F80048,0x5A6C0000,0x54000048,0x3F80048,0x5A6C0000,0x54000048,0x54000048,0xFC0048,0x64880001,0x5A900001,0x3F80048,0x5A6C0000,0x54000048,0x3F80048,0x5A6C0000,0x54000048,0x54000048,0x3F80048, -0x5A6C0000,0x54000048,0x54000048,0x54000048,0xEA6C0000,0x2B40048,0xF8A00001,0x8C700000,0x70700001,0x646C0000,0x5A7C0000,0x5A580000,0xE0540000,0x86680000,0x5C800000,0x54000048,0x1680048,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x7C004A,0x5A740001,0x5A740001,0x5A740001,0x5A740001,0x5A740001, -0x5A740001,0x4A700001,0x4A700001,0x4A700001,0x44700001,0xBC0048,0xBC0048,0xBC0048,0xBC0048,0xBC0048,0xBC0048,0x4E5C0001,0x4E5C0001,0x4E5C0001,0x44640001,0x17C0048,0x17C0048,0x17C0048,0x44400000,0x3E000048,0xE2740001,0x7C004A,0x7C004A,0x84740001,0x6E740001,0x62740001,0x62740001,0x54740001,0xC2600000,0x86680000,0x4E700001,0x4E5C0001, -0x10C0048,}; -static const uint32_t g_etc1_to_bc7_m6_table42[] = { -0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0x1880000, -0x1880000,0x1880000,0x1880000,0x40000000,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0xC880000,0xC880000,0xC880000,0xC00000,0x1140000,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000, -0xE40000,0x1D00000,0x1D00000,0x1D00000,0x4C000000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0x1D00000,0x1D00000,0x1D00000,0x4C000000,0x1D00000,0x1D00000,0x1D00000,0x4C000000,0x4C000000,0xA40000,0x980001,0x980001,0xB40000,0xC00000,0xD00000,0xD00000,0x3000000,0xB40000,0xC00000,0x1480000,0x1D00000, -0x1480000,0xAC0001,0xAC0001,0xAC0001,0xAC0001,0x3000000,0x3000000,0x3000000,0x5FC0000,0x5FC0000,0x56000000,0x3000000,0x3000000,0x3000000,0x5FC0000,0x5FC0000,0x56000000,0x5FC0000,0x5FC0000,0x56000000,0x56000000,0x3000000,0x3000000,0x3000000,0x5FC0000,0x5FC0000,0x56000000,0x5FC0000,0x5FC0000,0x56000000,0x56000000,0x5FC0000, -0x5FC0000,0x56000000,0x56000000,0x56000000,0x4C80000,0x4B80000,0xAC0001,0xEC0000,0x1240000,0x1700000,0x1A80000,0x1BF80000,0x2D80000,0x3000000,0x1700000,0x56000000,0x1700000,0xC40001,0x3240000,0x17FC0000,0x62000000,0x3240000,0x17FC0000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x3240000,0x17FC0000,0x62000000,0x17FC0000,0x62000000, -0x62000000,0x17FC0000,0x62000000,0x62000000,0x62000000,0x3240000,0x17FC0000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x62000000,0x62000000,0xF80000,0xD40000,0xD40000,0x14C0000,0x1E40000,0x3BFC0000,0x62000000,0x62000000,0x10C0000,0x1740000,0x59E40000,0x62000000, -0x1A40000,0xA40274,0x909800F3,0x6C9800F4,0x629800F3,0x86900092,0x6E90004F,0x64900067,0x66900092,0x628C004E,0x5C8C0092,0x848400F4,0x72840033,0x64880051,0x6A80004A,0x62840002,0x5C84004D,0x628000F4,0x5E7C0053,0x5A7C0069,0x568000F5,0xF40274,0x786C00F3,0x628400F4,0x6E680092,0x62740049,0x5C780090,0x6C5800F3,0x62600032,0x5C60004D,0x566C00F4,0x1F00274, -0x623800F3,0x5C340090,0x562800F4,0x50000278,0xDE840004,0xF498010A,0xF8A0015B,0x90840002,0x78840002,0x6A840002,0x62840006,0x60800006,0xDE6C0002,0x8E780000,0x647C0035,0x5C60004D,0x15C0274,0xB000F3,0x84A40032,0x6AA40032,0x62A40032,0x7A9C0049,0x6C980002,0x629C0005,0x64980049,0x6098000E,0x5C980049,0x10400F3,0x72840032,0x62940032,0x6A800049,0x62840001, -0x5C880049,0x7FC00F3,0x625C0032,0x5C580048,0x560000F4,0x10400F3,0x72840032,0x62940032,0x6A800049,0x62840001,0x5C880049,0x7FC00F3,0x625C0032,0x5C580048,0x560000F4,0x7FC00F3,0x625C0032,0x5C580048,0x560000F4,0x560000F4,0xDE840003,0xF8A00059,0xFCA8004A,0x90840001,0x78840001,0x6A840001,0x628C0001,0x627C0004,0xE4680001,0x8E780000,0x64780033,0x5C580048, -0x17400F3,0x9800F3,0x9800F3,0x9800F3,0x9800F3,0x7490004A,0x7490004A,0x7490004A,0x5E8C004A,0x5E8C004A,0x568C004A,0x72840033,0x72840033,0x72840033,0x60840002,0x60840002,0x5884000E,0x5A800033,0x5A800033,0x56800005,0x50800035,0xE000F3,0xE000F3,0xE000F3,0x62740049,0x62740049,0x567C004A,0x60640032,0x60640032,0x566C0001,0x52700034,0x1CC00F3, -0x1CC00F3,0x564C0048,0x50400034,0x4C0000F4,0xDA840003,0xF090005A,0x9800F3,0x90840001,0x74840002,0x68840002,0x66840003,0x5E800001,0xCE700000,0x8E780000,0x60800033,0x566C0001,0x14000F3,0xA40032,0xA40032,0xA40032,0xA40032,0x689C0001,0x689C0001,0x689C0001,0x5A9C0001,0x5A9C0001,0x56980001,0xF40032,0xF40032,0xF40032,0x5E880001,0x5E880001, -0x56900000,0x1F00032,0x1F00032,0x56700000,0x50000034,0xF40032,0xF40032,0xF40032,0x5E880001,0x5E880001,0x56900000,0x1F00032,0x1F00032,0x56700000,0x50000034,0x1F00032,0x1F00032,0x56700000,0x50000034,0x50000034,0xBC8C0000,0xD69C0001,0xA40032,0x90840000,0x6E8C0001,0x66880000,0x608C0001,0x5E800000,0xCE700000,0x887C0000,0x15C0032,0x56700000, -0x15C0032,0xB8004A,0x78B00001,0x68AC0001,0x62AC0001,0x1140048,0x6C980001,0x62A00001,0xFF80048,0x627C0000,0x5C000048,0x1140048,0x6C980001,0x62A00001,0xFF80048,0x627C0000,0x5C000048,0xFF80048,0x627C0000,0x5C000048,0x5C000048,0x1140048,0x6C980001,0x62A00001,0xFF80048,0x627C0000,0x5C000048,0xFF80048,0x627C0000,0x5C000048,0x5C000048,0xFF80048, -0x627C0000,0x5C000048,0x5C000048,0x5C000048,0xF27C0000,0xAC40048,0xF0B00002,0x94800000,0x78800001,0x6C7C0000,0x628C0000,0x62680000,0xE8640000,0x8E780000,0x64900000,0x5C000048,0x18C0048,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x8C004A,0x62840001,0x62840001,0x62840001,0x62840001,0x62840001, -0x62840001,0x52800001,0x52800001,0x52800001,0x4C800001,0x2D00048,0x2D00048,0x2D00048,0x2D00048,0x2D00048,0x2D00048,0x566C0001,0x566C0001,0x566C0001,0x4C740001,0x1AC0048,0x1AC0048,0x1AC0048,0x4C500000,0x46000048,0xEA840001,0x8C004A,0x8C004A,0x8C840001,0x76840001,0x6A840001,0x6A840001,0x5C840001,0xCA700000,0x8E780000,0x56800001,0x566C0001, -0x12C0048,}; -static const uint32_t g_etc1_to_bc7_m6_table43[] = { -0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0x1B80000, -0x1B80000,0x1B80000,0x1B80000,0x48000000,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x9C0000,0x9C0000,0x9C0000,0xD80000,0x1340000,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000, -0xFC0000,0x3F80000,0x3F80000,0x3F80000,0x54000000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0x3F80000,0x3F80000,0x3F80000,0x54000000,0x3F80000,0x3F80000,0x3F80000,0x54000000,0x54000000,0x2B40000,0xA80001,0xA80001,0x2C40000,0xD40000,0xE80000,0xE80000,0x11C0000,0x2C40000,0xD40000,0x1680000,0x3F80000, -0x1680000,0xBC0001,0xBC0001,0xBC0001,0xBC0001,0x3180000,0x3180000,0x3180000,0x11FC0000,0x11FC0000,0x5E000000,0x3180000,0x3180000,0x3180000,0x11FC0000,0x11FC0000,0x5E000000,0x11FC0000,0x11FC0000,0x5E000000,0x5E000000,0x3180000,0x3180000,0x3180000,0x11FC0000,0x11FC0000,0x5E000000,0x11FC0000,0x11FC0000,0x5E000000,0x5E000000,0x11FC0000, -0x11FC0000,0x5E000000,0x5E000000,0x5E000000,0xDC0000,0xCC80000,0xBC0001,0x3000000,0x1400000,0x1940000,0x1D00000,0x25FC0000,0x2EC0000,0x3180000,0x1940000,0x5E000000,0x1940000,0xD40001,0x33C0000,0x23FC0000,0x6A000000,0x33C0000,0x23FC0000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x33C0000,0x23FC0000,0x6A000000,0x23FC0000,0x6A000000, -0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0x33C0000,0x23FC0000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0x6A000000,0x10C0000,0xE40000,0xE40000,0x1680000,0x7F80000,0x45FC0000,0x6A000000,0x6A000000,0x1240000,0x1900000,0x61F40000,0x6A000000, -0x1C80000,0xB40274,0x98A800F3,0x74A800F4,0x6AA800F3,0x8EA00092,0x76A0004F,0x6CA00067,0x6EA00092,0x6A9C004E,0x649C0092,0x8C9400F4,0x7A940033,0x6C980051,0x7290004A,0x6A940002,0x6494004D,0x6A9000F4,0x668C0053,0x628C0069,0x5E9000F5,0x10C0274,0x807C00F3,0x6A9400F4,0x76780092,0x6A840049,0x64880090,0x746800F3,0x6A700032,0x6470004D,0x5E7C00F4,0xBF80274, -0x6A4800F3,0x64440090,0x5E3800F4,0x58000278,0xE6940004,0xFCA8010A,0xFEAC015F,0x98940002,0x80940002,0x72940002,0x6A940006,0x68900006,0xE67C0002,0x96880000,0x6C8C0035,0x6470004D,0x17C0274,0xC000F3,0x8CB40032,0x72B40032,0x6AB40032,0x82AC0049,0x74A80002,0x6AAC0005,0x6CA80049,0x68A8000E,0x64A80049,0x11C00F3,0x7A940032,0x6AA40032,0x72900049,0x6A940001, -0x64980049,0x13FC00F3,0x6A6C0032,0x64680048,0x5E0000F4,0x11C00F3,0x7A940032,0x6AA40032,0x72900049,0x6A940001,0x64980049,0x13FC00F3,0x6A6C0032,0x64680048,0x5E0000F4,0x13FC00F3,0x6A6C0032,0x64680048,0x5E0000F4,0x5E0000F4,0xE6940003,0xF0B00063,0xF4B80053,0x98940001,0x80940001,0x72940001,0x6A9C0001,0x6A8C0004,0xEC780001,0x96880000,0x6C880033,0x64680048, -0x19800F3,0xA800F3,0xA800F3,0xA800F3,0xA800F3,0x7CA0004A,0x7CA0004A,0x7CA0004A,0x669C004A,0x669C004A,0x5E9C004A,0x7A940033,0x7A940033,0x7A940033,0x68940002,0x68940002,0x6094000E,0x62900033,0x62900033,0x5E900005,0x58900035,0xF800F3,0xF800F3,0xF800F3,0x6A840049,0x6A840049,0x5E8C004A,0x68740032,0x68740032,0x5E7C0001,0x5A800034,0x1FC00F3, -0x1FC00F3,0x5E5C0048,0x58500034,0x540000F4,0xE2940003,0xF8A0005A,0xA800F3,0x98940001,0x7C940002,0x70940002,0x6E940003,0x66900001,0xD6800000,0x96880000,0x68900033,0x5E7C0001,0x16400F3,0xB40032,0xB40032,0xB40032,0xB40032,0x70AC0001,0x70AC0001,0x70AC0001,0x62AC0001,0x62AC0001,0x5EA80001,0x10C0032,0x10C0032,0x10C0032,0x66980001,0x66980001, -0x5EA00000,0xBF80032,0xBF80032,0x5E800000,0x58000034,0x10C0032,0x10C0032,0x10C0032,0x66980001,0x66980001,0x5EA00000,0xBF80032,0xBF80032,0x5E800000,0x58000034,0xBF80032,0xBF80032,0x5E800000,0x58000034,0x58000034,0xC49C0000,0xDEAC0001,0xB40032,0x98940000,0x769C0001,0x6E980000,0x689C0001,0x66900000,0xD6800000,0x908C0000,0x17C0032,0x5E800000, -0x17C0032,0xC8004A,0x80C00001,0x70BC0001,0x6ABC0001,0x12C0048,0x74A80001,0x6AB00001,0x1BF80048,0x6A8C0000,0x64000048,0x12C0048,0x74A80001,0x6AB00001,0x1BF80048,0x6A8C0000,0x64000048,0x1BF80048,0x6A8C0000,0x64000048,0x64000048,0x12C0048,0x74A80001,0x6AB00001,0x1BF80048,0x6A8C0000,0x64000048,0x1BF80048,0x6A8C0000,0x64000048,0x64000048,0x1BF80048, -0x6A8C0000,0x64000048,0x64000048,0x64000048,0xFA8C0000,0xD80048,0xF8C00002,0x9C900000,0x80900001,0x748C0000,0x6A9C0000,0x6A780000,0xF0740000,0x96880000,0x6CA00000,0x64000048,0x1AC0048,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x9C004A,0x6A940001,0x6A940001,0x6A940001,0x6A940001,0x6A940001, -0x6A940001,0x5A900001,0x5A900001,0x5A900001,0x54900001,0x2E80048,0x2E80048,0x2E80048,0x2E80048,0x2E80048,0x2E80048,0x5E7C0001,0x5E7C0001,0x5E7C0001,0x54840001,0x1DC0048,0x1DC0048,0x1DC0048,0x54600000,0x4E000048,0xF2940001,0x9C004A,0x9C004A,0x94940001,0x7E940001,0x72940001,0x72940001,0x64940001,0xD2800000,0x96880000,0x5E900001,0x5E7C0001, -0x1500048,}; -static const uint32_t g_etc1_to_bc7_m6_table44[] = { -0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0x1F00000, -0x1F00000,0x1F00000,0x1F00000,0x50000001,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xEAC0000,0xEAC0000,0xEAC0000,0xF40000,0x15C0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000, -0x1180000,0x11F80000,0x11F80000,0x11F80000,0x5C000001,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x11F80000,0x11F80000,0x11F80000,0x5C000001,0x11F80000,0x11F80000,0x11F80000,0x5C000001,0x5C000001,0xC80000,0xBC0000,0xBC0000,0x4D80000,0xEC0000,0x1000000,0x1000000,0x13C0000,0x4D80000,0xEC0000,0x1900000,0x11F80000, -0x1900000,0xD00000,0xD00000,0xD00000,0xD00000,0x1340000,0x1340000,0x1340000,0x1FF80000,0x1FF80000,0x66000001,0x1340000,0x1340000,0x1340000,0x1FF80000,0x1FF80000,0x66000001,0x1FF80000,0x1FF80000,0x66000001,0x66000001,0x1340000,0x1340000,0x1340000,0x1FF80000,0x1FF80000,0x66000001,0x1FF80000,0x1FF80000,0x66000001,0x66000001,0x1FF80000, -0x1FF80000,0x66000001,0x66000001,0x66000001,0x2F00000,0x6DC0000,0xD00000,0x3180000,0x15C0000,0x1B80000,0x1FC0000,0x33F40000,0x1040000,0x1340000,0x1B80000,0x66000001,0x1B80000,0xE80000,0x1580000,0x31F80000,0x72000001,0x1580000,0x31F80000,0x72000001,0x31F80000,0x72000001,0x72000001,0x1580000,0x31F80000,0x72000001,0x31F80000,0x72000001, -0x72000001,0x31F80000,0x72000001,0x72000001,0x72000001,0x1580000,0x31F80000,0x72000001,0x31F80000,0x72000001,0x72000001,0x31F80000,0x72000001,0x72000001,0x72000001,0x31F80000,0x72000001,0x72000001,0x72000001,0x72000001,0x5200000,0xF80000,0xF80000,0x1840000,0x15FC0000,0x51F80000,0x72000001,0x72000001,0x13C0000,0x1B40000,0x6BE80000,0x72000001, -0x1EC0000,0xC40278,0xA0BC00F4,0x7EBC00F4,0x72BC00F5,0x9AB00090,0x82B0004D,0x76B40069,0x78B00090,0x72B0004D,0x6CB00092,0x96A400F3,0x84A40032,0x76AC0053,0x7AA40049,0x72A40002,0x6EA4004E,0x72A400F4,0x70A00051,0x6CA00067,0x68A400F3,0x3240274,0x889000F3,0x72A400F4,0x80880092,0x7494004A,0x6C9C0092,0x7E7800F3,0x72840033,0x6C84004F,0x688C00F4,0x17FC0274, -0x726000F4,0x6C580092,0x684400F3,0x62000274,0xF6A40005,0xF6BC0120,0xF8C0016C,0xA4A40000,0x8AA40001,0x7AA40002,0x74A80006,0x72A40006,0xEE8C0002,0x9C9C0002,0x74A00035,0x6C84004F,0x1A40274,0xD000F4,0x94C80034,0x7CC40034,0x72C40035,0x8EBC0048,0x7CBC0001,0x74BC0005,0x74BC004A,0x72B8000E,0x6CBC004A,0x13800F3,0x82A80032,0x74B40033,0x7AA40049,0x72A40002, -0x6CAC004A,0x21F800F3,0x72840033,0x6C7C004A,0x680000F3,0x13800F3,0x82A80032,0x74B40033,0x7AA40049,0x72A40002,0x6CAC004A,0x21F800F3,0x72840033,0x6C7C004A,0x680000F3,0x21F800F3,0x72840033,0x6C7C004A,0x680000F3,0x680000F3,0xEAA80003,0xFAC40060,0xFECC0054,0xA4A40000,0x8AA40001,0x7AA40002,0x74AC0001,0x729C0003,0xEE8C0001,0x9C9C0001,0x76980032,0x6C7C004A, -0x1BC00F3,0xBC00F4,0xBC00F4,0xBC00F4,0xBC00F4,0x88B00048,0x88B00048,0x88B00048,0x6EB00049,0x6EB00049,0x66B00049,0x84A40032,0x84A40032,0x84A40032,0x72A40001,0x72A40001,0x68A8000E,0x6AA40032,0x6AA40032,0x66A40005,0x62A40032,0x11400F3,0x11400F3,0x11400F3,0x74940049,0x74940049,0x66A00049,0x72840032,0x72840032,0x66900002,0x62940032,0xFF800F3, -0xFF800F3,0x66700049,0x625C0032,0x5C0000F3,0xEAA40004,0xF2B40060,0xBC00F4,0xA4A40000,0x86A40001,0x7AA40001,0x76A40004,0x6EA40001,0xE4900000,0x9C9C0001,0x72A00032,0x66900002,0x18C00F3,0xC40034,0xC40034,0xC40034,0xC40034,0x7ABC0000,0x7ABC0000,0x7ABC0000,0x6CBC0000,0x6CBC0000,0x66BC0001,0x3240032,0x3240032,0x3240032,0x6EAC0001,0x6EAC0001, -0x66B40001,0x17FC0032,0x17FC0032,0x66980001,0x62000032,0x3240032,0x3240032,0x3240032,0x6EAC0001,0x6EAC0001,0x66B40001,0x17FC0032,0x17FC0032,0x66980001,0x62000032,0x17FC0032,0x17FC0032,0x66980001,0x62000032,0x62000032,0xD2AC0000,0xF6BC0000,0xC40034,0xA4A40000,0x84A80000,0x78A80000,0x74AC0000,0x6EA40001,0xE4900000,0x9C9C0000,0x1A40032,0x66980001, -0x1A40032,0xDC0048,0x8CD00000,0x78D00001,0x72D00001,0x3440048,0x7CBC0001,0x72C40001,0x27FC0048,0x72A00001,0x6C00004A,0x3440048,0x7CBC0001,0x72C40001,0x27FC0048,0x72A00001,0x6C00004A,0x27FC0048,0x72A00001,0x6C00004A,0x6C00004A,0x3440048,0x7CBC0001,0x72C40001,0x27FC0048,0x72A00001,0x6C00004A,0x27FC0048,0x72A00001,0x6C00004A,0x6C00004A,0x27FC0048, -0x72A00001,0x6C00004A,0x6C00004A,0x6C00004A,0xF6A40001,0xCE80048,0xF2D40005,0xA4A40000,0x88A40001,0x7CA00000,0x72B00001,0x72900001,0xF6880000,0xA0980000,0x74B40001,0x6C00004A,0x1D40048,0xB00048,0xB00048,0xB00048,0xB00048,0xB00048,0xB00048,0xB00048,0xB00048,0xB00048,0xB00048,0x76A40000,0x76A40000,0x76A40000,0x76A40000,0x76A40000, -0x76A40000,0x62A40001,0x62A40001,0x62A40001,0x5CA40001,0x1040048,0x1040048,0x1040048,0x1040048,0x1040048,0x1040048,0x66900001,0x66900001,0x66900001,0x5C980001,0x7FC0048,0x7FC0048,0x7FC0048,0x5C740001,0x5600004A,0xFAA40001,0xB00048,0xB00048,0xA4A40000,0x8AA40000,0x7EA40000,0x7EA40000,0x6EA40000,0xE0900000,0xA4980000,0x6AA00000,0x66900001, -0x1740048,}; -static const uint32_t g_etc1_to_bc7_m6_table45[] = { -0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0xBF80000, -0xBF80000,0xBF80000,0xBF80000,0x58000001,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xC00000,0xC00000,0xC00000,0x10C0000,0x17C0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000, -0x1300000,0x1DF40000,0x1DF40000,0x1DF40000,0x64000001,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1DF40000,0x1DF40000,0x1DF40000,0x64000001,0x1DF40000,0x1DF40000,0x1DF40000,0x64000001,0x64000001,0x4D80000,0xCC0000,0xCC0000,0xEC0000,0x1000000,0x1140000,0x1140000,0x3540000,0xEC0000,0x1000000,0x1B00000,0x1DF40000, -0x1B00000,0xE00000,0xE00000,0xE00000,0xE00000,0x14C0000,0x14C0000,0x14C0000,0x2BF80000,0x2BF80000,0x6E000001,0x14C0000,0x14C0000,0x14C0000,0x2BF80000,0x2BF80000,0x6E000001,0x2BF80000,0x2BF80000,0x6E000001,0x6E000001,0x14C0000,0x14C0000,0x14C0000,0x2BF80000,0x2BF80000,0x6E000001,0x2BF80000,0x2BF80000,0x6E000001,0x6E000001,0x2BF80000, -0x2BF80000,0x6E000001,0x6E000001,0x6E000001,0x1040000,0xEEC0000,0xE00000,0x1300000,0x1780000,0x1DC0000,0xFFC0000,0x3DF80000,0x1180000,0x14C0000,0x1DC0000,0x6E000001,0x1DC0000,0xF80000,0x1700000,0x3DF80000,0x7A000001,0x1700000,0x3DF80000,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x1700000,0x3DF80000,0x7A000001,0x3DF80000,0x7A000001, -0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x1700000,0x3DF80000,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x7A000001,0x5340000,0x1080000,0x1080000,0x1A00000,0x23FC0000,0x5BF80000,0x7A000001,0x7A000001,0x1500000,0x1D00000,0x73F80000,0x7A000001, -0x9FC0000,0xD40278,0xA8CC00F4,0x86CC00F4,0x7ACC00F5,0xA2C00090,0x8AC0004D,0x7EC40069,0x80C00090,0x7AC0004D,0x74C00092,0x9EB400F3,0x8CB40032,0x7EBC0053,0x82B40049,0x7AB40002,0x76B4004E,0x7AB400F4,0x78B00051,0x74B00067,0x70B400F3,0x33C0274,0x90A000F3,0x7AB400F4,0x88980092,0x7CA4004A,0x74AC0092,0x868800F3,0x7A940033,0x7494004F,0x709C00F4,0x23FC0274, -0x7A7000F4,0x74680092,0x705400F3,0x6A000274,0xFEB40005,0xFECC0120,0xF0D00181,0xACB40000,0x92B40001,0x82B40002,0x7CB80006,0x7AB40006,0xF69C0002,0xA4AC0002,0x7CB00035,0x7494004F,0x1C80274,0xE000F4,0x9CD80034,0x84D40034,0x7AD40035,0x96CC0048,0x84CC0001,0x7CCC0005,0x7CCC004A,0x7AC8000E,0x74CC004A,0x15000F3,0x8AB80032,0x7CC40033,0x82B40049,0x7AB40002, -0x74BC004A,0x2DF800F3,0x7A940033,0x748C004A,0x700000F3,0x15000F3,0x8AB80032,0x7CC40033,0x82B40049,0x7AB40002,0x74BC004A,0x2DF800F3,0x7A940033,0x748C004A,0x700000F3,0x2DF800F3,0x7A940033,0x748C004A,0x700000F3,0x700000F3,0xF2B80003,0xF2D4006A,0xF6DC005D,0xACB40000,0x92B40001,0x82B40002,0x7CBC0001,0x7AAC0003,0xF69C0001,0xA4AC0001,0x7EA80032,0x748C004A, -0x1E000F3,0xCC00F4,0xCC00F4,0xCC00F4,0xCC00F4,0x90C00048,0x90C00048,0x90C00048,0x76C00049,0x76C00049,0x6EC00049,0x8CB40032,0x8CB40032,0x8CB40032,0x7AB40001,0x7AB40001,0x70B8000E,0x72B40032,0x72B40032,0x6EB40005,0x6AB40032,0x12C00F3,0x12C00F3,0x12C00F3,0x7CA40049,0x7CA40049,0x6EB00049,0x7A940032,0x7A940032,0x6EA00002,0x6AA40032,0x1BF800F3, -0x1BF800F3,0x6E800049,0x6A6C0032,0x640000F3,0xF2B40004,0xFAC40060,0xCC00F4,0xACB40000,0x8EB40001,0x82B40001,0x7EB40004,0x76B40001,0xECA00000,0xA4AC0001,0x7AB00032,0x6EA00002,0x1AC00F3,0xD40034,0xD40034,0xD40034,0xD40034,0x82CC0000,0x82CC0000,0x82CC0000,0x74CC0000,0x74CC0000,0x6ECC0001,0x33C0032,0x33C0032,0x33C0032,0x76BC0001,0x76BC0001, -0x6EC40001,0x23FC0032,0x23FC0032,0x6EA80001,0x6A000032,0x33C0032,0x33C0032,0x33C0032,0x76BC0001,0x76BC0001,0x6EC40001,0x23FC0032,0x23FC0032,0x6EA80001,0x6A000032,0x23FC0032,0x23FC0032,0x6EA80001,0x6A000032,0x6A000032,0xDABC0000,0xFECC0000,0xD40034,0xACB40000,0x8CB80000,0x80B80000,0x7CBC0000,0x76B40001,0xECA00000,0xA4AC0000,0x1C80032,0x6EA80001, -0x1C80032,0xEC0048,0x94E00000,0x80E00001,0x7AE00001,0x35C0048,0x84CC0001,0x7AD40001,0x33FC0048,0x7AB00001,0x7400004A,0x35C0048,0x84CC0001,0x7AD40001,0x33FC0048,0x7AB00001,0x7400004A,0x33FC0048,0x7AB00001,0x7400004A,0x7400004A,0x35C0048,0x84CC0001,0x7AD40001,0x33FC0048,0x7AB00001,0x7400004A,0x33FC0048,0x7AB00001,0x7400004A,0x7400004A,0x33FC0048, -0x7AB00001,0x7400004A,0x7400004A,0x7400004A,0xFEB40001,0xFC0048,0xFAE40005,0xACB40000,0x90B40001,0x84B00000,0x7AC00001,0x7AA00001,0xFE980000,0xA8A80000,0x7CC40001,0x7400004A,0x1F40048,0xC00048,0xC00048,0xC00048,0xC00048,0xC00048,0xC00048,0xC00048,0xC00048,0xC00048,0xC00048,0x7EB40000,0x7EB40000,0x7EB40000,0x7EB40000,0x7EB40000, -0x7EB40000,0x6AB40001,0x6AB40001,0x6AB40001,0x64B40001,0x11C0048,0x11C0048,0x11C0048,0x11C0048,0x11C0048,0x11C0048,0x6EA00001,0x6EA00001,0x6EA00001,0x64A80001,0x13FC0048,0x13FC0048,0x13FC0048,0x64840001,0x5E00004A,0xF2B40004,0xC00048,0xC00048,0xACB40000,0x92B40000,0x86B40000,0x86B40000,0x76B40000,0xE8A00000,0xACA80000,0x72B00000,0x6EA00001, -0x1980048,}; -static const uint32_t g_etc1_to_bc7_m6_table46[] = { -0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x17F80000, -0x17F80000,0x17F80000,0x17F80000,0x60000001,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xD00000,0xD00000,0xD00000,0x1240000,0x1A00000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0x3440000,0x3440000,0x3440000,0x3440000,0x3440000, -0x3440000,0x27FC0000,0x27FC0000,0x27FC0000,0x6C000001,0x3440000,0x3440000,0x3440000,0x3440000,0x3440000,0x3440000,0x27FC0000,0x27FC0000,0x27FC0000,0x6C000001,0x27FC0000,0x27FC0000,0x27FC0000,0x6C000001,0x6C000001,0xCE80000,0xDC0000,0xDC0000,0x1000000,0x1140000,0x12C0000,0x12C0000,0x1700000,0x1000000,0x1140000,0x1D40000,0x27FC0000, -0x1D40000,0xF00000,0xF00000,0xF00000,0xF00000,0x1640000,0x1640000,0x1640000,0x37F80000,0x37F80000,0x76000001,0x1640000,0x1640000,0x1640000,0x37F80000,0x37F80000,0x76000001,0x37F80000,0x37F80000,0x76000001,0x76000001,0x1640000,0x1640000,0x1640000,0x37F80000,0x37F80000,0x76000001,0x37F80000,0x37F80000,0x76000001,0x76000001,0x37F80000, -0x37F80000,0x76000001,0x76000001,0x76000001,0x1180000,0x1000000,0xF00000,0x3440000,0x1940000,0x1FC0000,0x1DF80000,0x47FC0000,0x12C0000,0x1640000,0x1FC0000,0x76000001,0x1FC0000,0x1080000,0x1880000,0x49F80000,0x82000001,0x1880000,0x49F80000,0x82000001,0x49F80000,0x82000001,0x82000001,0x1880000,0x49F80000,0x82000001,0x49F80000,0x82000001, -0x82000001,0x49F80000,0x82000001,0x82000001,0x82000001,0x1880000,0x49F80000,0x82000001,0x49F80000,0x82000001,0x82000001,0x49F80000,0x82000001,0x82000001,0x82000001,0x49F80000,0x82000001,0x82000001,0x82000001,0x82000001,0x14C0000,0x5180000,0x5180000,0x1BC0000,0x31FC0000,0x65F80000,0x82000001,0x82000001,0x1680000,0x1F00000,0x7DCC0000,0x82000001, -0x19FC0000,0xE40278,0xB0DC00F4,0x8EDC00F4,0x82DC00F5,0xAAD00090,0x92D0004D,0x86D40069,0x88D00090,0x82D0004D,0x7CD00092,0xA6C400F3,0x94C40032,0x86CC0053,0x8AC40049,0x82C40002,0x7EC4004E,0x82C400F4,0x80C00051,0x7CC00067,0x78C400F3,0x1540274,0x98B000F3,0x82C400F4,0x90A80092,0x84B4004A,0x7CBC0092,0x8E9800F3,0x82A40033,0x7CA4004F,0x78AC00F4,0x2FFC0274, -0x828000F4,0x7C780092,0x786400F3,0x72000274,0xFAC80007,0xF6DC013A,0xF8E00181,0xB4C40000,0x9AC40001,0x8AC40002,0x84C80006,0x82C40006,0xFEAC0002,0xACBC0002,0x84C00035,0x7CA4004F,0x1E80274,0xF000F4,0xA4E80034,0x8CE40034,0x82E40035,0x9EDC0048,0x8CDC0001,0x84DC0005,0x84DC004A,0x82D8000E,0x7CDC004A,0x16800F3,0x92C80032,0x84D40033,0x8AC40049,0x82C40002, -0x7CCC004A,0x39F800F3,0x82A40033,0x7C9C004A,0x780000F3,0x16800F3,0x92C80032,0x84D40033,0x8AC40049,0x82C40002,0x7CCC004A,0x39F800F3,0x82A40033,0x7C9C004A,0x780000F3,0x39F800F3,0x82A40033,0x7C9C004A,0x780000F3,0x780000F3,0xFAC80003,0xFAE4006A,0xFEEC005D,0xB4C40000,0x9AC40001,0x8AC40002,0x84CC0001,0x82BC0003,0xFEAC0001,0xACBC0001,0x86B80032,0x7C9C004A, -0x3FC00F3,0xDC00F4,0xDC00F4,0xDC00F4,0xDC00F4,0x98D00048,0x98D00048,0x98D00048,0x7ED00049,0x7ED00049,0x76D00049,0x94C40032,0x94C40032,0x94C40032,0x82C40001,0x82C40001,0x78C8000E,0x7AC40032,0x7AC40032,0x76C40005,0x72C40032,0x14400F3,0x14400F3,0x14400F3,0x84B40049,0x84B40049,0x76C00049,0x82A40032,0x82A40032,0x76B00002,0x72B40032,0x27F800F3, -0x27F800F3,0x76900049,0x727C0032,0x6C0000F3,0xFAC40004,0xF2D40069,0xDC00F4,0xB4C40000,0x96C40001,0x8AC40001,0x86C40004,0x7EC40001,0xF4B00000,0xACBC0001,0x82C00032,0x76B00002,0x1D000F3,0xE40034,0xE40034,0xE40034,0xE40034,0x8ADC0000,0x8ADC0000,0x8ADC0000,0x7CDC0000,0x7CDC0000,0x76DC0001,0x1540032,0x1540032,0x1540032,0x7ECC0001,0x7ECC0001, -0x76D40001,0x2FFC0032,0x2FFC0032,0x76B80001,0x72000032,0x1540032,0x1540032,0x1540032,0x7ECC0001,0x7ECC0001,0x76D40001,0x2FFC0032,0x2FFC0032,0x76B80001,0x72000032,0x2FFC0032,0x2FFC0032,0x76B80001,0x72000032,0x72000032,0xE2CC0000,0xF6DC0001,0xE40034,0xB4C40000,0x94C80000,0x88C80000,0x84CC0000,0x7EC40001,0xF4B00000,0xACBC0000,0x1E80032,0x76B80001, -0x1E80032,0xFC0048,0x9CF00000,0x88F00001,0x82F00001,0x3740048,0x8CDC0001,0x82E40001,0x3FFC0048,0x82C00001,0x7C00004A,0x3740048,0x8CDC0001,0x82E40001,0x3FFC0048,0x82C00001,0x7C00004A,0x3FFC0048,0x82C00001,0x7C00004A,0x7C00004A,0x3740048,0x8CDC0001,0x82E40001,0x3FFC0048,0x82C00001,0x7C00004A,0x3FFC0048,0x82C00001,0x7C00004A,0x7C00004A,0x3FFC0048, -0x82C00001,0x7C00004A,0x7C00004A,0x7C00004A,0xFAC80002,0x10C0048,0xF2F40008,0xB4C40000,0x98C40001,0x8CC00000,0x82D00001,0x82B00001,0xFEAC0001,0xB0B80000,0x84D40001,0x7C00004A,0xDFC0048,0xD00048,0xD00048,0xD00048,0xD00048,0xD00048,0xD00048,0xD00048,0xD00048,0xD00048,0xD00048,0x86C40000,0x86C40000,0x86C40000,0x86C40000,0x86C40000, -0x86C40000,0x72C40001,0x72C40001,0x72C40001,0x6CC40001,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x76B00001,0x76B00001,0x76B00001,0x6CB80001,0x1FF80048,0x1FF80048,0x1FF80048,0x6C940001,0x6600004A,0xFAC40004,0xD00048,0xD00048,0xB4C40000,0x9AC40000,0x8EC40000,0x8EC40000,0x7EC40000,0xF0B00000,0xB4B80000,0x7AC00000,0x76B00001, -0x1B80048,}; -static const uint32_t g_etc1_to_bc7_m6_table47[] = { -0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x21FC0000, -0x21FC0000,0x21FC0000,0x21FC0000,0x68000001,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x8E00000,0x8E00000,0x8E00000,0x13C0000,0x1C00000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000, -0x35C0000,0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0x74000001,0xFC0000,0xEC0000,0xEC0000,0x5100000,0x1280000,0x1400000,0x1400000,0x18C0000,0x5100000,0x1280000,0x1F40000,0x33FC0000, -0x1F40000,0x1000000,0x1000000,0x1000000,0x1000000,0x17C0000,0x17C0000,0x17C0000,0x43F80000,0x43F80000,0x7E000001,0x17C0000,0x17C0000,0x17C0000,0x43F80000,0x43F80000,0x7E000001,0x43F80000,0x43F80000,0x7E000001,0x7E000001,0x17C0000,0x17C0000,0x17C0000,0x43F80000,0x43F80000,0x7E000001,0x43F80000,0x43F80000,0x7E000001,0x7E000001,0x43F80000, -0x43F80000,0x7E000001,0x7E000001,0x7E000001,0x3280000,0x1100000,0x1000000,0x15C0000,0x3AC0000,0x11FC0000,0x29FC0000,0x53F80000,0x1400000,0x17C0000,0x11FC0000,0x7E000001,0x11FC0000,0x1180000,0x1A00000,0x55F80000,0x8A000001,0x1A00000,0x55F80000,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x1A00000,0x55F80000,0x8A000001,0x55F80000,0x8A000001, -0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x1A00000,0x55F80000,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x8A000001,0x1600000,0xD280000,0xD280000,0x3D40000,0x3FF80000,0x6FF80000,0x8A000001,0x8A000001,0x17C0000,0xBFC0000,0x85DC0000,0x8A000001, -0x27FC0000,0xF40278,0xB8EC00F4,0x96EC00F4,0x8AEC00F5,0xB2E00090,0x9AE0004D,0x8EE40069,0x90E00090,0x8AE0004D,0x84E00092,0xAED400F3,0x9CD40032,0x8EDC0053,0x92D40049,0x8AD40002,0x86D4004E,0x8AD400F4,0x88D00051,0x84D00067,0x80D400F3,0x16C0274,0xA0C000F3,0x8AD400F4,0x98B80092,0x8CC4004A,0x84CC0092,0x96A800F3,0x8AB40033,0x84B4004F,0x80BC00F4,0x3BFC0274, -0x8A9000F4,0x84880092,0x807400F3,0x7A000274,0xFED8000A,0xFEEC013A,0xF0F00198,0xBCD40000,0xA2D40001,0x92D40002,0x8CD80006,0x8AD40006,0xFCC00004,0xB4CC0002,0x8CD00035,0x84B4004F,0x7FC0274,0x10000F4,0xACF80034,0x94F40034,0x8AF40035,0xA6EC0048,0x94EC0001,0x8CEC0005,0x8CEC004A,0x8AE8000E,0x84EC004A,0x18000F3,0x9AD80032,0x8CE40033,0x92D40049,0x8AD40002, -0x84DC004A,0x45F800F3,0x8AB40033,0x84AC004A,0x800000F3,0x18000F3,0x9AD80032,0x8CE40033,0x92D40049,0x8AD40002,0x84DC004A,0x45F800F3,0x8AB40033,0x84AC004A,0x800000F3,0x45F800F3,0x8AB40033,0x84AC004A,0x800000F3,0x800000F3,0xF6DC0006,0xF4F80074,0xF6FC0068,0xBCD40000,0xA2D40001,0x92D40002,0x8CDC0001,0x8ACC0003,0xFCC00004,0xB4CC0001,0x8EC80032,0x84AC004A, -0x13FC00F3,0xEC00F4,0xEC00F4,0xEC00F4,0xEC00F4,0xA0E00048,0xA0E00048,0xA0E00048,0x86E00049,0x86E00049,0x7EE00049,0x9CD40032,0x9CD40032,0x9CD40032,0x8AD40001,0x8AD40001,0x80D8000E,0x82D40032,0x82D40032,0x7ED40005,0x7AD40032,0x15C00F3,0x15C00F3,0x15C00F3,0x8CC40049,0x8CC40049,0x7ED00049,0x8AB40032,0x8AB40032,0x7EC00002,0x7AC40032,0x33F800F3, -0x33F800F3,0x7EA00049,0x7A8C0032,0x740000F3,0xF6D80005,0xFAE40069,0xEC00F4,0xBCD40000,0x9ED40001,0x92D40001,0x8ED40004,0x86D40001,0xFCC00000,0xB4CC0001,0x8AD00032,0x7EC00002,0x1F000F3,0xF40034,0xF40034,0xF40034,0xF40034,0x92EC0000,0x92EC0000,0x92EC0000,0x84EC0000,0x84EC0000,0x7EEC0001,0x16C0032,0x16C0032,0x16C0032,0x86DC0001,0x86DC0001, -0x7EE40001,0x3BFC0032,0x3BFC0032,0x7EC80001,0x7A000032,0x16C0032,0x16C0032,0x16C0032,0x86DC0001,0x86DC0001,0x7EE40001,0x3BFC0032,0x3BFC0032,0x7EC80001,0x7A000032,0x3BFC0032,0x3BFC0032,0x7EC80001,0x7A000032,0x7A000032,0xEADC0000,0xFEEC0001,0xF40034,0xBCD40000,0x9CD80000,0x90D80000,0x8CDC0000,0x86D40001,0xFCC00000,0xB4CC0000,0x7FC0032,0x7EC80001, -0x7FC0032,0x10C0048,0xA5000000,0x91000001,0x8B000001,0x38C0048,0x94EC0001,0x8AF40001,0x4BFC0048,0x8AD00001,0x8400004A,0x38C0048,0x94EC0001,0x8AF40001,0x4BFC0048,0x8AD00001,0x8400004A,0x4BFC0048,0x8AD00001,0x8400004A,0x8400004A,0x38C0048,0x94EC0001,0x8AF40001,0x4BFC0048,0x8AD00001,0x8400004A,0x4BFC0048,0x8AD00001,0x8400004A,0x8400004A,0x4BFC0048, -0x8AD00001,0x8400004A,0x8400004A,0x8400004A,0xF2E00005,0x71C0048,0xFB040008,0xBCD40000,0xA0D40001,0x94D00000,0x8AE00001,0x8AC00001,0xF4C80002,0xB8C80000,0x8CE40001,0x8400004A,0x1DF80048,0xE00048,0xE00048,0xE00048,0xE00048,0xE00048,0xE00048,0xE00048,0xE00048,0xE00048,0xE00048,0x8ED40000,0x8ED40000,0x8ED40000,0x8ED40000,0x8ED40000, -0x8ED40000,0x7AD40001,0x7AD40001,0x7AD40001,0x74D40001,0x14C0048,0x14C0048,0x14C0048,0x14C0048,0x14C0048,0x14C0048,0x7EC00001,0x7EC00001,0x7EC00001,0x74C80001,0x2BF80048,0x2BF80048,0x2BF80048,0x74A40001,0x6E00004A,0xF4D80005,0xE00048,0xE00048,0xBCD40000,0xA2D40000,0x96D40000,0x96D40000,0x86D40000,0xF8C00000,0xBCC80000,0x82D00000,0x7EC00001, -0x1DC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table48[] = { -0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x2FFC0000, -0x2FFC0000,0x2FFC0000,0x2FFC0000,0x72000000,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0x2F40000,0x2F40000,0x2F40000,0x1540000,0x1E80000,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000, -0x1780000,0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0x7E000000,0xF0C0000,0xFC0001,0xFC0001,0x1280000,0x33C0000,0x1580000,0x1580000,0x1AC0000,0x1280000,0x33C0000,0xFFC0000,0x41FC0000, -0xFFC0000,0x1100001,0x1100001,0x1100001,0x1100001,0x1980000,0x1980000,0x1980000,0x51F80000,0x51F80000,0x88000000,0x1980000,0x1980000,0x1980000,0x51F80000,0x51F80000,0x88000000,0x51F80000,0x51F80000,0x88000000,0x88000000,0x1980000,0x1980000,0x1980000,0x51F80000,0x51F80000,0x88000000,0x51F80000,0x51F80000,0x88000000,0x88000000,0x51F80000, -0x51F80000,0x88000000,0x88000000,0x88000000,0x73C0000,0x1240000,0x1100001,0x1740000,0x1CC0000,0x21FC0000,0x39FC0000,0x5FF80000,0x1580000,0x1980000,0x21FC0000,0x88000000,0x21FC0000,0x1280001,0x1BC0000,0x61FC0000,0x94000000,0x1BC0000,0x61FC0000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x1BC0000,0x61FC0000,0x94000000,0x61FC0000,0x94000000, -0x94000000,0x61FC0000,0x94000000,0x94000000,0x94000000,0x1BC0000,0x61FC0000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x94000000,0x94000000,0x3740000,0x73C0000,0x73C0000,0x1F40000,0x4DFC0000,0x7BF40000,0x94000000,0x94000000,0x1940000,0x1DFC0000,0x8FD00000,0x94000000, -0x39FC0000,0x1080274,0xC2FC00F3,0x9EFC00F4,0x94FC00F3,0xB8F40092,0xA0F4004F,0x96F40067,0x98F40092,0x94F0004E,0x8EF00092,0xB6E800F4,0xA4E80033,0x96EC0051,0x9CE4004A,0x94E80002,0x8EE8004D,0x94E400F4,0x90E00053,0x8CE00069,0x88E400F5,0x1880274,0xAAD000F3,0x94E800F4,0xA0CC0092,0x94D80049,0x8EDC0090,0x9EBC00F3,0x94C40032,0x8EC4004D,0x88D000F4,0x49F80274, -0x949C00F3,0x8E980090,0x888C00F4,0x82000278,0xFEEC0012,0xF9000154,0xFB040194,0xC2E80002,0xAAE80002,0x9CE80002,0x94E80006,0x92E40006,0xFCD80009,0xC0DC0000,0x96E00035,0x8EC4004D,0x19FC0274,0x11400F3,0xB7080032,0x9D080032,0x95080032,0xAD000049,0x9EFC0002,0x95000005,0x96FC0049,0x92FC000E,0x8EFC0049,0x39800F3,0xA4E80032,0x94F80032,0x9CE40049,0x94E80001, -0x8EEC0049,0x51FC00F3,0x94C00032,0x8EBC0048,0x880000F4,0x39800F3,0xA4E80032,0x94F80032,0x9CE40049,0x94E80001,0x8EEC0049,0x51FC00F3,0x94C00032,0x8EBC0048,0x880000F4,0x51FC00F3,0x94C00032,0x8EBC0048,0x880000F4,0x880000F4,0xFAF00006,0xFD080073,0xFF0C006B,0xC2E80001,0xAAE80001,0x9CE80001,0x94F00001,0x94E00004,0xFCD80005,0xC0DC0000,0x96DC0033,0x8EBC0048, -0x23FC00F3,0xFC00F3,0xFC00F3,0xFC00F3,0xFC00F3,0xA6F4004A,0xA6F4004A,0xA6F4004A,0x90F0004A,0x90F0004A,0x88F0004A,0xA4E80033,0xA4E80033,0xA4E80033,0x92E80002,0x92E80002,0x8AE8000E,0x8CE40033,0x8CE40033,0x88E40005,0x82E40035,0x37400F3,0x37400F3,0x37400F3,0x94D80049,0x94D80049,0x88E0004A,0x92C80032,0x92C80032,0x88D00001,0x84D40034,0x3FFC00F3, -0x3FFC00F3,0x88B00048,0x82A40034,0x7E0000F4,0xFEE80006,0xF4F80073,0xFC00F3,0xC2E80001,0xA6E80002,0x9AE80002,0x98E80003,0x90E40001,0xFCD40001,0xC0DC0000,0x92E40033,0x88D00001,0xDFC00F3,0x1080032,0x1080032,0x1080032,0x1080032,0x9B000001,0x9B000001,0x9B000001,0x8D000001,0x8D000001,0x88FC0001,0x1880032,0x1880032,0x1880032,0x90EC0001,0x90EC0001, -0x88F40000,0x49F80032,0x49F80032,0x88D40000,0x82000034,0x1880032,0x1880032,0x1880032,0x90EC0001,0x90EC0001,0x88F40000,0x49F80032,0x49F80032,0x88D40000,0x82000034,0x49F80032,0x49F80032,0x88D40000,0x82000034,0x82000034,0xEEF00000,0xF9000002,0x1080032,0xC2E80000,0xA0F00001,0x98EC0000,0x92F00001,0x90E40000,0xF0DC0001,0xBAE00000,0x19FC0032,0x88D40000, -0x19FC0032,0x11C004A,0xAB140001,0x9B100001,0x95100001,0x1A80048,0x9EFC0001,0x95040001,0x59FC0048,0x94E00000,0x8E000048,0x1A80048,0x9EFC0001,0x95040001,0x59FC0048,0x94E00000,0x8E000048,0x59FC0048,0x94E00000,0x8E000048,0x8E000048,0x1A80048,0x9EFC0001,0x95040001,0x59FC0048,0x94E00000,0x8E000048,0x59FC0048,0x94E00000,0x8E000048,0x8E000048,0x59FC0048, -0x94E00000,0x8E000048,0x8E000048,0x8E000048,0xFAF00005,0x1300048,0xF518000D,0xC6E40000,0xAAE40001,0x9EE00000,0x94F00000,0x94CC0000,0xFCD80004,0xC0DC0000,0x96F40000,0x8E000048,0x2DFC0048,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0xF0004A,0x94E80001,0x94E80001,0x94E80001,0x94E80001,0x94E80001, -0x94E80001,0x84E40001,0x84E40001,0x84E40001,0x7EE40001,0x1680048,0x1680048,0x1680048,0x1680048,0x1680048,0x1680048,0x88D00001,0x88D00001,0x88D00001,0x7ED80001,0x39F80048,0x39F80048,0x39F80048,0x7EB40000,0x78000048,0xFCE80005,0xF0004A,0xF0004A,0xBEE80001,0xA8E80001,0x9CE80001,0x9CE80001,0x8EE80001,0xFCD40000,0xC0DC0000,0x88E40001,0x88D00001, -0x3FC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table49[] = { -0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000, -0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xB040000,0xB040000,0xB040000,0x16C0000,0x7FC0000,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000, -0x1900000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x86000000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x86000000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x86000000,0x86000000,0x1200000,0x10C0001,0x10C0001,0x5380000,0x5500000,0x1700000,0x1700000,0x1C40000,0x5380000,0x5500000,0x1FF80000,0x4DFC0000, -0x1FF80000,0x1200001,0x1200001,0x1200001,0x1200001,0x1B00000,0x1B00000,0x1B00000,0x5DF40000,0x5DF40000,0x90000000,0x1B00000,0x1B00000,0x1B00000,0x5DF40000,0x5DF40000,0x90000000,0x5DF40000,0x5DF40000,0x90000000,0x90000000,0x1B00000,0x1B00000,0x1B00000,0x5DF40000,0x5DF40000,0x90000000,0x5DF40000,0x5DF40000,0x90000000,0x90000000,0x5DF40000, -0x5DF40000,0x90000000,0x90000000,0x90000000,0x1500000,0x3340000,0x1200001,0x3880000,0x1E80000,0x31FC0000,0x47F80000,0x69FC0000,0x16C0000,0x1B00000,0x31FC0000,0x90000000,0x31FC0000,0x1380001,0x3D00000,0x6DFC0000,0x9C000000,0x3D00000,0x6DFC0000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x3D00000,0x6DFC0000,0x9C000000,0x6DFC0000,0x9C000000, -0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x3D00000,0x6DFC0000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x9C000000,0x3880000,0xF4C0000,0xF4C0000,0xFFC0000,0x5BFC0000,0x85F40000,0x9C000000,0x9C000000,0x1AC0000,0x2FFC0000,0x97E00000,0x9C000000, -0x47FC0000,0x1180274,0xCB0C00F3,0xA70C00F4,0x9D0C00F3,0xC1040092,0xA904004F,0x9F040067,0xA1040092,0x9D00004E,0x97000092,0xBEF800F4,0xACF80033,0x9EFC0051,0xA4F4004A,0x9CF80002,0x96F8004D,0x9CF400F4,0x98F00053,0x94F00069,0x90F400F5,0x1A00274,0xB2E000F3,0x9CF800F4,0xA8DC0092,0x9CE80049,0x96EC0090,0xA6CC00F3,0x9CD40032,0x96D4004D,0x90E000F4,0x55F80274, -0x9CAC00F3,0x96A80090,0x909C00F4,0x8A000278,0xFCFC0024,0xFF0C0164,0xF31401AB,0xCAF80002,0xB2F80002,0xA4F80002,0x9CF80006,0x9AF40006,0xFCEC0016,0xC8EC0000,0x9EF00035,0x96D4004D,0x27FC0274,0x12400F3,0xBF180032,0xA5180032,0x9D180032,0xB5100049,0xA70C0002,0x9D100005,0x9F0C0049,0x9B0C000E,0x970C0049,0x3B000F3,0xACF80032,0x9D080032,0xA4F40049,0x9CF80001, -0x96FC0049,0x5DFC00F3,0x9CD00032,0x96CC0048,0x900000F4,0x3B000F3,0xACF80032,0x9D080032,0xA4F40049,0x9CF80001,0x96FC0049,0x5DFC00F3,0x9CD00032,0x96CC0048,0x900000F4,0x5DFC00F3,0x9CD00032,0x96CC0048,0x900000F4,0x900000F4,0xFF00000B,0xF5180081,0xF9200076,0xCAF80001,0xB2F80001,0xA4F80001,0x9D000001,0x9CF00004,0xFCEC000D,0xC8EC0000,0x9EEC0033,0x96CC0048, -0x33FC00F3,0x10C00F3,0x10C00F3,0x10C00F3,0x10C00F3,0xAF04004A,0xAF04004A,0xAF04004A,0x9900004A,0x9900004A,0x9100004A,0xACF80033,0xACF80033,0xACF80033,0x9AF80002,0x9AF80002,0x92F8000E,0x94F40033,0x94F40033,0x90F40005,0x8AF40035,0x38C00F3,0x38C00F3,0x38C00F3,0x9CE80049,0x9CE80049,0x90F0004A,0x9AD80032,0x9AD80032,0x90E00001,0x8CE40034,0x4BFC00F3, -0x4BFC00F3,0x90C00048,0x8AB40034,0x860000F4,0xFAFC000B,0xFD080073,0x10C00F3,0xCAF80001,0xAEF80002,0xA2F80002,0xA0F80003,0x98F40001,0xF8E80005,0xC8EC0000,0x9AF40033,0x90E00001,0x1DF800F3,0x1180032,0x1180032,0x1180032,0x1180032,0xA3100001,0xA3100001,0xA3100001,0x95100001,0x95100001,0x910C0001,0x1A00032,0x1A00032,0x1A00032,0x98FC0001,0x98FC0001, -0x91040000,0x55F80032,0x55F80032,0x90E40000,0x8A000034,0x1A00032,0x1A00032,0x1A00032,0x98FC0001,0x98FC0001,0x91040000,0x55F80032,0x55F80032,0x90E40000,0x8A000034,0x55F80032,0x55F80032,0x90E40000,0x8A000034,0x8A000034,0xF7000000,0xF1100005,0x1180032,0xCAF80000,0xA9000001,0xA0FC0000,0x9B000001,0x98F40000,0xF8EC0001,0xC2F00000,0x27FC0032,0x90E40000, -0x27FC0032,0x12C004A,0xB3240001,0xA3200001,0x9D200001,0x1C00048,0xA70C0001,0x9D140001,0x65F80048,0x9CF00000,0x96000048,0x1C00048,0xA70C0001,0x9D140001,0x65F80048,0x9CF00000,0x96000048,0x65F80048,0x9CF00000,0x96000048,0x96000048,0x1C00048,0xA70C0001,0x9D140001,0x65F80048,0x9CF00000,0x96000048,0x65F80048,0x9CF00000,0x96000048,0x96000048,0x65F80048, -0x9CF00000,0x96000048,0x96000048,0x96000048,0xFB040008,0x9400048,0xFD28000D,0xCEF40000,0xB2F40001,0xA6F00000,0x9D000000,0x9CDC0000,0xFEF00005,0xC8EC0000,0x9F040000,0x96000048,0x3DF80048,0x100004A,0x100004A,0x100004A,0x100004A,0x100004A,0x100004A,0x100004A,0x100004A,0x100004A,0x100004A,0x9CF80001,0x9CF80001,0x9CF80001,0x9CF80001,0x9CF80001, -0x9CF80001,0x8CF40001,0x8CF40001,0x8CF40001,0x86F40001,0x1800048,0x1800048,0x1800048,0x1800048,0x1800048,0x1800048,0x90E00001,0x90E00001,0x90E00001,0x86E80001,0x45F80048,0x45F80048,0x45F80048,0x86C40000,0x80000048,0xF4F8000A,0x100004A,0x100004A,0xC6F80001,0xB0F80001,0xA4F80001,0xA4F80001,0x96F80001,0xF8E80001,0xC8EC0000,0x90F40001,0x90E00001, -0x13FC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table50[] = { -0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x47FC0000, -0x47FC0000,0x47FC0000,0x47FC0000,0x82000000,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1180000,0x1180000,0x1180000,0x1840000,0x17FC0000,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000, -0x1A80000,0x59FC0000,0x59FC0000,0x59FC0000,0x8E000000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x59FC0000,0x59FC0000,0x59FC0000,0x8E000000,0x59FC0000,0x59FC0000,0x59FC0000,0x8E000000,0x8E000000,0x1300000,0x11C0001,0x11C0001,0x14C0000,0x5640000,0x1840000,0x1840000,0x1E00000,0x14C0000,0x5640000,0x2DFC0000,0x59FC0000, -0x2DFC0000,0x1300001,0x1300001,0x1300001,0x1300001,0x3C40000,0x3C40000,0x3C40000,0x67FC0000,0x67FC0000,0x98000000,0x3C40000,0x3C40000,0x3C40000,0x67FC0000,0x67FC0000,0x98000000,0x67FC0000,0x67FC0000,0x98000000,0x98000000,0x3C40000,0x3C40000,0x3C40000,0x67FC0000,0x67FC0000,0x98000000,0x67FC0000,0x67FC0000,0x98000000,0x98000000,0x67FC0000, -0x67FC0000,0x98000000,0x98000000,0x98000000,0x1640000,0xB440000,0x1300001,0x1A00000,0x5FC0000,0x3FFC0000,0x55F80000,0x75F80000,0x1800000,0x3C40000,0x3FFC0000,0x98000000,0x3FFC0000,0x1480001,0x3E80000,0x79FC0000,0xA4000000,0x3E80000,0x79FC0000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0x3E80000,0x79FC0000,0xA4000000,0x79FC0000,0xA4000000, -0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0x3E80000,0x79FC0000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0xA4000000,0x39C0000,0x1600000,0x1600000,0x23FC0000,0x69F80000,0x8FF40000,0xA4000000,0xA4000000,0x1C00000,0x41FC0000,0x9FF00000,0xA4000000, -0x57FC0000,0x1280274,0xD31C00F3,0xAF1C00F4,0xA51C00F3,0xC9140092,0xB114004F,0xA7140067,0xA9140092,0xA510004E,0x9F100092,0xC70800F4,0xB5080033,0xA70C0051,0xAD04004A,0xA5080002,0x9F08004D,0xA50400F4,0xA1000053,0x9D000069,0x990400F5,0x1B80274,0xBAF000F3,0xA50800F4,0xB0EC0092,0xA4F80049,0x9EFC0090,0xAEDC00F3,0xA4E40032,0x9EE4004D,0x98F000F4,0x61F80274, -0xA4BC00F3,0x9EB80090,0x98AC00F4,0x92000278,0xFF0C0032,0xF9200172,0xFB2401AB,0xD3080002,0xBB080002,0xAD080002,0xA5080006,0xA3040006,0xFCFC0024,0xD0FC0000,0xA7000035,0x9EE4004D,0x37FC0274,0x13400F3,0xC7280032,0xAD280032,0xA5280032,0xBD200049,0xAF1C0002,0xA5200005,0xA71C0049,0xA31C000E,0x9F1C0049,0x1C800F3,0xB5080032,0xA5180032,0xAD040049,0xA5080001, -0x9F0C0049,0x69FC00F3,0xA4E00032,0x9EDC0048,0x980000F4,0x1C800F3,0xB5080032,0xA5180032,0xAD040049,0xA5080001,0x9F0C0049,0x69FC00F3,0xA4E00032,0x9EDC0048,0x980000F4,0x69FC00F3,0xA4E00032,0x9EDC0048,0x980000F4,0x980000F4,0xFF14000E,0xFD280081,0xFF2C007A,0xD3080001,0xBB080001,0xAD080001,0xA5100001,0xA5000004,0xFF000013,0xD0FC0000,0xA6FC0033,0x9EDC0048, -0x41FC00F3,0x11C00F3,0x11C00F3,0x11C00F3,0x11C00F3,0xB714004A,0xB714004A,0xB714004A,0xA110004A,0xA110004A,0x9910004A,0xB5080033,0xB5080033,0xB5080033,0xA3080002,0xA3080002,0x9B08000E,0x9D040033,0x9D040033,0x99040005,0x93040035,0x3A400F3,0x3A400F3,0x3A400F3,0xA4F80049,0xA4F80049,0x9900004A,0xA2E80032,0xA2E80032,0x98F00001,0x94F40034,0x57FC00F3, -0x57FC00F3,0x98D00048,0x92C40034,0x8E0000F4,0xFF0C000E,0xF518007E,0x11C00F3,0xD3080001,0xB7080002,0xAB080002,0xA9080003,0xA1040001,0xFEF80006,0xD0FC0000,0xA3040033,0x98F00001,0x2BFC00F3,0x1280032,0x1280032,0x1280032,0x1280032,0xAB200001,0xAB200001,0xAB200001,0x9D200001,0x9D200001,0x991C0001,0x1B80032,0x1B80032,0x1B80032,0xA10C0001,0xA10C0001, -0x99140000,0x61F80032,0x61F80032,0x98F40000,0x92000034,0x1B80032,0x1B80032,0x1B80032,0xA10C0001,0xA10C0001,0x99140000,0x61F80032,0x61F80032,0x98F40000,0x92000034,0x61F80032,0x61F80032,0x98F40000,0x92000034,0x92000034,0xFF100000,0xF9200005,0x1280032,0xD3080000,0xB1100001,0xA90C0000,0xA3100001,0xA1040000,0xF5000002,0xCB000000,0x37FC0032,0x98F40000, -0x37FC0032,0x13C004A,0xBB340001,0xAB300001,0xA5300001,0x1D80048,0xAF1C0001,0xA5240001,0x71F80048,0xA5000000,0x9E000048,0x1D80048,0xAF1C0001,0xA5240001,0x71F80048,0xA5000000,0x9E000048,0x71F80048,0xA5000000,0x9E000048,0x9E000048,0x1D80048,0xAF1C0001,0xA5240001,0x71F80048,0xA5000000,0x9E000048,0x71F80048,0xA5000000,0x9E000048,0x9E000048,0x71F80048, -0xA5000000,0x9E000048,0x9E000048,0x9E000048,0xFF14000A,0x1540048,0xF5380012,0xD7040000,0xBB040001,0xAF000000,0xA5100000,0xA4EC0000,0xF7080008,0xD0FC0000,0xA7140000,0x9E000048,0x4BFC0048,0x110004A,0x110004A,0x110004A,0x110004A,0x110004A,0x110004A,0x110004A,0x110004A,0x110004A,0x110004A,0xA5080001,0xA5080001,0xA5080001,0xA5080001,0xA5080001, -0xA5080001,0x95040001,0x95040001,0x95040001,0x8F040001,0x1980048,0x1980048,0x1980048,0x1980048,0x1980048,0x1980048,0x98F00001,0x98F00001,0x98F00001,0x8EF80001,0x51F80048,0x51F80048,0x51F80048,0x8ED40000,0x88000048,0xFD08000A,0x110004A,0x110004A,0xCF080001,0xB9080001,0xAD080001,0xAD080001,0x9F080001,0xF8F80002,0xD0FC0000,0x99040001,0x98F00001, -0x21FC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table51[] = { -0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x53FC0000, -0x53FC0000,0x53FC0000,0x53FC0000,0x8A000000,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1280000,0x1280000,0x1280000,0x19C0000,0x25FC0000,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000, -0x1C00000,0x65F80000,0x65F80000,0x65F80000,0x96000000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x65F80000,0x65F80000,0x65F80000,0x96000000,0x65F80000,0x65F80000,0x65F80000,0x96000000,0x96000000,0x9400000,0x12C0001,0x12C0001,0x1600000,0x5780000,0x3980000,0x3980000,0x1FC0000,0x1600000,0x5780000,0x3DF80000,0x65F80000, -0x3DF80000,0x1400001,0x1400001,0x1400001,0x1400001,0x3DC0000,0x3DC0000,0x3DC0000,0x73FC0000,0x73FC0000,0xA0000000,0x3DC0000,0x3DC0000,0x3DC0000,0x73FC0000,0x73FC0000,0xA0000000,0x73FC0000,0x73FC0000,0xA0000000,0xA0000000,0x3DC0000,0x3DC0000,0x3DC0000,0x73FC0000,0x73FC0000,0xA0000000,0x73FC0000,0x73FC0000,0xA0000000,0xA0000000,0x73FC0000, -0x73FC0000,0xA0000000,0xA0000000,0xA0000000,0x5740000,0x1580000,0x1400001,0x3B40000,0x19FC0000,0x4FFC0000,0x61FC0000,0x7FFC0000,0x1940000,0x3DC0000,0x4FFC0000,0xA0000000,0x4FFC0000,0x1580001,0x7FC0000,0x85FC0000,0xAC000000,0x7FC0000,0x85FC0000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0x7FC0000,0x85FC0000,0xAC000000,0x85FC0000,0xAC000000, -0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0x7FC0000,0x85FC0000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0xAC000000,0x3B00000,0x1700000,0x1700000,0x37FC0000,0x75FC0000,0x99F40000,0xAC000000,0xAC000000,0x3D40000,0x51FC0000,0xA9C40000,0xAC000000, -0x65FC0000,0x1380274,0xDB2C00F3,0xB72C00F4,0xAD2C00F3,0xD1240092,0xB924004F,0xAF240067,0xB1240092,0xAD20004E,0xA7200092,0xCF1800F4,0xBD180033,0xAF1C0051,0xB514004A,0xAD180002,0xA718004D,0xAD1400F4,0xA9100053,0xA5100069,0xA11400F5,0x1D00274,0xC30000F3,0xAD1800F4,0xB8FC0092,0xAD080049,0xA70C0090,0xB6EC00F3,0xACF40032,0xA6F4004D,0xA10000F4,0x6DF80274, -0xACCC00F3,0xA6C80090,0xA0BC00F4,0x9A000278,0xFF20003E,0xFF2C018A,0xF33401C4,0xDB180002,0xC3180002,0xB5180002,0xAD180006,0xAB140006,0xFF10002E,0xD90C0000,0xAF100035,0xA6F4004D,0x45FC0274,0x14400F3,0xCF380032,0xB5380032,0xAD380032,0xC5300049,0xB72C0002,0xAD300005,0xAF2C0049,0xAB2C000E,0xA72C0049,0x1E000F3,0xBD180032,0xAD280032,0xB5140049,0xAD180001, -0xA71C0049,0x75FC00F3,0xACF00032,0xA6EC0048,0xA00000F4,0x1E000F3,0xBD180032,0xAD280032,0xB5140049,0xAD180001,0xA71C0049,0x75FC00F3,0xACF00032,0xA6EC0048,0xA00000F4,0x75FC00F3,0xACF00032,0xA6EC0048,0xA00000F4,0xA00000F4,0xF928001A,0xF73C008B,0xF9400083,0xDB180001,0xC3180001,0xB5180001,0xAD200001,0xAD100004,0xFD140019,0xD90C0000,0xAF0C0033,0xA6EC0048, -0x51FC00F3,0x12C00F3,0x12C00F3,0x12C00F3,0x12C00F3,0xBF24004A,0xBF24004A,0xBF24004A,0xA920004A,0xA920004A,0xA120004A,0xBD180033,0xBD180033,0xBD180033,0xAB180002,0xAB180002,0xA318000E,0xA5140033,0xA5140033,0xA1140005,0x9B140035,0x3BC00F3,0x3BC00F3,0x3BC00F3,0xAD080049,0xAD080049,0xA110004A,0xAAF80032,0xAAF80032,0xA1000001,0x9D040034,0x63FC00F3, -0x63FC00F3,0xA0E00048,0x9AD40034,0x960000F4,0xFD1C0016,0xFD28007E,0x12C00F3,0xDB180001,0xBF180002,0xB3180002,0xB1180003,0xA9140001,0xFD0C000D,0xD90C0000,0xAB140033,0xA1000001,0x3BFC00F3,0x1380032,0x1380032,0x1380032,0x1380032,0xB3300001,0xB3300001,0xB3300001,0xA5300001,0xA5300001,0xA12C0001,0x1D00032,0x1D00032,0x1D00032,0xA91C0001,0xA91C0001, -0xA1240000,0x6DF80032,0x6DF80032,0xA1040000,0x9A000034,0x1D00032,0x1D00032,0x1D00032,0xA91C0001,0xA91C0001,0xA1240000,0x6DF80032,0x6DF80032,0xA1040000,0x9A000034,0x6DF80032,0x6DF80032,0xA1040000,0x9A000034,0x9A000034,0xFF200001,0xF130000A,0x1380032,0xDB180000,0xB9200001,0xB11C0000,0xAB200001,0xA9140000,0xFD100002,0xD3100000,0x45FC0032,0xA1040000, -0x45FC0032,0x14C004A,0xC3440001,0xB3400001,0xAD400001,0x1F00048,0xB72C0001,0xAD340001,0x7DF80048,0xAD100000,0xA6000048,0x1F00048,0xB72C0001,0xAD340001,0x7DF80048,0xAD100000,0xA6000048,0x7DF80048,0xAD100000,0xA6000048,0xA6000048,0x1F00048,0xB72C0001,0xAD340001,0x7DF80048,0xAD100000,0xA6000048,0x7DF80048,0xAD100000,0xA6000048,0xA6000048,0x7DF80048, -0xAD100000,0xA6000048,0xA6000048,0xA6000048,0xFB2C000D,0x1640048,0xFD480012,0xDF140000,0xC3140001,0xB7100000,0xAD200000,0xACFC0000,0xFF180008,0xD90C0000,0xAF240000,0xA6000048,0x5BFC0048,0x120004A,0x120004A,0x120004A,0x120004A,0x120004A,0x120004A,0x120004A,0x120004A,0x120004A,0x120004A,0xAD180001,0xAD180001,0xAD180001,0xAD180001,0xAD180001, -0xAD180001,0x9D140001,0x9D140001,0x9D140001,0x97140001,0x1B00048,0x1B00048,0x1B00048,0x1B00048,0x1B00048,0x1B00048,0xA1000001,0xA1000001,0xA1000001,0x97080001,0x5DF40048,0x5DF40048,0x5DF40048,0x96E40000,0x90000048,0xF71C000D,0x120004A,0x120004A,0xD7180001,0xC1180001,0xB5180001,0xB5180001,0xA7180001,0xFB080004,0xD90C0000,0xA1140001,0xA1000001, -0x31FC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table52[] = { -0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x61F80000, -0x61F80000,0x61F80000,0x61F80000,0x92000001,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x13C0000,0x13C0000,0x13C0000,0x1B80000,0x37FC0000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000, -0x1DC0000,0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x9E000001,0x3540000,0x1400000,0x1400000,0x1740000,0x1900000,0x1B40000,0x1B40000,0x17FC0000,0x1740000,0x1900000,0x4DFC0000,0x73F80000, -0x4DFC0000,0x1540000,0x1540000,0x1540000,0x1540000,0x1F80000,0x1F80000,0x1F80000,0x81FC0000,0x81FC0000,0xA8000001,0x1F80000,0x1F80000,0x1F80000,0x81FC0000,0x81FC0000,0xA8000001,0x81FC0000,0x81FC0000,0xA8000001,0xA8000001,0x1F80000,0x1F80000,0x1F80000,0x81FC0000,0x81FC0000,0xA8000001,0x81FC0000,0x81FC0000,0xA8000001,0xA8000001,0x81FC0000, -0x81FC0000,0xA8000001,0xA8000001,0xA8000001,0x18C0000,0xD680000,0x1540000,0x3CC0000,0x2FFC0000,0x5FFC0000,0x71FC0000,0x8BFC0000,0x3A80000,0x1F80000,0x5FFC0000,0xA8000001,0x5FFC0000,0x16C0000,0x23FC0000,0x93FC0000,0xB4000001,0x23FC0000,0x93FC0000,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0x23FC0000,0x93FC0000,0xB4000001,0x93FC0000,0xB4000001, -0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0x23FC0000,0x93FC0000,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0xB4000001,0x1C80000,0x1840000,0x1840000,0x4DFC0000,0x85FC0000,0xA5F00000,0xB4000001,0xB4000001,0x1F00000,0x65FC0000,0xB1F40000,0xB4000001, -0x77FC0000,0x1480278,0xE34000F4,0xC14000F4,0xB54000F5,0xDD340090,0xC534004D,0xB9380069,0xBB340090,0xB534004D,0xAF340092,0xD92800F3,0xC7280032,0xB9300053,0xBD280049,0xB5280002,0xB128004E,0xB52800F4,0xB3240051,0xAF240067,0xAB2800F3,0x3E80274,0xCB1400F3,0xB52800F4,0xC30C0092,0xB718004A,0xAF200092,0xC0FC00F3,0xB5080033,0xAF08004F,0xAB1000F4,0x79FC0274, -0xB4E400F4,0xAEDC0092,0xAAC800F3,0xA4000274,0xFF340059,0xFB440190,0xFD4801C4,0xE7280000,0xCD280001,0xBD280002,0xB72C0006,0xB5280006,0xFF24004F,0xDF200002,0xB7240035,0xAF08004F,0x57FC0274,0x15400F4,0xD74C0034,0xBF480034,0xB5480035,0xD1400048,0xBF400001,0xB7400005,0xB740004A,0xB53C000E,0xAF40004A,0x1FC00F3,0xC52C0032,0xB7380033,0xBD280049,0xB5280002, -0xAF30004A,0x83F800F3,0xB5080033,0xAF00004A,0xAA0000F3,0x1FC00F3,0xC52C0032,0xB7380033,0xBD280049,0xB5280002,0xAF30004A,0x83F800F3,0xB5080033,0xAF00004A,0xAA0000F3,0x83F800F3,0xB5080033,0xAF00004A,0xAA0000F3,0xAA0000F3,0xFF3C0024,0xFF4C008C,0xF1500095,0xE7280000,0xCD280001,0xBD280002,0xB7300001,0xB5200003,0xFB2C002A,0xDF200001,0xB91C0032,0xAF00004A, -0x61FC00F3,0x14000F4,0x14000F4,0x14000F4,0x14000F4,0xCB340048,0xCB340048,0xCB340048,0xB1340049,0xB1340049,0xA9340049,0xC7280032,0xC7280032,0xC7280032,0xB5280001,0xB5280001,0xAB2C000E,0xAD280032,0xAD280032,0xA9280005,0xA5280032,0x1D800F3,0x1D800F3,0x1D800F3,0xB7180049,0xB7180049,0xA9240049,0xB5080032,0xB5080032,0xA9140002,0xA5180032,0x71F800F3, -0x71F800F3,0xA8F40049,0xA4E00032,0x9E0000F3,0xF9300024,0xF73C008C,0x14000F4,0xE7280000,0xC9280001,0xBD280001,0xB9280004,0xB1280001,0xFF200012,0xDF200001,0xB5240032,0xA9140002,0x4BFC00F3,0x1480034,0x1480034,0x1480034,0x1480034,0xBD400000,0xBD400000,0xBD400000,0xAF400000,0xAF400000,0xA9400001,0x3E80032,0x3E80032,0x3E80032,0xB1300001,0xB1300001, -0xA9380001,0x79FC0032,0x79FC0032,0xA91C0001,0xA4000032,0x3E80032,0x3E80032,0x3E80032,0xB1300001,0xB1300001,0xA9380001,0x79FC0032,0x79FC0032,0xA91C0001,0xA4000032,0x79FC0032,0x79FC0032,0xA91C0001,0xA4000032,0xA4000032,0xFB340004,0xFB440008,0x1480034,0xE7280000,0xC72C0000,0xBB2C0000,0xB7300000,0xB1280001,0xF9280005,0xDF200000,0x57FC0032,0xA91C0001, -0x57FC0032,0x1600048,0xCF540000,0xBB540001,0xB5540001,0xFFC0048,0xBF400001,0xB5480001,0x8BF80048,0xB5240001,0xAE00004A,0xFFC0048,0xBF400001,0xB5480001,0x8BF80048,0xB5240001,0xAE00004A,0x8BF80048,0xB5240001,0xAE00004A,0xAE00004A,0xFFC0048,0xBF400001,0xB5480001,0x8BF80048,0xB5240001,0xAE00004A,0x8BF80048,0xB5240001,0xAE00004A,0xAE00004A,0x8BF80048, -0xB5240001,0xAE00004A,0xAE00004A,0xAE00004A,0xFD400012,0x1780048,0xF75C0019,0xE7280000,0xCB280001,0xBF240000,0xB5340001,0xB5140001,0xFF2C0011,0xE31C0000,0xB7380001,0xAE00004A,0x6BFC0048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0x1340048,0xB9280000,0xB9280000,0xB9280000,0xB9280000,0xB9280000, -0xB9280000,0xA5280001,0xA5280001,0xA5280001,0x9F280001,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0xA9140001,0xA9140001,0xA9140001,0x9F1C0001,0x69FC0048,0x69FC0048,0x69FC0048,0x9EF80001,0x9800004A,0xFF2C000D,0x1340048,0x1340048,0xE7280000,0xCD280000,0xC1280000,0xC1280000,0xB1280000,0xFD1C0005,0xE71C0000,0xAD240000,0xA9140001, -0x41FC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table53[] = { -0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x6DF80000, -0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x14C0000,0x14C0000,0x14C0000,0x1D00000,0x45FC0000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000, -0x1F40000,0x7FF80000,0x7FF80000,0x7FF80000,0xA6000001,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x7FF80000,0x7FF80000,0x7FF80000,0xA6000001,0x7FF80000,0x7FF80000,0x7FF80000,0xA6000001,0xA6000001,0xB640000,0x1500000,0x1500000,0x7840000,0x1A40000,0x1C80000,0x1C80000,0x2BFC0000,0x7840000,0x1A40000,0x5DF80000,0x7FF80000, -0x5DF80000,0x1640000,0x1640000,0x1640000,0x1640000,0x15FC0000,0x15FC0000,0x15FC0000,0x8DFC0000,0x8DFC0000,0xB0000001,0x15FC0000,0x15FC0000,0x15FC0000,0x8DFC0000,0x8DFC0000,0xB0000001,0x8DFC0000,0x8DFC0000,0xB0000001,0xB0000001,0x15FC0000,0x15FC0000,0x15FC0000,0x8DFC0000,0x8DFC0000,0xB0000001,0x8DFC0000,0x8DFC0000,0xB0000001,0xB0000001,0x8DFC0000, -0x8DFC0000,0xB0000001,0xB0000001,0xB0000001,0x59C0000,0x17C0000,0x1640000,0x1E40000,0x43FC0000,0x6FFC0000,0x7FF80000,0x97F80000,0x3BC0000,0x15FC0000,0x6FFC0000,0xB0000001,0x6FFC0000,0x17C0000,0x3BFC0000,0x9FF80000,0xBC000001,0x3BFC0000,0x9FF80000,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0x3BFC0000,0x9FF80000,0xBC000001,0x9FF80000,0xBC000001, -0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0x3BFC0000,0x9FF80000,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0xBC000001,0x1DC0000,0x3940000,0x3940000,0x61FC0000,0x93F80000,0xAFF00000,0xBC000001,0xBC000001,0xDFC0000,0x75FC0000,0xBBC80000,0xBC000001, -0x85FC0000,0x1580278,0xEB5000F4,0xC95000F4,0xBD5000F5,0xE5440090,0xCD44004D,0xC1480069,0xC3440090,0xBD44004D,0xB7440092,0xE13800F3,0xCF380032,0xC1400053,0xC5380049,0xBD380002,0xB938004E,0xBD3800F4,0xBB340051,0xB7340067,0xB33800F3,0x7FC0274,0xD32400F3,0xBD3800F4,0xCB1C0092,0xBF28004A,0xB7300092,0xC90C00F3,0xBD180033,0xB718004F,0xB32000F4,0x85FC0274, -0xBCF400F4,0xB6EC0092,0xB2D800F3,0xAC000274,0xFF44007E,0xF35401B2,0xF55801DD,0xEF380000,0xD5380001,0xC5380002,0xBF3C0006,0xBD380006,0xFF34006A,0xE7300002,0xBF340035,0xB718004F,0x65FC0274,0x16400F4,0xDF5C0034,0xC7580034,0xBD580035,0xD9500048,0xC7500001,0xBF500005,0xBF50004A,0xBD4C000E,0xB750004A,0x19FC00F3,0xCD3C0032,0xBF480033,0xC5380049,0xBD380002, -0xB740004A,0x8FF800F3,0xBD180033,0xB710004A,0xB20000F3,0x19FC00F3,0xCD3C0032,0xBF480033,0xC5380049,0xBD380002,0xB740004A,0x8FF800F3,0xBD180033,0xB710004A,0xB20000F3,0x8FF800F3,0xBD180033,0xB710004A,0xB20000F3,0xB20000F3,0xFD4C002D,0xF960009A,0xF9600095,0xEF380000,0xD5380001,0xC5380002,0xBF400001,0xBD300003,0xFD400033,0xE7300001,0xC12C0032,0xB710004A, -0x71FC00F3,0x15000F4,0x15000F4,0x15000F4,0x15000F4,0xD3440048,0xD3440048,0xD3440048,0xB9440049,0xB9440049,0xB1440049,0xCF380032,0xCF380032,0xCF380032,0xBD380001,0xBD380001,0xB33C000E,0xB5380032,0xB5380032,0xB1380005,0xAD380032,0x1F000F3,0x1F000F3,0x1F000F3,0xBF280049,0xBF280049,0xB1340049,0xBD180032,0xBD180032,0xB1240002,0xAD280032,0x7DF800F3, -0x7DF800F3,0xB1040049,0xACF00032,0xA60000F3,0xF940002D,0xFF4C008C,0x15000F4,0xEF380000,0xD1380001,0xC5380001,0xC1380004,0xB9380001,0xFF340019,0xE7300001,0xBD340032,0xB1240002,0x5BFC00F3,0x1580034,0x1580034,0x1580034,0x1580034,0xC5500000,0xC5500000,0xC5500000,0xB7500000,0xB7500000,0xB1500001,0x7FC0032,0x7FC0032,0x7FC0032,0xB9400001,0xB9400001, -0xB1480001,0x85FC0032,0x85FC0032,0xB12C0001,0xAC000032,0x7FC0032,0x7FC0032,0x7FC0032,0xB9400001,0xB9400001,0xB1480001,0x85FC0032,0x85FC0032,0xB12C0001,0xAC000032,0x85FC0032,0x85FC0032,0xB12C0001,0xAC000032,0xAC000032,0xF7480005,0xF354000D,0x1580034,0xEF380000,0xCF3C0000,0xC33C0000,0xBF400000,0xB9380001,0xF53C0008,0xE7300000,0x65FC0032,0xB12C0001, -0x65FC0032,0x1700048,0xD7640000,0xC3640001,0xBD640001,0x29FC0048,0xC7500001,0xBD580001,0x97F80048,0xBD340001,0xB600004A,0x29FC0048,0xC7500001,0xBD580001,0x97F80048,0xBD340001,0xB600004A,0x97F80048,0xBD340001,0xB600004A,0xB600004A,0x29FC0048,0xC7500001,0xBD580001,0x97F80048,0xBD340001,0xB600004A,0x97F80048,0xBD340001,0xB600004A,0xB600004A,0x97F80048, -0xBD340001,0xB600004A,0xB600004A,0xB600004A,0xFF500014,0x1880048,0xFF6C0019,0xEF380000,0xD3380001,0xC7340000,0xBD440001,0xBD240001,0xF9480012,0xEB2C0000,0xBF480001,0xB600004A,0x7BFC0048,0x1440048,0x1440048,0x1440048,0x1440048,0x1440048,0x1440048,0x1440048,0x1440048,0x1440048,0x1440048,0xC1380000,0xC1380000,0xC1380000,0xC1380000,0xC1380000, -0xC1380000,0xAD380001,0xAD380001,0xAD380001,0xA7380001,0x1E00048,0x1E00048,0x1E00048,0x1E00048,0x1E00048,0x1E00048,0xB1240001,0xB1240001,0xB1240001,0xA72C0001,0x75FC0048,0x75FC0048,0x75FC0048,0xA7080001,0xA000004A,0xF73C0014,0x1440048,0x1440048,0xEF380000,0xD5380000,0xC9380000,0xC9380000,0xB9380000,0xF9300008,0xEF2C0000,0xB5340000,0xB1240001, -0x51FC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table54[] = { -0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x79F80000, -0x79F80000,0x79F80000,0x79F80000,0xA2000001,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x75C0000,0x75C0000,0x75C0000,0x1E80000,0x55FC0000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000, -0xFFC0000,0x8BF80000,0x8BF80000,0x8BF80000,0xAE000001,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0x8BF80000,0x8BF80000,0x8BF80000,0xAE000001,0x8BF80000,0x8BF80000,0x8BF80000,0xAE000001,0xAE000001,0x1780000,0x1600000,0x1600000,0x3980000,0x1B80000,0x3DC0000,0x3DC0000,0x3DFC0000,0x3980000,0x1B80000,0x6BFC0000,0x8BF80000, -0x6BFC0000,0x1740000,0x1740000,0x1740000,0x1740000,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x99FC0000,0x99FC0000,0xB8000001,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x99FC0000,0x99FC0000,0xB8000001,0x99FC0000,0x99FC0000,0xB8000001,0xB8000001,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x99FC0000,0x99FC0000,0xB8000001,0x99FC0000,0x99FC0000,0xB8000001,0xB8000001,0x99FC0000, -0x99FC0000,0xB8000001,0xB8000001,0xB8000001,0x1B00000,0x18C0000,0x1740000,0x3F80000,0x57FC0000,0x7FF80000,0x8BFC0000,0xA1FC0000,0x5D00000,0x2FFC0000,0x7FF80000,0xB8000001,0x7FF80000,0x18C0000,0x53FC0000,0xABF80000,0xC4000001,0x53FC0000,0xABF80000,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0x53FC0000,0xABF80000,0xC4000001,0xABF80000,0xC4000001, -0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xC4000001,0x53FC0000,0xABF80000,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xC4000001,0xC4000001,0x1F00000,0xBA40000,0xBA40000,0x73FC0000,0x9FFC0000,0xB9F00000,0xC4000001,0xC4000001,0x2BFC0000,0x87FC0000,0xC3D80000,0xC4000001, -0x95FC0000,0x1680278,0xF36000F4,0xD16000F4,0xC56000F5,0xED540090,0xD554004D,0xC9580069,0xCB540090,0xC554004D,0xBF540092,0xE94800F3,0xD7480032,0xC9500053,0xCD480049,0xC5480002,0xC148004E,0xC54800F4,0xC3440051,0xBF440067,0xBB4800F3,0x1FFC0274,0xDB3400F3,0xC54800F4,0xD32C0092,0xC738004A,0xBF400092,0xD11C00F3,0xC5280033,0xBF28004F,0xBB3000F4,0x91FC0274, -0xC50400F4,0xBEFC0092,0xBAE800F3,0xB4000274,0xFF580096,0xFB6401B2,0xFD6801DD,0xF7480000,0xDD480001,0xCD480002,0xC74C0006,0xC5480006,0xFD4C008A,0xEF400002,0xC7440035,0xBF28004F,0x75FC0274,0x17400F4,0xE76C0034,0xCF680034,0xC5680035,0xE1600048,0xCF600001,0xC7600005,0xC760004A,0xC55C000E,0xBF60004A,0x31FC00F3,0xD54C0032,0xC7580033,0xCD480049,0xC5480002, -0xBF50004A,0x9BF800F3,0xC5280033,0xBF20004A,0xBA0000F3,0x31FC00F3,0xD54C0032,0xC7580033,0xCD480049,0xC5480002,0xBF50004A,0x9BF800F3,0xC5280033,0xBF20004A,0xBA0000F3,0x9BF800F3,0xC5280033,0xBF20004A,0xBA0000F3,0xBA0000F3,0xFD60003E,0xFF6C009E,0xF37400A4,0xF7480000,0xDD480001,0xCD480002,0xC7500001,0xC5400003,0xFB54003D,0xEF400001,0xC93C0032,0xBF20004A, -0x7FFC00F3,0x16000F4,0x16000F4,0x16000F4,0x16000F4,0xDB540048,0xDB540048,0xDB540048,0xC1540049,0xC1540049,0xB9540049,0xD7480032,0xD7480032,0xD7480032,0xC5480001,0xC5480001,0xBB4C000E,0xBD480032,0xBD480032,0xB9480005,0xB5480032,0xDFC00F3,0xDFC00F3,0xDFC00F3,0xC7380049,0xC7380049,0xB9440049,0xC5280032,0xC5280032,0xB9340002,0xB5380032,0x89F800F3, -0x89F800F3,0xB9140049,0xB5000032,0xAE0000F3,0xFD540035,0xF75C0099,0x16000F4,0xF7480000,0xD9480001,0xCD480001,0xC9480004,0xC1480001,0xFB480029,0xEF400001,0xC5440032,0xB9340002,0x69FC00F3,0x1680034,0x1680034,0x1680034,0x1680034,0xCD600000,0xCD600000,0xCD600000,0xBF600000,0xBF600000,0xB9600001,0x1FFC0032,0x1FFC0032,0x1FFC0032,0xC1500001,0xC1500001, -0xB9580001,0x91FC0032,0x91FC0032,0xB93C0001,0xB4000032,0x1FFC0032,0x1FFC0032,0x1FFC0032,0xC1500001,0xC1500001,0xB9580001,0x91FC0032,0x91FC0032,0xB93C0001,0xB4000032,0x91FC0032,0x91FC0032,0xB93C0001,0xB4000032,0xB4000032,0xFF580005,0xFB64000D,0x1680034,0xF7480000,0xD74C0000,0xCB4C0000,0xC7500000,0xC1480001,0xFD4C0008,0xEF400000,0x75FC0032,0xB93C0001, -0x75FC0032,0x1800048,0xDF740000,0xCB740001,0xC5740001,0x41FC0048,0xCF600001,0xC5680001,0xA1FC0048,0xC5440001,0xBE00004A,0x41FC0048,0xCF600001,0xC5680001,0xA1FC0048,0xC5440001,0xBE00004A,0xA1FC0048,0xC5440001,0xBE00004A,0xBE00004A,0x41FC0048,0xCF600001,0xC5680001,0xA1FC0048,0xC5440001,0xBE00004A,0xA1FC0048,0xC5440001,0xBE00004A,0xBE00004A,0xA1FC0048, -0xC5440001,0xBE00004A,0xBE00004A,0xBE00004A,0xF7680019,0x5980048,0xF77C0020,0xF7480000,0xDB480001,0xCF440000,0xC5540001,0xC5340001,0xFD580014,0xF33C0000,0xC7580001,0xBE00004A,0x89FC0048,0x1540048,0x1540048,0x1540048,0x1540048,0x1540048,0x1540048,0x1540048,0x1540048,0x1540048,0x1540048,0xC9480000,0xC9480000,0xC9480000,0xC9480000,0xC9480000, -0xC9480000,0xB5480001,0xB5480001,0xB5480001,0xAF480001,0x1F80048,0x1F80048,0x1F80048,0x1F80048,0x1F80048,0x1F80048,0xB9340001,0xB9340001,0xB9340001,0xAF3C0001,0x81FC0048,0x81FC0048,0x81FC0048,0xAF180001,0xA800004A,0xFF4C0014,0x1540048,0x1540048,0xF7480000,0xDD480000,0xD1480000,0xD1480000,0xC1480000,0xF544000D,0xF73C0000,0xBD440000,0xB9340001, -0x5FFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table55[] = { -0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x85F80000, -0x85F80000,0x85F80000,0x85F80000,0xAA000001,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0xF6C0000,0xF6C0000,0xF6C0000,0x3FC0000,0x63FC0000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000, -0x29FC0000,0x97F80000,0x97F80000,0x97F80000,0xB6000001,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x97F80000,0x97F80000,0x97F80000,0xB6000001,0x97F80000,0x97F80000,0x97F80000,0xB6000001,0xB6000001,0x1880000,0x1700000,0x1700000,0x1AC0000,0x3CC0000,0x1F40000,0x1F40000,0x51FC0000,0x1AC0000,0x3CC0000,0x7BFC0000,0x97F80000, -0x7BFC0000,0x1840000,0x1840000,0x1840000,0x1840000,0x47FC0000,0x47FC0000,0x47FC0000,0xA5F80000,0xA5F80000,0xC0000001,0x47FC0000,0x47FC0000,0x47FC0000,0xA5F80000,0xA5F80000,0xC0000001,0xA5F80000,0xA5F80000,0xC0000001,0xC0000001,0x47FC0000,0x47FC0000,0x47FC0000,0xA5F80000,0xA5F80000,0xC0000001,0xA5F80000,0xA5F80000,0xC0000001,0xC0000001,0xA5F80000, -0xA5F80000,0xC0000001,0xC0000001,0xC0000001,0x1C40000,0x79C0000,0x1840000,0x1BFC0000,0x6BFC0000,0x8DFC0000,0x99FC0000,0xADF80000,0x5E40000,0x47FC0000,0x8DFC0000,0xC0000001,0x8DFC0000,0x19C0000,0x6BFC0000,0xB7F80000,0xCC000001,0x6BFC0000,0xB7F80000,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0x6BFC0000,0xB7F80000,0xCC000001,0xB7F80000,0xCC000001, -0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0x6BFC0000,0xB7F80000,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0xCC000001,0x11FC0000,0x1B80000,0x1B80000,0x87FC0000,0xADFC0000,0xC3F00000,0xCC000001,0xCC000001,0x49FC0000,0x97FC0000,0xCBE80000,0xCC000001, -0xA3FC0000,0x1780278,0xFB7000F4,0xD97000F4,0xCD7000F5,0xF5640090,0xDD64004D,0xD1680069,0xD3640090,0xCD64004D,0xC7640092,0xF15800F3,0xDF580032,0xD1600053,0xD5580049,0xCD580002,0xC958004E,0xCD5800F4,0xCB540051,0xC7540067,0xC35800F3,0x37FC0274,0xE34400F3,0xCD5800F4,0xDB3C0092,0xCF48004A,0xC7500092,0xD92C00F3,0xCD380033,0xC738004F,0xC34000F4,0x9DFC0274, -0xCD1400F4,0xC70C0092,0xC2F800F3,0xBC000274,0xFF6C00C2,0xF37401D8,0xF57801F8,0xFF580000,0xE5580001,0xD5580002,0xCF5C0006,0xCD580006,0xFF5C00A6,0xF7500002,0xCF540035,0xC738004F,0x83FC0274,0x18400F4,0xEF7C0034,0xD7780034,0xCD780035,0xE9700048,0xD7700001,0xCF700005,0xCF70004A,0xCD6C000E,0xC770004A,0x49FC00F3,0xDD5C0032,0xCF680033,0xD5580049,0xCD580002, -0xC760004A,0xA7F800F3,0xCD380033,0xC730004A,0xC20000F3,0x49FC00F3,0xDD5C0032,0xCF680033,0xD5580049,0xCD580002,0xC760004A,0xA7F800F3,0xCD380033,0xC730004A,0xC20000F3,0xA7F800F3,0xCD380033,0xC730004A,0xC20000F3,0xC20000F3,0xFD740049,0xF98000A8,0xFB8400A4,0xFF580000,0xE5580001,0xD5580002,0xCF600001,0xCD500003,0xFB680055,0xF7500001,0xD14C0032,0xC730004A, -0x8FFC00F3,0x17000F4,0x17000F4,0x17000F4,0x17000F4,0xE3640048,0xE3640048,0xE3640048,0xC9640049,0xC9640049,0xC1640049,0xDF580032,0xDF580032,0xDF580032,0xCD580001,0xCD580001,0xC35C000E,0xC5580032,0xC5580032,0xC1580005,0xBD580032,0x25FC00F3,0x25FC00F3,0x25FC00F3,0xCF480049,0xCF480049,0xC1540049,0xCD380032,0xCD380032,0xC1440002,0xBD480032,0x95F800F3, -0x95F800F3,0xC1240049,0xBD100032,0xB60000F3,0xFD64003E,0xFF6C0099,0x17000F4,0xFF580000,0xE1580001,0xD5580001,0xD1580004,0xC9580001,0xFF580032,0xF7500001,0xCD540032,0xC1440002,0x79FC00F3,0x1780034,0x1780034,0x1780034,0x1780034,0xD5700000,0xD5700000,0xD5700000,0xC7700000,0xC7700000,0xC1700001,0x37FC0032,0x37FC0032,0x37FC0032,0xC9600001,0xC9600001, -0xC1680001,0x9DFC0032,0x9DFC0032,0xC14C0001,0xBC000032,0x37FC0032,0x37FC0032,0x37FC0032,0xC9600001,0xC9600001,0xC1680001,0x9DFC0032,0x9DFC0032,0xC14C0001,0xBC000032,0x9DFC0032,0x9DFC0032,0xC14C0001,0xBC000032,0xBC000032,0xFB6C0008,0xF3740014,0x1780034,0xFF580000,0xDF5C0000,0xD35C0000,0xCF600000,0xC9580001,0xF564000D,0xF7500000,0x83FC0032,0xC14C0001, -0x83FC0032,0x1900048,0xE7840000,0xD3840001,0xCD840001,0x59FC0048,0xD7700001,0xCD780001,0xADFC0048,0xCD540001,0xC600004A,0x59FC0048,0xD7700001,0xCD780001,0xADFC0048,0xCD540001,0xC600004A,0xADFC0048,0xCD540001,0xC600004A,0xC600004A,0x59FC0048,0xD7700001,0xCD780001,0xADFC0048,0xCD540001,0xC600004A,0xADFC0048,0xCD540001,0xC600004A,0xC600004A,0xADFC0048, -0xCD540001,0xC600004A,0xC600004A,0xC600004A,0xFF780019,0xDA80048,0xFF8C0020,0xFF580000,0xE3580001,0xD7540000,0xCD640001,0xCD440001,0xFF700019,0xFB4C0000,0xCF680001,0xC600004A,0x99FC0048,0x1640048,0x1640048,0x1640048,0x1640048,0x1640048,0x1640048,0x1640048,0x1640048,0x1640048,0x1640048,0xD1580000,0xD1580000,0xD1580000,0xD1580000,0xD1580000, -0xD1580000,0xBD580001,0xBD580001,0xBD580001,0xB7580001,0x15FC0048,0x15FC0048,0x15FC0048,0x15FC0048,0x15FC0048,0x15FC0048,0xC1440001,0xC1440001,0xC1440001,0xB74C0001,0x8DFC0048,0x8DFC0048,0x8DFC0048,0xB7280001,0xB000004A,0xF9600019,0x1640048,0x1640048,0xFF580000,0xE5580000,0xD9580000,0xD9580000,0xC9580000,0xFD54000D,0xFF4C0000,0xC5540000,0xC1440001, -0x6FFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table56[] = { -0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x91FC0000, -0x91FC0000,0x91FC0000,0x91FC0000,0xB4000000,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x9800000,0x9800000,0x9800000,0x1FFC0000,0x75FC0000,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000, -0x43FC0000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0xC0000000,0x19C0000,0x1800001,0x1800001,0x1C00000,0x1E40000,0x17FC0000,0x17FC0000,0x67FC0000,0x1C00000,0x1E40000,0x8BFC0000,0xA3FC0000, -0x8BFC0000,0x1940001,0x1940001,0x1940001,0x1940001,0x63FC0000,0x63FC0000,0x63FC0000,0xB3F80000,0xB3F80000,0xCA000000,0x63FC0000,0x63FC0000,0x63FC0000,0xB3F80000,0xB3F80000,0xCA000000,0xB3F80000,0xB3F80000,0xCA000000,0xCA000000,0x63FC0000,0x63FC0000,0x63FC0000,0xB3F80000,0xB3F80000,0xCA000000,0xB3F80000,0xB3F80000,0xCA000000,0xCA000000,0xB3F80000, -0xB3F80000,0xCA000000,0xCA000000,0xCA000000,0x1D80000,0x1B00000,0x1940001,0x3DFC0000,0x81FC0000,0x9FF80000,0xA9F80000,0xB9F80000,0x1FC0000,0x63FC0000,0x9FF80000,0xCA000000,0x9FF80000,0x1AC0001,0x87FC0000,0xC5F80000,0xD6000000,0x87FC0000,0xC5F80000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0x87FC0000,0xC5F80000,0xD6000000,0xC5F80000,0xD6000000, -0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0x87FC0000,0xC5F80000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0xD6000000,0x3DFC0000,0xDC80000,0xDC80000,0x9DFC0000,0xBDF80000,0xCDF80000,0xD6000000,0xD6000000,0x6BFC0000,0xABFC0000,0xD5DC0000,0xD6000000, -0xB5FC0000,0x18C0274,0xFF8000F7,0xE18000F4,0xD78000F3,0xFB780092,0xE378004F,0xD9780067,0xDB780092,0xD774004E,0xD1740092,0xF96C00F4,0xE76C0033,0xD9700051,0xDF68004A,0xD76C0002,0xD16C004D,0xD76800F4,0xD3640053,0xCF640069,0xCB6800F5,0x53FC0274,0xED5400F3,0xD76C00F4,0xE3500092,0xD75C0049,0xD1600090,0xE14000F3,0xD7480032,0xD148004D,0xCB5400F4,0xABF80274, -0xD72000F3,0xD11C0090,0xCB1000F4,0xC4000278,0xFF8000EA,0xFD8801D4,0xFD8801FC,0xFF70000E,0xED6C0002,0xDF6C0002,0xD76C0006,0xD5680006,0xFD7400D7,0xFF600006,0xD9640035,0xD148004D,0x95FC0274,0x19800F3,0xF98C0032,0xDF8C0032,0xD78C0032,0xEF840049,0xE1800002,0xD7840005,0xD9800049,0xD580000E,0xD1800049,0x65FC00F3,0xE76C0032,0xD77C0032,0xDF680049,0xD76C0001, -0xD1700049,0xB3FC00F3,0xD7440032,0xD1400048,0xCA0000F4,0x65FC00F3,0xE76C0032,0xD77C0032,0xDF680049,0xD76C0001,0xD1700049,0xB3FC00F3,0xD7440032,0xD1400048,0xCA0000F4,0xB3FC00F3,0xD7440032,0xD1400048,0xCA0000F4,0xCA0000F4,0xFD880063,0xF39400B9,0xF39400B6,0xFF700005,0xED6C0001,0xDF6C0001,0xD7740001,0xD7640004,0xFF780062,0xFF640002,0xD9600033,0xD1400048, -0x9FFC00F3,0x18000F3,0x18000F3,0x18000F3,0x18000F3,0xE978004A,0xE978004A,0xE978004A,0xD374004A,0xD374004A,0xCB74004A,0xE76C0033,0xE76C0033,0xE76C0033,0xD56C0002,0xD56C0002,0xCD6C000E,0xCF680033,0xCF680033,0xCB680005,0xC5680035,0x41FC00F3,0x41FC00F3,0x41FC00F3,0xD75C0049,0xD75C0049,0xCB64004A,0xD54C0032,0xD54C0032,0xCB540001,0xC7580034,0xA1FC00F3, -0xA1FC00F3,0xCB340048,0xC5280034,0xC00000F4,0xFF740053,0xF77C00AB,0x18000F3,0xFD6C0006,0xE96C0002,0xDD6C0002,0xDB6C0003,0xD3680001,0xFF6C0049,0xFF600002,0xD5680033,0xCB540001,0x89FC00F3,0x18C0032,0x18C0032,0x18C0032,0x18C0032,0xDD840001,0xDD840001,0xDD840001,0xCF840001,0xCF840001,0xCB800001,0x53FC0032,0x53FC0032,0x53FC0032,0xD3700001,0xD3700001, -0xCB780000,0xABF80032,0xABF80032,0xCB580000,0xC4000034,0x53FC0032,0x53FC0032,0x53FC0032,0xD3700001,0xD3700001,0xCB780000,0xABF80032,0xABF80032,0xCB580000,0xC4000034,0xABF80032,0xABF80032,0xCB580000,0xC4000034,0xC4000034,0xF780000D,0xFD880012,0x18C0032,0xFB700001,0xE3740001,0xDB700000,0xD5740001,0xD3680000,0xFD74000D,0xFD640000,0x95FC0032,0xCB580000, -0x95FC0032,0x1A0004A,0xED980001,0xDD940001,0xD7940001,0x75FC0048,0xE1800001,0xD7880001,0xBBFC0048,0xD7640000,0xD0000048,0x75FC0048,0xE1800001,0xD7880001,0xBBFC0048,0xD7640000,0xD0000048,0xBBFC0048,0xD7640000,0xD0000048,0xD0000048,0x75FC0048,0xE1800001,0xD7880001,0xBBFC0048,0xD7640000,0xD0000048,0xBBFC0048,0xD7640000,0xD0000048,0xD0000048,0xBBFC0048, -0xD7640000,0xD0000048,0xD0000048,0xD0000048,0xFD900020,0x7BC0048,0xF9A00029,0xFF740002,0xED680001,0xE1640000,0xD7740000,0xD7500000,0xFB880020,0xFF640001,0xD9780000,0xD0000048,0xA9FC0048,0x174004A,0x174004A,0x174004A,0x174004A,0x174004A,0x174004A,0x174004A,0x174004A,0x174004A,0x174004A,0xD76C0001,0xD76C0001,0xD76C0001,0xD76C0001,0xD76C0001, -0xD76C0001,0xC7680001,0xC7680001,0xC7680001,0xC1680001,0x31FC0048,0x31FC0048,0x31FC0048,0x31FC0048,0x31FC0048,0x31FC0048,0xCB540001,0xCB540001,0xCB540001,0xC15C0001,0x9BF80048,0x9BF80048,0x9BF80048,0xC1380000,0xBA000048,0xF1700022,0x174004A,0x174004A,0xFB6C0002,0xEB6C0001,0xDF6C0001,0xDF6C0001,0xD16C0001,0xF9680012,0xFD600001,0xCB680001,0xCB540001, -0x7FFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table57[] = { -0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000, -0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1940000,0x1940000,0x1940000,0x37FC0000,0x83FC0000,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000, -0x5BFC0000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xC8000000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xC8000000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xC8000000,0xC8000000,0x1AC0000,0x1900001,0x1900001,0x1D40000,0x1F80000,0x35FC0000,0x35FC0000,0x7BFC0000,0x1D40000,0x1F80000,0x9BFC0000,0xAFFC0000, -0x9BFC0000,0x1A40001,0x1A40001,0x1A40001,0x1A40001,0x7BFC0000,0x7BFC0000,0x7BFC0000,0xBFF80000,0xBFF80000,0xD2000000,0x7BFC0000,0x7BFC0000,0x7BFC0000,0xBFF80000,0xBFF80000,0xD2000000,0xBFF80000,0xBFF80000,0xD2000000,0xD2000000,0x7BFC0000,0x7BFC0000,0x7BFC0000,0xBFF80000,0xBFF80000,0xD2000000,0xBFF80000,0xBFF80000,0xD2000000,0xD2000000,0xBFF80000, -0xBFF80000,0xD2000000,0xD2000000,0xD2000000,0x7E80000,0x9C00000,0x1A40001,0x5BFC0000,0x93FC0000,0xADFC0000,0xB5FC0000,0xC3FC0000,0x29FC0000,0x7BFC0000,0xADFC0000,0xD2000000,0xADFC0000,0x1BC0001,0x9FFC0000,0xD1F80000,0xDE000000,0x9FFC0000,0xD1F80000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0x9FFC0000,0xD1F80000,0xDE000000,0xD1F80000,0xDE000000, -0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0x9FFC0000,0xD1F80000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0xDE000000,0x63FC0000,0x1DC0000,0x1DC0000,0xB1FC0000,0xC9FC0000,0xD7F80000,0xDE000000,0xDE000000,0x89FC0000,0xBBFC0000,0xDDEC0000,0xDE000000, -0xC3FC0000,0x19C0274,0xFD94010B,0xE99000F4,0xDF9000F3,0xFD88009A,0xEB88004F,0xE1880067,0xE3880092,0xDF84004E,0xD9840092,0xFD7C00F5,0xEF7C0033,0xE1800051,0xE778004A,0xDF7C0002,0xD97C004D,0xDF7800F4,0xDB740053,0xD7740069,0xD37800F5,0x6BFC0274,0xF56400F3,0xDF7C00F4,0xEB600092,0xDF6C0049,0xD9700090,0xE95000F3,0xDF580032,0xD958004D,0xD36400F4,0xB7F80274, -0xDF3000F3,0xD92C0090,0xD32000F4,0xCC000278,0xFF8C0114,0xF59801FA,0xF79C0217,0xFF800026,0xF57C0002,0xE77C0002,0xDF7C0006,0xDD780006,0xFF880104,0xFF78001B,0xE1740035,0xD958004D,0xA3FC0274,0x1A800F3,0xFF9C0033,0xE79C0032,0xDF9C0032,0xF7940049,0xE9900002,0xDF940005,0xE1900049,0xDD90000E,0xD9900049,0x7DFC00F3,0xEF7C0032,0xDF8C0032,0xE7780049,0xDF7C0001, -0xD9800049,0xBFFC00F3,0xDF540032,0xD9500048,0xD20000F4,0x7DFC00F3,0xEF7C0032,0xDF8C0032,0xE7780049,0xDF7C0001,0xD9800049,0xBFFC00F3,0xDF540032,0xD9500048,0xD20000F4,0xBFFC00F3,0xDF540032,0xD9500048,0xD20000F4,0xD20000F4,0xFB980075,0xFBA400B9,0xFBA400B6,0xFF880013,0xF57C0001,0xE77C0001,0xDF840001,0xDF740004,0xFF900071,0xFD7C000D,0xE1700033,0xD9500048, -0xAFFC00F3,0x19000F3,0x19000F3,0x19000F3,0x19000F3,0xF188004A,0xF188004A,0xF188004A,0xDB84004A,0xDB84004A,0xD384004A,0xEF7C0033,0xEF7C0033,0xEF7C0033,0xDD7C0002,0xDD7C0002,0xD57C000E,0xD7780033,0xD7780033,0xD3780005,0xCD780035,0x59FC00F3,0x59FC00F3,0x59FC00F3,0xDF6C0049,0xDF6C0049,0xD374004A,0xDD5C0032,0xDD5C0032,0xD3640001,0xCF680034,0xADFC00F3, -0xADFC00F3,0xD3440048,0xCD380034,0xC80000F4,0xFF840062,0xFF8C00AB,0x19000F3,0xFF80000D,0xF17C0002,0xE57C0002,0xE37C0003,0xDB780001,0xFF800055,0xFF740006,0xDD780033,0xD3640001,0x99FC00F3,0x19C0032,0x19C0032,0x19C0032,0x19C0032,0xE5940001,0xE5940001,0xE5940001,0xD7940001,0xD7940001,0xD3900001,0x6BFC0032,0x6BFC0032,0x6BFC0032,0xDB800001,0xDB800001, -0xD3880000,0xB7F80032,0xB7F80032,0xD3680000,0xCC000034,0x6BFC0032,0x6BFC0032,0x6BFC0032,0xDB800001,0xDB800001,0xD3880000,0xB7F80032,0xB7F80032,0xD3680000,0xCC000034,0xB7F80032,0xB7F80032,0xD3680000,0xCC000034,0xCC000034,0xFF90000D,0xF5980019,0x19C0032,0xFD840002,0xEB840001,0xE3800000,0xDD840001,0xDB780000,0xFD880012,0xFB7C0002,0xA3FC0032,0xD3680000, -0xA3FC0032,0x1B0004A,0xF5A80001,0xE5A40001,0xDFA40001,0x8DFC0048,0xE9900001,0xDF980001,0xC7FC0048,0xDF740000,0xD8000048,0x8DFC0048,0xE9900001,0xDF980001,0xC7FC0048,0xDF740000,0xD8000048,0xC7FC0048,0xDF740000,0xD8000048,0xD8000048,0x8DFC0048,0xE9900001,0xDF980001,0xC7FC0048,0xDF740000,0xD8000048,0xC7FC0048,0xDF740000,0xD8000048,0xD8000048,0xC7FC0048, -0xDF740000,0xD8000048,0xD8000048,0xD8000048,0xF7A40029,0xFCC0048,0xFFAC002D,0xFF8C0008,0xF5780001,0xE9740000,0xDF840000,0xDF600000,0xFF980022,0xFD800005,0xE1880000,0xD8000048,0xB9FC0048,0x184004A,0x184004A,0x184004A,0x184004A,0x184004A,0x184004A,0x184004A,0x184004A,0x184004A,0x184004A,0xDF7C0001,0xDF7C0001,0xDF7C0001,0xDF7C0001,0xDF7C0001, -0xDF7C0001,0xCF780001,0xCF780001,0xCF780001,0xC9780001,0x49FC0048,0x49FC0048,0x49FC0048,0x49FC0048,0x49FC0048,0x49FC0048,0xD3640001,0xD3640001,0xD3640001,0xC96C0001,0xA7F80048,0xA7F80048,0xA7F80048,0xC9480000,0xC2000048,0xF9800022,0x184004A,0x184004A,0xFB7C0005,0xF37C0001,0xE77C0001,0xE77C0001,0xD97C0001,0xF57C0019,0xFD740002,0xD3780001,0xD3640001, -0x8FFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table58[] = { -0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0xA9FC0000, -0xA9FC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1A40000,0x1A40000,0x1A40000,0x4FFC0000,0x93FC0000,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000, -0x75FC0000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xD0000000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xD0000000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xD0000000,0xD0000000,0x7BC0000,0x1A00001,0x1A00001,0x5E40000,0x1FFC0000,0x53FC0000,0x53FC0000,0x8FFC0000,0x5E40000,0x1FFC0000,0xA9FC0000,0xBBFC0000, -0xA9FC0000,0x1B40001,0x1B40001,0x1B40001,0x1B40001,0x93FC0000,0x93FC0000,0x93FC0000,0xCBF80000,0xCBF80000,0xDA000000,0x93FC0000,0x93FC0000,0x93FC0000,0xCBF80000,0xCBF80000,0xDA000000,0xCBF80000,0xCBF80000,0xDA000000,0xDA000000,0x93FC0000,0x93FC0000,0x93FC0000,0xCBF80000,0xCBF80000,0xDA000000,0xCBF80000,0xCBF80000,0xDA000000,0xDA000000,0xCBF80000, -0xCBF80000,0xDA000000,0xDA000000,0xDA000000,0x3FC0000,0x1D40000,0x1B40001,0x79FC0000,0xA7FC0000,0xBDF80000,0xC3FC0000,0xCFF80000,0x51FC0000,0x93FC0000,0xBDF80000,0xDA000000,0xBDF80000,0x1CC0001,0xB7FC0000,0xDDF40000,0xE6000000,0xB7FC0000,0xDDF40000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xB7FC0000,0xDDF40000,0xE6000000,0xDDF40000,0xE6000000, -0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0xB7FC0000,0xDDF40000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0xE6000000,0x8BFC0000,0x1EC0000,0x1EC0000,0xC5FC0000,0xD7FC0000,0xE1FC0000,0xE6000000,0xE6000000,0xA7FC0000,0xCDFC0000,0xE5FC0000,0xE6000000, -0xD3FC0000,0x1AC0274,0xFFA4011F,0xF1A000F4,0xE7A000F3,0xFF9800B2,0xF398004F,0xE9980067,0xEB980092,0xE794004E,0xE1940092,0xFF900104,0xF78C0033,0xE9900051,0xEF88004A,0xE78C0002,0xE18C004D,0xE78800F4,0xE3840053,0xDF840069,0xDB8800F5,0x83FC0274,0xFD7400F3,0xE78C00F4,0xF3700092,0xE77C0049,0xE1800090,0xF16000F3,0xE7680032,0xE168004D,0xDB7400F4,0xC3F80274, -0xE74000F3,0xE13C0090,0xDB3000F4,0xD4000278,0xFFA00136,0xFDA801FA,0xFFAC0217,0xFF940053,0xFD8C0002,0xEF8C0002,0xE78C0006,0xE5880006,0xFF98012A,0xFF8C0042,0xE9840035,0xE168004D,0xB3FC0274,0x1B800F3,0xFFB0003E,0xEFAC0032,0xE7AC0032,0xFFA40049,0xF1A00002,0xE7A40005,0xE9A00049,0xE5A0000E,0xE1A00049,0x95FC00F3,0xF78C0032,0xE79C0032,0xEF880049,0xE78C0001, -0xE1900049,0xCBFC00F3,0xE7640032,0xE1600048,0xDA0000F4,0x95FC00F3,0xF78C0032,0xE79C0032,0xEF880049,0xE78C0001,0xE1900049,0xCBFC00F3,0xE7640032,0xE1600048,0xDA0000F4,0xCBFC00F3,0xE7640032,0xE1600048,0xDA0000F4,0xDA0000F4,0xFBAC0082,0xF3B400CB,0xF3B400CB,0xFF980029,0xFD8C0001,0xEF8C0001,0xE7940001,0xE7840004,0xFFA40082,0xFF90001E,0xE9800033,0xE1600048, -0xBFF800F3,0x1A000F3,0x1A000F3,0x1A000F3,0x1A000F3,0xF998004A,0xF998004A,0xF998004A,0xE394004A,0xE394004A,0xDB94004A,0xF78C0033,0xF78C0033,0xF78C0033,0xE58C0002,0xE58C0002,0xDD8C000E,0xDF880033,0xDF880033,0xDB880005,0xD5880035,0x71FC00F3,0x71FC00F3,0x71FC00F3,0xE77C0049,0xE77C0049,0xDB84004A,0xE56C0032,0xE56C0032,0xDB740001,0xD7780034,0xB9FC00F3, -0xB9FC00F3,0xDB540048,0xD5480034,0xD00000F4,0xFB980075,0xF9A000BA,0x1A000F3,0xFF90001A,0xF98C0002,0xED8C0002,0xEB8C0003,0xE3880001,0xFF940064,0xFD8C0019,0xE5880033,0xDB740001,0xA7FC00F3,0x1AC0032,0x1AC0032,0x1AC0032,0x1AC0032,0xEDA40001,0xEDA40001,0xEDA40001,0xDFA40001,0xDFA40001,0xDBA00001,0x83FC0032,0x83FC0032,0x83FC0032,0xE3900001,0xE3900001, -0xDB980000,0xC3F80032,0xC3F80032,0xDB780000,0xD4000034,0x83FC0032,0x83FC0032,0x83FC0032,0xE3900001,0xE3900001,0xDB980000,0xC3F80032,0xC3F80032,0xDB780000,0xD4000034,0xC3F80032,0xC3F80032,0xDB780000,0xD4000034,0xD4000034,0xFFA00014,0xFDA80019,0x1AC0032,0xFF980005,0xF3940001,0xEB900000,0xE5940001,0xE3880000,0xF5A00019,0xFB900005,0xB3FC0032,0xDB780000, -0xB3FC0032,0x1C0004A,0xFDB80001,0xEDB40001,0xE7B40001,0xA5FC0048,0xF1A00001,0xE7A80001,0xD3FC0048,0xE7840000,0xE0000048,0xA5FC0048,0xF1A00001,0xE7A80001,0xD3FC0048,0xE7840000,0xE0000048,0xD3FC0048,0xE7840000,0xE0000048,0xE0000048,0xA5FC0048,0xF1A00001,0xE7A80001,0xD3FC0048,0xE7840000,0xE0000048,0xD3FC0048,0xE7840000,0xE0000048,0xE0000048,0xD3FC0048, -0xE7840000,0xE0000048,0xE0000048,0xE0000048,0xFFB40029,0x1E00048,0xF9C00032,0xFFA00011,0xFD880001,0xF1840000,0xE7940000,0xE7700000,0xFFAC002D,0xFD9C000D,0xE9980000,0xE0000048,0xC7FC0048,0x194004A,0x194004A,0x194004A,0x194004A,0x194004A,0x194004A,0x194004A,0x194004A,0x194004A,0x194004A,0xE78C0001,0xE78C0001,0xE78C0001,0xE78C0001,0xE78C0001, -0xE78C0001,0xD7880001,0xD7880001,0xD7880001,0xD1880001,0x63FC0048,0x63FC0048,0x63FC0048,0x63FC0048,0x63FC0048,0x63FC0048,0xDB740001,0xDB740001,0xDB740001,0xD17C0001,0xB3F80048,0xB3F80048,0xB3F80048,0xD1580000,0xCA000048,0xF3940029,0x194004A,0x194004A,0xFD8C000A,0xFB8C0001,0xEF8C0001,0xEF8C0001,0xE18C0001,0xFD8C0019,0xF9880008,0xDB880001,0xDB740001, -0x9FF80048,}; -static const uint32_t g_etc1_to_bc7_m6_table59[] = { -0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0xB5FC0000, -0xB5FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x3B40000,0x3B40000,0x3B40000,0x69FC0000,0xA1FC0000,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x1B00001,0x8DFC0000,0x8DFC0000,0x8DFC0000,0x8DFC0000,0x8DFC0000, -0x8DFC0000,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xD8000000,0x8DFC0000,0x8DFC0000,0x8DFC0000,0x8DFC0000,0x8DFC0000,0x8DFC0000,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xD8000000,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xD8000000,0xD8000000,0xFCC0000,0x1B00001,0x1B00001,0x1F80000,0x47FC0000,0x71FC0000,0x71FC0000,0xA3FC0000,0x1F80000,0x47FC0000,0xB9FC0000,0xC7FC0000, -0xB9FC0000,0x1C40001,0x1C40001,0x1C40001,0x1C40001,0xABFC0000,0xABFC0000,0xABFC0000,0xD7F80000,0xD7F80000,0xE2000000,0xABFC0000,0xABFC0000,0xABFC0000,0xD7F80000,0xD7F80000,0xE2000000,0xD7F80000,0xD7F80000,0xE2000000,0xE2000000,0xABFC0000,0xABFC0000,0xABFC0000,0xD7F80000,0xD7F80000,0xE2000000,0xD7F80000,0xD7F80000,0xE2000000,0xE2000000,0xD7F80000, -0xD7F80000,0xE2000000,0xE2000000,0xE2000000,0x3BFC0000,0x1E40000,0x1C40001,0x97FC0000,0xBBFC0000,0xCBFC0000,0xD1FC0000,0xD9FC0000,0x77FC0000,0xABFC0000,0xCBFC0000,0xE2000000,0xCBFC0000,0x1DC0001,0xCFFC0000,0xE7FC0000,0xEE000000,0xCFFC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xCFFC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xEE000000, -0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0xCFFC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0xEE000000,0xB3FC0000,0x7FC0000,0x7FC0000,0xD9FC0000,0xE5FC0000,0xEBFC0000,0xEE000000,0xEE000000,0xC5FC0000,0xDDFC0000,0xEFD00000,0xEE000000, -0xE1FC0000,0x1BC0274,0xFFB4014C,0xF9B000F4,0xEFB000F3,0xFFB000E2,0xFBA8004F,0xF1A80067,0xF3A80092,0xEFA4004E,0xE9A40092,0xFFA40121,0xFF9C0033,0xF1A00051,0xF798004A,0xEF9C0002,0xE99C004D,0xEF9800F4,0xEB940053,0xE7940069,0xE39800F5,0x9BFC0274,0xFD9000FD,0xEF9C00F4,0xFB800092,0xEF8C0049,0xE9900090,0xF97000F3,0xEF780032,0xE978004D,0xE38400F4,0xCFF80274, -0xEF5000F3,0xE94C0090,0xE34000F4,0xDC000278,0xFFB4016B,0xF5B80224,0xF7BC0234,0xFFAC009E,0xFF9C0013,0xF79C0002,0xEF9C0006,0xED980006,0xFDB0016B,0xFFA00086,0xF1940035,0xE978004D,0xC1FC0274,0x1C800F3,0xFDC00053,0xF7BC0032,0xEFBC0032,0xFDB80059,0xF9B00002,0xEFB40005,0xF1B00049,0xEDB0000E,0xE9B00049,0xAFFC00F3,0xFF9C0032,0xEFAC0032,0xF7980049,0xEF9C0001, -0xE9A00049,0xD7FC00F3,0xEF740032,0xE9700048,0xE20000F4,0xAFFC00F3,0xFF9C0032,0xEFAC0032,0xF7980049,0xEF9C0001,0xE9A00049,0xD7FC00F3,0xEF740032,0xE9700048,0xE20000F4,0xD7FC00F3,0xEF740032,0xE9700048,0xE20000F4,0xE20000F4,0xFFBC0095,0xFBC400CB,0xFBC400CB,0xFFB00042,0xFFA40009,0xF79C0001,0xEFA40001,0xEF940004,0xFFB4009E,0xFFAC003D,0xF1900033,0xE9700048, -0xCDFC00F3,0x1B000F3,0x1B000F3,0x1B000F3,0x1B000F3,0xFFA8004B,0xFFA8004B,0xFFA8004B,0xEBA4004A,0xEBA4004A,0xE3A4004A,0xFF9C0033,0xFF9C0033,0xFF9C0033,0xED9C0002,0xED9C0002,0xE59C000E,0xE7980033,0xE7980033,0xE3980005,0xDD980035,0x89FC00F3,0x89FC00F3,0x89FC00F3,0xEF8C0049,0xEF8C0049,0xE394004A,0xED7C0032,0xED7C0032,0xE3840001,0xDF880034,0xC5FC00F3, -0xC5FC00F3,0xE3640048,0xDD580034,0xD80000F4,0xFFAC0082,0xFFAC00BE,0x1B000F3,0xFFA40033,0xFF9C0003,0xF59C0002,0xF39C0003,0xEB980001,0xFFA00079,0xFF9C002A,0xED980033,0xE3840001,0xB7FC00F3,0x1BC0032,0x1BC0032,0x1BC0032,0x1BC0032,0xF5B40001,0xF5B40001,0xF5B40001,0xE7B40001,0xE7B40001,0xE3B00001,0x9BFC0032,0x9BFC0032,0x9BFC0032,0xEBA00001,0xEBA00001, -0xE3A80000,0xCFF80032,0xCFF80032,0xE3880000,0xDC000034,0x9BFC0032,0x9BFC0032,0x9BFC0032,0xEBA00001,0xEBA00001,0xE3A80000,0xCFF80032,0xCFF80032,0xE3880000,0xDC000034,0xCFF80032,0xCFF80032,0xE3880000,0xDC000034,0xDC000034,0xFBB40019,0xF5B80022,0x1BC0032,0xFBAC000D,0xFBA40001,0xF3A00000,0xEDA40001,0xEB980000,0xFDB00019,0xFFA40008,0xC1FC0032,0xE3880000, -0xC1FC0032,0x1D0004A,0xFFC80005,0xF5C40001,0xEFC40001,0xBDFC0048,0xF9B00001,0xEFB80001,0xDFF80048,0xEF940000,0xE8000048,0xBDFC0048,0xF9B00001,0xEFB80001,0xDFF80048,0xEF940000,0xE8000048,0xDFF80048,0xEF940000,0xE8000048,0xE8000048,0xBDFC0048,0xF9B00001,0xEFB80001,0xDFF80048,0xEF940000,0xE8000048,0xDFF80048,0xEF940000,0xE8000048,0xE8000048,0xDFF80048, -0xEF940000,0xE8000048,0xE8000048,0xE8000048,0xFBC80034,0x1F00048,0xFFCC003A,0xFDBC0019,0xFFA40005,0xF9940000,0xEFA40000,0xEF800000,0xF9C80032,0xFBB80019,0xF1A80000,0xE8000048,0xD7FC0048,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0x1A4004A,0xEF9C0001,0xEF9C0001,0xEF9C0001,0xEF9C0001,0xEF9C0001, -0xEF9C0001,0xDF980001,0xDF980001,0xDF980001,0xD9980001,0x7BFC0048,0x7BFC0048,0x7BFC0048,0x7BFC0048,0x7BFC0048,0x7BFC0048,0xE3840001,0xE3840001,0xE3840001,0xD98C0001,0xBFF80048,0xBFF80048,0xBFF80048,0xD9680000,0xD2000048,0xFBA40029,0x1A4004A,0x1A4004A,0xFFA0000D,0xFD9C0002,0xF79C0001,0xF79C0001,0xE99C0001,0xFD9C0020,0xFB98000D,0xE3980001,0xE3840001, -0xADFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table60[] = { -0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0xC3F80000, -0xC3F80000,0xC3F80000,0xC3F80000,0xD4000001,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1C80000,0x1C80000,0x1C80000,0x83FC0000,0xB3FC0000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000, -0xA9FC0000,0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0xE0000001,0x9E00000,0x1C40000,0x1C40000,0x35FC0000,0x73FC0000,0x93FC0000,0x93FC0000,0xB9FC0000,0x35FC0000,0x73FC0000,0xC9FC0000,0xD5F80000, -0xC9FC0000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xE3FC0000,0xE3FC0000,0xEA000001,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xE3FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xE3FC0000,0xEA000001,0xEA000001,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xE3FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xE3FC0000,0xEA000001,0xEA000001,0xE3FC0000, -0xE3FC0000,0xEA000001,0xEA000001,0xEA000001,0x7BFC0000,0x1F80000,0x1D80000,0xB9FC0000,0xD1FC0000,0xDDF80000,0xDFFC0000,0xE5FC0000,0xA3FC0000,0xC7FC0000,0xDDF80000,0xEA000001,0xDDF80000,0x1F00000,0xEBFC0000,0xF5FC0000,0xF6000001,0xEBFC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xEBFC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF6000001, -0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xEBFC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xF6000001,0xDFFC0000,0x97FC0000,0x97FC0000,0xEFFC0000,0xF5F80000,0xF7F40000,0xF6000001,0xF6000001,0xE7FC0000,0xF1FC0000,0xF9C40000,0xF6000001, -0xF3FC0000,0x1CC0278,0xFFC80181,0xFFC400FD,0xF7C400F5,0xFFC40120,0xFFBC0069,0xFBBC0069,0xFDB80090,0xF7B8004D,0xF1B80092,0xFFBC015B,0xFFB4005D,0xFBB40053,0xFFAC0049,0xF7AC0002,0xF3AC004E,0xF7AC00F4,0xF5A80051,0xF1A80067,0xEDAC00F3,0xB7FC0274,0xFFAC011F,0xF7AC00F4,0xFF9C00AA,0xF99C004A,0xF1A40092,0xFF8800F5,0xF78C0033,0xF18C004F,0xED9400F4,0xDDF40274, -0xF76800F4,0xF1600092,0xED4C00F3,0xE6000274,0xFFC401B8,0xFFCC0220,0xFFCC0234,0xFFC000F1,0xFFB40059,0xFFAC0002,0xF9B00006,0xF7AC0006,0xFFC401A8,0xFFB800EA,0xF9A80035,0xF18C004F,0xD3FC0274,0x1D800F4,0xFFD00074,0xFFCC0035,0xF7CC0035,0xFFD00074,0xFFC40009,0xF9C40005,0xF9C4004A,0xF7C0000E,0xF1C4004A,0xC9FC00F3,0xFFB80042,0xF9BC0033,0xFFAC0049,0xF7AC0002, -0xF1B4004A,0xE5F800F3,0xF78C0033,0xF184004A,0xEC0000F3,0xC9FC00F3,0xFFB80042,0xF9BC0033,0xFFAC0049,0xF7AC0002,0xF1B4004A,0xE5F800F3,0xF78C0033,0xF184004A,0xEC0000F3,0xE5F800F3,0xF78C0033,0xF184004A,0xEC0000F3,0xEC0000F3,0xFFD000AB,0xF5D800DE,0xF5D800DD,0xFFC80076,0xFFBC002E,0xFFAC0002,0xF9B40001,0xF7A40003,0xFDD000B5,0xFFC80063,0xFBA00032,0xF184004A, -0xDFF800F3,0x1C400F4,0x1C400F4,0x1C400F4,0x1C400F4,0xFFBC0059,0xFFBC0059,0xFFBC0059,0xF3B80049,0xF3B80049,0xEBB80049,0xFFB0003E,0xFFB0003E,0xFFB0003E,0xF7AC0001,0xF7AC0001,0xEDB0000E,0xEFAC0032,0xEFAC0032,0xEBAC0005,0xE7AC0032,0xA5FC00F3,0xA5FC00F3,0xA5FC00F3,0xF99C0049,0xF99C0049,0xEBA80049,0xF78C0032,0xF78C0032,0xEB980002,0xE79C0032,0xD3FC00F3, -0xD3FC00F3,0xEB780049,0xE7640032,0xE00000F3,0xFFBC0095,0xF9C000CC,0x1C400F4,0xFDB80056,0xFFB00018,0xFFAC0001,0xFBAC0004,0xF3AC0001,0xFFB40091,0xFFB00045,0xF7A80032,0xEB980002,0xC7FC00F3,0x1CC0034,0x1CC0034,0x1CC0034,0x1CC0034,0xFFC40000,0xFFC40000,0xFFC40000,0xF1C40000,0xF1C40000,0xEBC40001,0xB7FC0032,0xB7FC0032,0xB7FC0032,0xF3B40001,0xF3B40001, -0xEBBC0001,0xDDF40032,0xDDF40032,0xEBA00001,0xE6000032,0xB7FC0032,0xB7FC0032,0xB7FC0032,0xF3B40001,0xF3B40001,0xEBBC0001,0xDDF40032,0xDDF40032,0xEBA00001,0xE6000032,0xDDF40032,0xDDF40032,0xEBA00001,0xE6000032,0xE6000032,0xFDC80020,0xFFCC0020,0x1CC0034,0xFDC00014,0xFDBC0005,0xFDB00000,0xF9B40000,0xF3AC0001,0xFFC40020,0xFBC00012,0xD3FC0032,0xEBA00001, -0xD3FC0032,0x1E40048,0xFFDC0014,0xFDD80001,0xF7D80001,0xD9FC0048,0xFFC80005,0xF7CC0001,0xEDF80048,0xF7A80001,0xF000004A,0xD9FC0048,0xFFC80005,0xF7CC0001,0xEDF80048,0xF7A80001,0xF000004A,0xEDF80048,0xF7A80001,0xF000004A,0xF000004A,0xD9FC0048,0xFFC80005,0xF7CC0001,0xEDF80048,0xF7A80001,0xF000004A,0xEDF80048,0xF7A80001,0xF000004A,0xF000004A,0xEDF80048, -0xF7A80001,0xF000004A,0xF000004A,0xF000004A,0xF5E4003D,0x37FC0048,0xFBE4003D,0xFFD40029,0xFDCC0019,0xFFAC0001,0xF7B80001,0xF7980001,0xFFDC0032,0xFFCC0028,0xF9BC0001,0xF000004A,0xE7FC0048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0x1B80048,0xFBAC0000,0xFBAC0000,0xFBAC0000,0xFBAC0000,0xFBAC0000, -0xFBAC0000,0xE7AC0001,0xE7AC0001,0xE7AC0001,0xE1AC0001,0x95FC0048,0x95FC0048,0x95FC0048,0x95FC0048,0x95FC0048,0x95FC0048,0xEB980001,0xEB980001,0xEB980001,0xE1A00001,0xCBFC0048,0xCBFC0048,0xCBFC0048,0xE17C0001,0xDA00004A,0xF3B40034,0x1B80048,0x1B80048,0xFBB40019,0xFDB00008,0xFFAC0001,0xFFAC0001,0xF3AC0000,0xF9B00029,0xFBAC0014,0xEFA80000,0xEB980001, -0xBFF80048,}; -static const uint32_t g_etc1_to_bc7_m6_table61[] = { -0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000, -0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x5D80000,0x5D80000,0x5D80000,0x9BFC0000,0xC1FC0000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000, -0xC1FC0000,0xE1F80000,0xE1F80000,0xE1F80000,0xE8000001,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xE1F80000,0xE1F80000,0xE1F80000,0xE8000001,0xE1F80000,0xE1F80000,0xE1F80000,0xE8000001,0xE8000001,0x1F40000,0x1D40000,0x1D40000,0x6DFC0000,0x9BFC0000,0xB1FC0000,0xB1FC0000,0xCDFC0000,0x6DFC0000,0x9BFC0000,0xD9FC0000,0xE1F80000, -0xD9FC0000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0xDFFC0000,0xDFFC0000,0xDFFC0000,0xEFFC0000,0xEFFC0000,0xF2000001,0xDFFC0000,0xDFFC0000,0xDFFC0000,0xEFFC0000,0xEFFC0000,0xF2000001,0xEFFC0000,0xEFFC0000,0xF2000001,0xF2000001,0xDFFC0000,0xDFFC0000,0xDFFC0000,0xEFFC0000,0xEFFC0000,0xF2000001,0xEFFC0000,0xEFFC0000,0xF2000001,0xF2000001,0xEFFC0000, -0xEFFC0000,0xF2000001,0xF2000001,0xF2000001,0xB5FC0000,0x57FC0000,0x1E80000,0xD7FC0000,0xE5FC0000,0xEBFC0000,0xEDFC0000,0xF1F80000,0xCBFC0000,0xDFFC0000,0xEBFC0000,0xF2000001,0xEBFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1DC0255,0xFFDC01AD,0xFFD40125,0xFFD400F4,0xFFD00159,0xFFCC00B4,0xFFCC0074,0xFFCC0099,0xFFC8004C,0xF9C80085,0xFFD00179,0xFFC800AF,0xFFC40054,0xFFC00069,0xFFBC0001,0xF9C00045,0xFFBC00DD,0xFBBC004A,0xF9B8005A,0xF5BC00DE,0xCFFC0253,0xFFC00158,0xFFBC00F3,0xFFB800D3,0xFFAC004E,0xF9B40085,0xFFAC0106,0xFF9C0032,0xF99C0042,0xF5A400DF,0xE7FC0253, -0xFF7800F3,0xF9700085,0xF55C00DE,0xEE000253,0xFFD801C7,0xF7DC0229,0xF7DC0234,0xFFD40139,0xFFCC00B5,0xFFC0003C,0xFFC0000C,0xFFBC0005,0xFFD401B7,0xFFD00122,0xFFB80036,0xF99C0042,0xE1FC0253,0x1E800DD,0xFDE40095,0xFFE0004D,0xFFDC0034,0xFFDC0089,0xFFD8002C,0xFFD80008,0xFFD40041,0xFDD0000C,0xF9D4003D,0xDFFC00DD,0xFFD80068,0xFFCC0034,0xFFCC0059,0xFFBC0001, -0xF9C4003D,0xEFFC00DD,0xFF9C0032,0xF994003D,0xF40000DE,0xDFFC00DD,0xFFD80068,0xFFCC0034,0xFFCC0059,0xFFBC0001,0xF9C4003D,0xEFFC00DD,0xFF9C0032,0xF994003D,0xF40000DE,0xEFFC00DD,0xFF9C0032,0xF994003D,0xF40000DE,0xF40000DE,0xFFE400B5,0xFDE800C9,0xFDE800C8,0xFFDC0089,0xFFD40062,0xFFC8001E,0xFFC40001,0xFFB40002,0xFFE000BA,0xFFDC0082,0xFFB80036,0xF994003D, -0xEBFC00DD,0x1D400F4,0x1D400F4,0x1D400F4,0x1D400F4,0xFDCC0074,0xFDCC0074,0xFDCC0074,0xFBC80049,0xFBC80049,0xF3C80049,0xFFC40054,0xFFC40054,0xFFC40054,0xFFBC0001,0xFFBC0001,0xF5C0000E,0xF7BC0032,0xF7BC0032,0xF3BC0005,0xEFBC0032,0xBDFC00F3,0xBDFC00F3,0xBDFC00F3,0xFDB4004E,0xFDB4004E,0xF3B80049,0xFF9C0032,0xFF9C0032,0xF3A80002,0xEFAC0032,0xDFF800F3, -0xDFF800F3,0xF3880049,0xEF740032,0xE80000F3,0xFFCC00A8,0xFFCC00E0,0x1D400F4,0xFFCC0071,0xFFC40038,0xFFC00018,0xFFC0000C,0xFBBC0001,0xFDCC00A3,0xFFC40068,0xFFB80032,0xF3A80002,0xD7FC00F3,0x1DC0034,0x1DC0034,0x1DC0034,0x1DC0034,0xFDD80008,0xFDD80008,0xFDD80008,0xF9D40000,0xF9D40000,0xF3D40001,0xCFFC0032,0xCFFC0032,0xCFFC0032,0xFBC40001,0xFBC40001, -0xF3CC0001,0xE7FC0032,0xE7FC0032,0xF3B00001,0xEE000032,0xCFFC0032,0xCFFC0032,0xCFFC0032,0xFBC40001,0xFBC40001,0xF3CC0001,0xE7FC0032,0xE7FC0032,0xF3B00001,0xEE000032,0xE7FC0032,0xE7FC0032,0xF3B00001,0xEE000032,0xEE000032,0xFFD80022,0xF7DC0029,0x1DC0034,0xFDD80019,0xFDD0000D,0xFFC80005,0xFFC40001,0xFBBC0001,0xF1DC0029,0xFBD40019,0xE1FC0032,0xF3B00001, -0xE1FC0032,0x1F4003D,0xFFEC0028,0xFFEC000D,0xFFE80000,0xEFFC003D,0xFFE40014,0xFFDC0000,0xF7F8003D,0xFFB80000,0xF800003D,0xEFFC003D,0xFFE40014,0xFFDC0000,0xF7F8003D,0xFFB80000,0xF800003D,0xF7F8003D,0xFFB80000,0xF800003D,0xF800003D,0xEFFC003D,0xFFE40014,0xFFDC0000,0xF7F8003D,0xFFB80000,0xF800003D,0xF7F8003D,0xFFB80000,0xF800003D,0xF800003D,0xF7F8003D, -0xFFB80000,0xF800003D,0xF800003D,0xF800003D,0xFBF00034,0xA7FC003D,0xF3F4003D,0xFFEC0029,0xFFE80020,0xFFD40011,0xFFC80000,0xFFA80000,0xFFF00032,0xFFEC0032,0xFFD00001,0xF800003D,0xF5FC003D,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0x1C80048,0xFFBC0001,0xFFBC0001,0xFFBC0001,0xFFBC0001,0xFFBC0001, -0xFFBC0001,0xEFBC0001,0xEFBC0001,0xEFBC0001,0xE9BC0001,0xAFFC0048,0xAFFC0048,0xAFFC0048,0xAFFC0048,0xAFFC0048,0xAFFC0048,0xF3A80001,0xF3A80001,0xF3A80001,0xE9B00001,0xD7FC0048,0xD7FC0048,0xD7FC0048,0xE98C0001,0xE200004A,0xFBC40034,0x1C80048,0x1C80048,0xFBC40020,0xFDC00014,0xFDC00008,0xFDC00008,0xFBBC0000,0xF5C40032,0xFFBC001D,0xF7B80000,0xF3A80001, -0xCDFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table62[] = { -0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xDBF80000, -0xDBF80000,0xDBF80000,0xDBF80000,0xE4000001,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0xDE80000,0xDE80000,0xDE80000,0xB5FC0000,0xD1FC0000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000, -0xD9FC0000,0xEDF80000,0xEDF80000,0xEDF80000,0xF0000001,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xEDF80000,0xEDF80000,0xEDF80000,0xF0000001,0xEDF80000,0xEDF80000,0xEDF80000,0xF0000001,0xF0000001,0x37FC0000,0x1E40000,0x1E40000,0xA7FC0000,0xC1FC0000,0xCFFC0000,0xCFFC0000,0xE1FC0000,0xA7FC0000,0xC1FC0000,0xE7FC0000,0xEDF80000, -0xE7FC0000,0x1F80000,0x1F80000,0x1F80000,0x1F80000,0xF7FC0000,0xF7FC0000,0xF7FC0000,0xFBFC0000,0xFBFC0000,0xFA000001,0xF7FC0000,0xF7FC0000,0xF7FC0000,0xFBFC0000,0xFBFC0000,0xFA000001,0xFBFC0000,0xFBFC0000,0xFA000001,0xFA000001,0xF7FC0000,0xF7FC0000,0xF7FC0000,0xFBFC0000,0xFBFC0000,0xFA000001,0xFBFC0000,0xFBFC0000,0xFA000001,0xFA000001,0xFBFC0000, -0xFBFC0000,0xFA000001,0xFA000001,0xFA000001,0xEDFC0000,0xD7FC0000,0x1F80000,0xF5FC0000,0xF9FC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xF3FC0000,0xF7FC0000,0xFBFC0000,0xFA000001,0xFBFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1E80181,0xFFE80139,0xFFE40104,0xFFE400F4,0xFFE000FD,0xFFE000B2,0xFFDC0090,0xFFD80075,0xFFD80051,0xFDD8004D,0xFFDC00FD,0xFFDC00A8,0xFFDC0084,0xFFD80051,0xFFD00021,0xFFD00011,0xFFCC0072,0xFFCC0032,0xFDCC000E,0xF9CC005E,0xE3FC017F,0xFFD80118,0xFFD400F3,0xFFCC00A9,0xFFCC0069,0xFDC8004D,0xFFC000AE,0xFFB80045,0xFDB40006,0xF9B8005E,0xF1F8017F, -0xFFA800F3,0xFD90004D,0xF97C005E,0xF400017F,0xFFE40135,0xFDE8015D,0xFDE8016C,0xFFE000F5,0xFFDC00A3,0xFFD40055,0xFFD00038,0xFFD0000E,0xFFE40142,0xFFDC00E3,0xFFD0004E,0xFDB40006,0xEDFC017F,0x1F4005D,0xFDF0004D,0xFFF00038,0xFFEC0034,0xFFEC003D,0xFFEC0022,0xFFE80014,0xFFE40011,0xFFE40001,0xFDE40005,0xF1FC005D,0xFFEC0043,0xFFE40034,0xFFE40021,0xFFD80009, -0xFDD80005,0xF9F8005D,0xFFCC0032,0xFDB40005,0xF800005E,0xF1FC005D,0xFFEC0043,0xFFE40034,0xFFE40021,0xFFD80009,0xFDD80005,0xF9F8005D,0xFFCC0032,0xFDB40005,0xF800005E,0xF9F8005D,0xFFCC0032,0xFDB40005,0xF800005E,0xF800005E,0xFDF00051,0xF3F4005D,0xF3F4005D,0xFFEC0042,0xFFE80030,0xFFE4001B,0xFFE0000D,0xFFD80008,0xFBF00051,0xFFE80044,0xFFDC0033,0xFDB40005, -0xF7FC005D,0x1E400F4,0x1E400F4,0x1E400F4,0x1E400F4,0xFFDC0090,0xFFDC0090,0xFFDC0090,0xFFD80051,0xFFD80051,0xFBD80049,0xFFDC0084,0xFFDC0084,0xFFDC0084,0xFFD00021,0xFFD00021,0xFDD0000E,0xFFCC0032,0xFFCC0032,0xFBCC0005,0xF7CC0032,0xD5FC00F3,0xD5FC00F3,0xD5FC00F3,0xFFCC0069,0xFFCC0069,0xFBC80049,0xFFB80045,0xFFB80045,0xFBB80002,0xF7BC0032,0xEBF800F3, -0xEBF800F3,0xFB980049,0xF7840032,0xF00000F3,0xFBE000C9,0xF9E000E1,0x1E400F4,0xFFDC0095,0xFFD80068,0xFFD40045,0xFFD00038,0xFFD0000E,0xFDE000CA,0xFFDC0092,0xFFD0004D,0xFBB80002,0xE5FC00F3,0x1EC0034,0x1EC0034,0x1EC0034,0x1EC0034,0xFFE80014,0xFFE80014,0xFFE80014,0xFFE40001,0xFFE40001,0xFBE40001,0xE9FC0032,0xE9FC0032,0xE9FC0032,0xFFD80009,0xFFD80009, -0xFBDC0001,0xF3FC0032,0xF3FC0032,0xFBC00001,0xF6000032,0xE9FC0032,0xE9FC0032,0xE9FC0032,0xFFD80009,0xFFD80009,0xFBDC0001,0xF3FC0032,0xF3FC0032,0xFBC00001,0xF6000032,0xF3FC0032,0xF3FC0032,0xFBC00001,0xF6000032,0xF6000032,0xFBEC0029,0xFFEC0029,0x1EC0034,0xF9EC0029,0xFDE80020,0xFFE40012,0xFFE0000D,0xFFD80008,0xF9EC0029,0xFFE80020,0xF1FC0032,0xFBC00001, -0xF1FC0032,0x1FC0005,0xFFF80004,0xFFF80001,0xFFF80000,0xFBFC0005,0xFFF80002,0xFFF40000,0xFDF80005,0xFFEC0000,0xFC000005,0xFBFC0005,0xFFF80002,0xFFF40000,0xFDF80005,0xFFEC0000,0xFC000005,0xFDF80005,0xFFEC0000,0xFC000005,0xFC000005,0xFBFC0005,0xFFF80002,0xFFF40000,0xFDF80005,0xFFEC0000,0xFC000005,0xFDF80005,0xFFEC0000,0xFC000005,0xFC000005,0xFDF80005, -0xFFEC0000,0xFC000005,0xFC000005,0xFC000005,0xFFF80004,0xE7FC0005,0xF7FC0005,0xFBFC0005,0xFFF80002,0xFFF00001,0xFFF00000,0xFFE40000,0xF9FC0005,0xFBFC0005,0xFFF00000,0xFC000005,0xFDF80005,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0x1D80048,0xFFD00008,0xFFD00008,0xFFD00008,0xFFD00008,0xFFD00008, -0xFFD00008,0xF7CC0001,0xF7CC0001,0xF7CC0001,0xF1CC0001,0xC7FC0048,0xC7FC0048,0xC7FC0048,0xC7FC0048,0xC7FC0048,0xC7FC0048,0xFBB80001,0xFBB80001,0xFBB80001,0xF1C00001,0xE3FC0048,0xE3FC0048,0xE3FC0048,0xF19C0001,0xEA00004A,0xF5D8003D,0x1D80048,0x1D80048,0xFDD40029,0xFFD0001D,0xFDD00014,0xFDD00014,0xFFCC0004,0xFDD40032,0xFFD00022,0xFFC80000,0xFBB80001, -0xDDF80048,}; -static const uint32_t g_etc1_to_bc7_m6_table63[] = { -0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xE7F80000, -0xE7F80000,0xE7F80000,0xE7F80000,0xEC000001,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1FC0000,0x1FC0000,0x1FC0000,0xCDFC0000,0xDFFC0000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0xF1FC0000,0xF1FC0000,0xF1FC0000,0xF1FC0000,0xF1FC0000, -0xF1FC0000,0xF9F80000,0xF9F80000,0xF9F80000,0xF8000001,0xF1FC0000,0xF1FC0000,0xF1FC0000,0xF1FC0000,0xF1FC0000,0xF1FC0000,0xF9F80000,0xF9F80000,0xF9F80000,0xF8000001,0xF9F80000,0xF9F80000,0xF9F80000,0xF8000001,0xF8000001,0xB7FC0000,0x1F40000,0x1F40000,0xDFFC0000,0xE9FC0000,0xEFFC0000,0xEFFC0000,0xF3FC0000,0xDFFC0000,0xE9FC0000,0xF7FC0000,0xF9F80000, -0xF7FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1F400C2,0xFDF000B2,0xFFF0009D,0xFFF00099,0xFFEC0098,0xFFEC007D,0xFFEC0074,0xFFEC0062,0xFFE80058,0xFFE80048,0xFFEC0089,0xFFE80069,0xFFE80059,0xFFE40040,0xFFE40030,0xFFE0001D,0xFFE40031,0xFFE0001D,0xFFDC0001,0xFDDC0011,0xEFFC00C2,0xFFEC00A7,0xFFE80099,0xFFE40070,0xFFE40060,0xFFDC0048,0xFFD80059,0xFFD80035,0xFFCC0009,0xFDCC0011,0xF7F800C2, -0xFFD40099,0xFFB80048,0xFD9C0011,0xF80000C2,0xFDF000B0,0xF3F400C2,0xF3F400C2,0xFFF0008E,0xFFEC006D,0xFFE8004A,0xFFE8003D,0xFFE40022,0xFFF000A2,0xFFF0008E,0xFFE0003F,0xFFCC0009,0xF5FC00C2,0x1FC0012,0xFFF80011,0xFFF8000E,0xFFF8000D,0xFFF8000C,0xFFF80009,0xFFF80008,0xFFF80006,0xFFF40004,0xFFF40000,0xFBFC0011,0xFFF8000E,0xFFF4000D,0xFFF00008,0xFFF00004, -0xFFF00000,0xFDF80011,0xFFEC000D,0xFFE00000,0xFC000011,0xFBFC0011,0xFFF8000E,0xFFF4000D,0xFFF00008,0xFFF00004,0xFFF00000,0xFDF80011,0xFFEC000D,0xFFE00000,0xFC000011,0xFDF80011,0xFFEC000D,0xFFE00000,0xFC000011,0xFC000011,0xFFF80011,0xF7FC0012,0xF7FC0012,0xFFF8000C,0xFFF4000E,0xFFF40006,0xFFF40005,0xFFEC0004,0xFFF8000C,0xFBFC0011,0xFFF0000D,0xFFE00000, -0xFDF80011,0x1F00099,0x1F00099,0x1F00099,0x1F00099,0xFFEC0074,0xFFEC0074,0xFFEC0074,0xFFE80058,0xFFE80058,0xFFE80048,0xFFE80059,0xFFE80059,0xFFE80059,0xFFE40030,0xFFE40030,0xFFE0001D,0xFFE0001D,0xFFE0001D,0xFFDC0001,0xFDDC000D,0xEBFC0099,0xEBFC0099,0xEBFC0099,0xFFE40060,0xFFE40060,0xFFDC0048,0xFFD80035,0xFFD80035,0xFFCC0009,0xFDCC000D,0xF5FC0099, -0xF5FC0099,0xFFB80048,0xFD9C000D,0xF600009A,0xFFEC0089,0xFFEC0090,0x1F00099,0xFFE80074,0xFFEC005D,0xFFE80046,0xFFE8003D,0xFFE40022,0xFBEC0089,0xFFE80072,0xFFE0003E,0xFFCC0009,0xF3FC0099,0x1F8000D,0x1F8000D,0x1F8000D,0x1F8000D,0xFFF80008,0xFFF80008,0xFFF80008,0xFFF40004,0xFFF40004,0xFFF40000,0xF7FC000D,0xF7FC000D,0xF7FC000D,0xFFF00004,0xFFF00004, -0xFFF00000,0xFBFC000D,0xFBFC000D,0xFFE00000,0xFC00000D,0xF7FC000D,0xF7FC000D,0xF7FC000D,0xFFF00004,0xFFF00004,0xFFF00000,0xFBFC000D,0xFBFC000D,0xFFE00000,0xFC00000D,0xFBFC000D,0xFBFC000D,0xFFE00000,0xFC00000D,0xFC00000D,0xF5FC000D,0xF5F8000D,0x1F8000D,0xFFF80008,0xFFF4000A,0xFFF40005,0xFFF40005,0xFFEC0004,0xFFF80008,0xFFF4000A,0xFBFC000D,0xFFE00000, -0xFBFC000D,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0x1E80048,0xFFE0001D,0xFFE0001D,0xFFE0001D,0xFFE0001D,0xFFE0001D, -0xFFE0001D,0xFFDC0001,0xFFDC0001,0xFFDC0001,0xF9DC0001,0xDFFC0048,0xDFFC0048,0xDFFC0048,0xDFFC0048,0xDFFC0048,0xDFFC0048,0xFFCC0009,0xFFCC0009,0xFFCC0009,0xF9D00001,0xEFFC0048,0xEFFC0048,0xEFFC0048,0xF9AC0001,0xF200004A,0xFDE8003D,0x1E80048,0x1E80048,0xFDE40034,0xFFE40029,0xFFE00028,0xFFE00028,0xFFE00014,0xF9E8003D,0xFFE40032,0xFFDC000A,0xFFCC0009, -0xEBFC0048,}; -static const uint32_t g_etc1_to_bc7_m6_table64[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x100001,0x100001,0x100001,0x100001,0x2180000,0x2180000,0x2180000,0x300000,0x300000,0x8000000,0x2180000,0x2180000,0x2180000,0x300000,0x300000,0x8000000,0x300000,0x300000,0x8000000,0x8000000,0x2180000,0x2180000,0x2180000,0x300000,0x300000,0x8000000,0x300000,0x300000,0x8000000,0x8000000,0x300000, -0x300000,0x8000000,0x8000000,0x8000000,0x140000,0x140000,0x100001,0x180000,0x1C0000,0x240000,0x280000,0x3C0000,0x2140000,0x2180000,0x240000,0x8000000,0x240000,0x380001,0x540000,0xAC0000,0x1C000000,0x540000,0xAC0000,0x1C000000,0xAC0000,0x1C000000,0x1C000000,0x540000,0xAC0000,0x1C000000,0xAC0000,0x1C000000, -0x1C000000,0xAC0000,0x1C000000,0x1C000000,0x1C000000,0x540000,0xAC0000,0x1C000000,0xAC0000,0x1C000000,0x1C000000,0xAC0000,0x1C000000,0x1C000000,0x1C000000,0xAC0000,0x1C000000,0x1C000000,0x1C000000,0x1C000000,0x480000,0x63C0000,0x63C0000,0x600000,0x8C0000,0x1140000,0x1C000000,0x1C000000,0x24C0000,0x6C0000,0xFD00000,0x1C000000, -0x780000,0x140232,0x4E080039,0x28080039,0x1C040039,0x380000C8,0x28000011,0x1C000001,0x1C0000C8,0x16000048,0x120000C8,0x260001B9,0x22000096,0x18000051,0x18000110,0x16000088,0x120000EC,0x120001B9,0x1000010D,0x10000149,0xC0001B9,0x1C0232,0x1E000109,0x18000091,0x18000150,0x140000BB,0x12000110,0x120001DD,0x10000131,0xE000165,0xC0001C9,0x300232, -0x10000186,0xA0001AA,0xA0001F2,0x8000232,0x7400007B,0xFA0400D2,0xFE0C0082,0x3400007B,0x2200008A,0x1A00007B,0x16000061,0x140000B5,0x460000F1,0x280000BD,0x160000CB,0xE000165,0x240232,0x1801BA,0x4E080029,0x28080029,0x1C080029,0x380000C8,0x28000011,0x1C000001,0x1C0000C8,0x16000048,0x120000C8,0x22401B9,0x22000096,0x18000051,0x18000110,0x16000088, -0x120000EC,0x4C01B9,0x1000010D,0x10000149,0xC0001B9,0x22401B9,0x22000096,0x18000051,0x18000110,0x16000088,0x120000EC,0x4C01B9,0x1000010D,0x10000149,0xC0001B9,0x4C01B9,0x1000010D,0x10000149,0xC0001B9,0xC0001B9,0x7400007B,0xFA0400CE,0xFE0C005E,0x3400007B,0x2200008A,0x1A00007B,0x16000061,0x140000B5,0x500000DB,0x280000B4,0x160000CA,0x10000149, -0x3401B9,0x40039,0x40039,0x40039,0x40039,0x1A000000,0x1A000000,0x1A000000,0xC000000,0xC000000,0x8000000,0xA000029,0xA000029,0xA000029,0xC000010,0xC000010,0x6000008,0x6000029,0x6000029,0x4000019,0x4000029,0x80036,0x80036,0x80036,0x6000018,0x6000018,0x600000C,0x600002D,0x600002D,0x400001D,0x400002D,0xC0036, -0xC0036,0x4000022,0x2000031,0x2000036,0x3600000A,0x88000000,0x40039,0x1C000011,0x1000000D,0xC00000D,0xC00000A,0xA000011,0x1600001A,0x10000013,0x6000029,0x400001D,0xC0036,0x80029,0x80029,0x80029,0x80029,0x1A000000,0x1A000000,0x1A000000,0xC000000,0xC000000,0x8000000,0xC0029,0xC0029,0xC0029,0xC000010,0xC000010, -0x6000008,0x140029,0x140029,0x4000019,0x4000029,0xC0029,0xC0029,0xC0029,0xC000010,0xC000010,0x6000008,0x140029,0x140029,0x4000019,0x4000029,0x140029,0x140029,0x4000019,0x4000029,0x4000029,0x3600000A,0x88000000,0x80029,0x1C000011,0x1000000D,0xC00000D,0xC00000A,0xA000011,0x16000019,0x10000012,0x100029,0x4000019, -0x100029,0x2400CA,0x42140001,0x26100001,0x1C100001,0x3800C8,0x28000011,0x1C000001,0x7000C8,0x16000048,0x120000C8,0x3800C8,0x28000011,0x1C000001,0x7000C8,0x16000048,0x120000C8,0x7000C8,0x16000048,0x120000C8,0x120000C8,0x3800C8,0x28000011,0x1C000001,0x7000C8,0x16000048,0x120000C8,0x7000C8,0x16000048,0x120000C8,0x120000C8,0x7000C8, -0x16000048,0x120000C8,0x120000C8,0x120000C8,0x7400004A,0x2800C8,0xF418000D,0x3C000041,0x2A000049,0x1E000041,0x18000034,0x18000061,0x50000062,0x34000050,0x1C000028,0x120000C8,0x5000C8,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table65[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x200001,0x200001,0x200001,0x200001,0x2300000,0x2300000,0x2300000,0x640000,0x640000,0x10000000,0x2300000,0x2300000,0x2300000,0x640000,0x640000,0x10000000,0x640000,0x640000,0x10000000,0x10000000,0x2300000,0x2300000,0x2300000,0x640000,0x640000,0x10000000,0x640000,0x640000,0x10000000,0x10000000,0x640000, -0x640000,0x10000000,0x10000000,0x10000000,0x280000,0x240000,0x200001,0x22C0000,0x380000,0x480000,0x500000,0x780000,0x2280000,0x2300000,0x480000,0x10000000,0x480000,0x480001,0x6C0000,0xDC0000,0x24000000,0x6C0000,0xDC0000,0x24000000,0xDC0000,0x24000000,0x24000000,0x6C0000,0xDC0000,0x24000000,0xDC0000,0x24000000, -0x24000000,0xDC0000,0x24000000,0x24000000,0x24000000,0x6C0000,0xDC0000,0x24000000,0xDC0000,0x24000000,0x24000000,0xDC0000,0x24000000,0x24000000,0x24000000,0xDC0000,0x24000000,0x24000000,0x24000000,0x24000000,0x5C0000,0xE4C0000,0xE4C0000,0x7C0000,0xB40000,0x1600000,0x24000000,0x24000000,0x640000,0x880000,0x17E00000,0x24000000, -0x9C0000,0x1C03A2,0x620C00C1,0x320C00C2,0x240C00C1,0x500000C8,0x34000001,0x26000014,0x260000CA,0x20000029,0x1A0000C8,0x360002AE,0x2E0000FE,0x220000A1,0x22000153,0x1E000098,0x18000110,0x1A0002AE,0x160001A9,0x160001C9,0x100002B1,0x2803A2,0x280001B3,0x2200011A,0x220001CC,0x1E0000FC,0x18000150,0x180002F1,0x160001E9,0x14000205,0x100002D5,0x4C03A2, -0x1600028A,0x10000282,0x1000032A,0xC0003A2,0x96000085,0xFE0C0142,0xF2140189,0x48000099,0x360000A2,0x26000098,0x1E000065,0x1C0000E0,0x6400015C,0x3C0000E9,0x1E000145,0x14000205,0x3403A2,0x2402AE,0x5E100091,0x32100091,0x24100091,0x500000C8,0x34000001,0x26040011,0x260000CA,0x20000029,0x1A0000C8,0x3402AE,0x2E0000FE,0x220000A1,0x22000153,0x1E000098, -0x18000110,0x6802AE,0x160001A9,0x160001C9,0x100002B1,0x3402AE,0x2E0000FE,0x220000A1,0x22000153,0x1E000098,0x18000110,0x6802AE,0x160001A9,0x160001C9,0x100002B1,0x6802AE,0x160001A9,0x160001C9,0x100002B1,0x100002B1,0x96000085,0xFE0C011E,0xF418010D,0x48000099,0x360000A2,0x26000098,0x1E000065,0x1C0000E0,0x64000138,0x3C0000D9,0x1E000141,0x160001C9, -0x4C02AE,0xC00C1,0xC00C1,0xC00C1,0xC00C1,0x32000000,0x32000000,0x32000000,0x18000000,0x18000000,0x10000000,0x16000091,0x16000091,0x16000091,0x12000034,0x12000034,0xE00001D,0xC000091,0xC000091,0xA000055,0x8000091,0x1000C1,0x1000C1,0x1000C1,0x12000058,0x12000058,0xC000030,0xC0000A1,0xC0000A1,0xA000065,0x6000099,0x2000C1, -0x2000C1,0xA000086,0x60000AE,0x40000C2,0x6000002D,0xF8000001,0xC00C1,0x3200003A,0x1E000034,0x1A00003A,0x1600002D,0xE000048,0x3400005E,0x26000054,0xE000092,0xA000065,0x1800C1,0x100091,0x100091,0x100091,0x100091,0x32000000,0x32000000,0x32000000,0x18000000,0x18000000,0x10000000,0x180091,0x180091,0x180091,0x12000034,0x12000034, -0xE00001D,0x2C0091,0x2C0091,0xA000055,0x8000091,0x180091,0x180091,0x180091,0x12000034,0x12000034,0xE00001D,0x2C0091,0x2C0091,0xA000055,0x8000091,0x2C0091,0x2C0091,0xA000055,0x8000091,0x8000091,0x6000002D,0xF8000001,0x100091,0x3200003A,0x1E000034,0x1A00003A,0x1600002D,0xE000048,0x34000055,0x26000050,0x200091,0xA000055, -0x200091,0x3400CA,0x4A240001,0x2E200001,0x24200001,0x5000C8,0x34000001,0x240C0001,0xA000C8,0x20000029,0x1A0000C8,0x5000C8,0x34000001,0x240C0001,0xA000C8,0x20000029,0x1A0000C8,0xA000C8,0x20000029,0x1A0000C8,0x1A0000C8,0x5000C8,0x34000001,0x240C0001,0xA000C8,0x20000029,0x1A0000C8,0xA000C8,0x20000029,0x1A0000C8,0x1A0000C8,0xA000C8, -0x20000029,0x1A0000C8,0x1A0000C8,0x1A0000C8,0xA400002D,0x43800C8,0xFC28000D,0x52000022,0x36000029,0x28000029,0x22000014,0x20000041,0x72000048,0x46000034,0x2400000D,0x1A0000C8,0x7000C8,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table66[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0x140000,0x140000,0x140000,0x140000,0x140000, -0x140000,0x240000,0x240000,0x240000,0x6000000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x240000,0x240000,0x240000,0x6000000,0x240000,0x240000,0x240000,0x6000000,0x6000000,0xE0C0000,0xC0001,0xC0001,0x100000,0x100000,0x2100000,0x2100000,0x2140000,0x100000,0x100000,0x1C0000,0x240000, -0x1C0000,0x300001,0x300001,0x300001,0x300001,0x480000,0x480000,0x480000,0x940000,0x940000,0x18000000,0x480000,0x480000,0x480000,0x940000,0x940000,0x18000000,0x940000,0x940000,0x18000000,0x18000000,0x480000,0x480000,0x480000,0x940000,0x940000,0x18000000,0x940000,0x940000,0x18000000,0x18000000,0x940000, -0x940000,0x18000000,0x18000000,0x18000000,0x4380000,0x2340000,0x300001,0x440000,0x540000,0x680000,0x780000,0xB40000,0x23C0000,0x480000,0x680000,0x18000000,0x680000,0x580001,0x840000,0x10C0000,0x2C000000,0x840000,0x10C0000,0x2C000000,0x10C0000,0x2C000000,0x2C000000,0x840000,0x10C0000,0x2C000000,0x10C0000,0x2C000000, -0x2C000000,0x10C0000,0x2C000000,0x2C000000,0x2C000000,0x840000,0x10C0000,0x2C000000,0x10C0000,0x2C000000,0x2C000000,0x10C0000,0x2C000000,0x2C000000,0x2C000000,0x10C0000,0x2C000000,0x2C000000,0x2C000000,0x2C000000,0x700000,0x600000,0x600000,0x2940000,0xDC0000,0x1B00000,0x2C000000,0x2C000000,0x2780000,0xA80000,0x1FF00000,0x2C000000, -0xBC0000,0x2804C3,0x7414014E,0x3C14014F,0x2C14014E,0x600800E1,0x4008001A,0x2C0C0049,0x300800E3,0x28040036,0x220800E1,0x4E0002D3,0x3A0000CD,0x2C0000A4,0x2E00011A,0x2800003D,0x220000D8,0x260002D3,0x2000017A,0x1C0001A0,0x180002D4,0x3804C1,0x34000216,0x2800016D,0x280001F7,0x280000E6,0x22000151,0x22000354,0x200001F3,0x1C000204,0x18000314,0x7004C1, -0x1C00031D,0x1A000305,0x160003C9,0x120004C1,0xC2000036,0xF41801F1,0xF8200266,0x6600003E,0x40000049,0x34000043,0x2A000015,0x24000084,0x82000121,0x5000009F,0x26000118,0x1C000204,0x5004C1,0x3402D3,0x662000A2,0x3A2000A2,0x2C2000A2,0x581000C9,0x3C100002,0x2E100015,0x300C00C9,0x2A080026,0x220C00C9,0x4C02D3,0x3A0000CD,0x2C0400A2,0x2E00011A,0x2800003D, -0x220000D8,0x9802D3,0x2000017A,0x1C0001A0,0x180002D4,0x4C02D3,0x3A0000CD,0x2C0400A2,0x2E00011A,0x2800003D,0x220000D8,0x9802D3,0x2000017A,0x1C0001A0,0x180002D4,0x9802D3,0x2000017A,0x1C0001A0,0x180002D4,0x180002D4,0xC2000036,0xF8200139,0xFC280126,0x6600003E,0x40000049,0x34000043,0x2A000015,0x24000084,0x900000DD,0x50000086,0x26000114,0x1C0001A0, -0x6C02D3,0x14014E,0x14014E,0x14014E,0x14014E,0x42080019,0x42080019,0x42080019,0x22080019,0x22080019,0x18080019,0x300000A2,0x300000A2,0x300000A2,0x22000011,0x22000011,0x18000001,0x160000A4,0x160000A4,0x14000041,0xE0000A4,0x20014D,0x20014D,0x20014D,0x1E00007D,0x1E00007D,0x14000041,0x120000D8,0x120000D8,0x1200006C,0xE0000BD,0x3C014D, -0x3C014D,0x100000C9,0xA000105,0xA00014D,0xA000000D,0xFE0C002E,0x14014E,0x50000019,0x32000019,0x2A000014,0x2600000D,0x20000025,0x56000063,0x3C000042,0x1E0000A6,0x1200006C,0x2C014D,0x2000A2,0x2000A2,0x2000A2,0x2000A2,0x3A100001,0x3A100001,0x3A100001,0x20100001,0x20100001,0x180C0001,0x3000A2,0x3000A2,0x3000A2,0x22000011,0x22000011, -0x18000001,0x5C00A2,0x5C00A2,0x14000041,0xE0000A4,0x3000A2,0x3000A2,0x3000A2,0x22000011,0x22000011,0x18000001,0x5C00A2,0x5C00A2,0x14000041,0xE0000A4,0x5C00A2,0x5C00A2,0x14000041,0xE0000A4,0xE0000A4,0xA000000D,0xF0100005,0x2000A2,0x50000019,0x32000019,0x2A000014,0x2600000D,0x20000025,0x64000041,0x3C000032,0x4000A2,0x14000041, -0x4000A2,0x4400CA,0x52340001,0x36300001,0x2C300001,0x6800C8,0x3C100001,0x2C1C0001,0xD000C8,0x2A000014,0x220000C8,0x6800C8,0x3C100001,0x2C1C0001,0xD000C8,0x2A000014,0x220000C8,0xD000C8,0x2A000014,0x220000C8,0x220000C8,0x6800C8,0x3C100001,0x2C1C0001,0xD000C8,0x2A000014,0x220000C8,0xD000C8,0x2A000014,0x220000C8,0x220000C8,0xD000C8, -0x2A000014,0x220000C8,0x220000C8,0x220000C8,0xC4040019,0xC4800C8,0xF4380012,0x6600000D,0x42040019,0x36000011,0x2C000004,0x28000029,0x94000032,0x58000019,0x2E000001,0x220000C8,0x9400C8,0x80019,0x80019,0x80019,0x80019,0x80019,0x80019,0x80019,0x80019,0x80019,0x80019,0x14000000,0x14000000,0x14000000,0x14000000,0x14000000, -0x14000000,0xA000000,0xA000000,0xA000000,0x6000000,0x80019,0x80019,0x80019,0x80019,0x80019,0x80019,0x6000008,0x6000008,0x6000008,0x6000004,0xC0019,0xC0019,0xC0019,0x400000D,0x2000019,0x68000000,0x80019,0x80019,0x2E000000,0x20000000,0x18000000,0x18000000,0x10000000,0x20000008,0x20000004,0xC000001,0x6000008, -0xC0019,}; -static const uint32_t g_etc1_to_bc7_m6_table67[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000, -0x2C0000,0x580000,0x580000,0x580000,0xE000000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x580000,0x580000,0x580000,0xE000000,0x580000,0x580000,0x580000,0xE000000,0xE000000,0x200000,0x1C0001,0x1C0001,0x6200000,0x240000,0x280000,0x280000,0x300000,0x6200000,0x240000,0x3C0000,0x580000, -0x3C0000,0x400001,0x400001,0x400001,0x400001,0x600000,0x600000,0x600000,0xC40000,0xC40000,0x20000000,0x600000,0x600000,0x600000,0xC40000,0xC40000,0x20000000,0xC40000,0xC40000,0x20000000,0x20000000,0x600000,0x600000,0x600000,0xC40000,0xC40000,0x20000000,0xC40000,0xC40000,0x20000000,0x20000000,0xC40000, -0xC40000,0x20000000,0x20000000,0x20000000,0x4C0000,0xA440000,0x400001,0x580000,0x26C0000,0x8C0000,0xA00000,0xF00000,0x4500000,0x600000,0x8C0000,0x20000000,0x8C0000,0x680001,0x9C0000,0x13C0000,0x34000000,0x9C0000,0x13C0000,0x34000000,0x13C0000,0x34000000,0x34000000,0x9C0000,0x13C0000,0x34000000,0x13C0000,0x34000000, -0x34000000,0x13C0000,0x34000000,0x34000000,0x34000000,0x9C0000,0x13C0000,0x34000000,0x13C0000,0x34000000,0x34000000,0x13C0000,0x34000000,0x34000000,0x34000000,0x13C0000,0x34000000,0x34000000,0x34000000,0x34000000,0x840000,0x700000,0x700000,0xB00000,0x1000000,0x3F00000,0x34000000,0x34000000,0x900000,0xC40000,0x29C40000,0x34000000, -0xE00000,0x340627,0x80200222,0x46200222,0x34200222,0x72100139,0x4C100076,0x361400C1,0x3A10013B,0x320C0082,0x2A100139,0x660002D3,0x4C0000A5,0x360800C8,0x3C0000E9,0x32000009,0x2A0000C9,0x320002D3,0x2C00012A,0x26000161,0x200002D4,0x480625,0x400002BE,0x34000225,0x3400024F,0x2E000106,0x2800017D,0x2E0003BC,0x280001FB,0x24000204,0x1E00034C,0x940625, -0x26000412,0x200003A5,0x1C00047D,0x18000625,0xF8000009,0xFA2402DD,0xFE2C038E,0x7E00000B,0x5600000E,0x40000009,0x36000004,0x30000032,0xA40000F6,0x66000062,0x340000D4,0x24000204,0x680625,0x4402D3,0x6E3000A2,0x423000A2,0x343000A2,0x602000C9,0x44200002,0x36200015,0x381C00C9,0x32180026,0x2A1C00C9,0x6402D3,0x4C0000A5,0x341400A2,0x3C0000E9,0x32000009, -0x2A0400C8,0xCC02D3,0x2C00012A,0x26000161,0x200002D4,0x6402D3,0x4C0000A5,0x341400A2,0x3C0000E9,0x32000009,0x2A0400C8,0xCC02D3,0x2C00012A,0x26000161,0x200002D4,0xCC02D3,0x2C00012A,0x26000161,0x200002D4,0x200002D4,0xF8000009,0xFE2C0141,0xF438013B,0x7E00000B,0x5600000E,0x40000009,0x34040002,0x30000032,0xAC000086,0x68000035,0x340000CB,0x26000161, -0x9002D3,0x200222,0x200222,0x200222,0x200222,0x52100071,0x52100071,0x52100071,0x2C100071,0x2C100071,0x20100071,0x480000A2,0x480000A2,0x480000A2,0x2E000001,0x2E000001,0x2204000C,0x220000A2,0x220000A2,0x1C000020,0x160000A4,0x300222,0x300222,0x300222,0x280000C6,0x280000C6,0x1E000081,0x1E000118,0x1E000118,0x1A00007E,0x160000E4,0x5C0222, -0x5C0222,0x16000145,0x14000178,0xE000225,0xE8000000,0xF21400BD,0x200222,0x7A000004,0x50000001,0x3C000002,0x36000000,0x2A000005,0x8200006D,0x56000032,0x2C0000AB,0x1A00007E,0x400222,0x3000A2,0x3000A2,0x3000A2,0x3000A2,0x42200001,0x42200001,0x42200001,0x28200001,0x28200001,0x201C0001,0x24400A2,0x24400A2,0x24400A2,0x2E000001,0x2E000001, -0x200C0000,0x8C00A2,0x8C00A2,0x1C000020,0x160000A4,0x24400A2,0x24400A2,0x24400A2,0x2E000001,0x2E000001,0x200C0000,0x8C00A2,0x8C00A2,0x1C000020,0x160000A4,0x8C00A2,0x8C00A2,0x1C000020,0x160000A4,0x160000A4,0xE8000000,0xF8200005,0x3000A2,0x7A000004,0x50000001,0x3C000002,0x36000000,0x2A000005,0x96000028,0x5A000012,0x6400A2,0x1C000020, -0x6400A2,0x5400CA,0x5A440001,0x3E400001,0x34400001,0x8000C8,0x44200001,0x342C0001,0x10000C8,0x32000005,0x2A0000C8,0x8000C8,0x44200001,0x342C0001,0x10000C8,0x32000005,0x2A0000C8,0x10000C8,0x32000005,0x2A0000C8,0x2A0000C8,0x8000C8,0x44200001,0x342C0001,0x10000C8,0x32000005,0x2A0000C8,0x10000C8,0x32000005,0x2A0000C8,0x2A0000C8,0x10000C8, -0x32000005,0x2A0000C8,0x2A0000C8,0x2A0000C8,0xF8000008,0x5C00C8,0xFC480012,0x7E000002,0x56000005,0x3E040005,0x34080000,0x32000014,0xB6000020,0x6E00000A,0x36100001,0x2A0000C8,0xB400C8,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x2C000000,0x2C000000,0x2C000000,0x2C000000,0x2C000000, -0x2C000000,0x16000000,0x16000000,0x16000000,0xE000000,0x140071,0x140071,0x140071,0x140071,0x140071,0x140071,0x12000028,0x12000028,0x12000028,0xC000014,0x240071,0x240071,0x240071,0xA000041,0x6000071,0xE8000000,0x100071,0x100071,0x68000000,0x48000000,0x36000000,0x36000000,0x24000000,0x52000022,0x42000011,0x1C000004,0x12000028, -0x1C0071,}; -static const uint32_t g_etc1_to_bc7_m6_table68[] = { -0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x140000, -0x140000,0x140000,0x140000,0x2000001,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0xC0000,0x100000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000, -0x2440000,0x8C0000,0x8C0000,0x8C0000,0x16000001,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x8C0000,0x8C0000,0x8C0000,0x16000001,0x8C0000,0x8C0000,0x8C0000,0x16000001,0x16000001,0x340000,0x300000,0x300000,0x380000,0x3C0000,0x400000,0x400000,0x500000,0x380000,0x3C0000,0x640000,0x8C0000, -0x640000,0x540000,0x540000,0x540000,0x540000,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0x28000001,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0x28000001,0xFC0000,0xFC0000,0x28000001,0x28000001,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0x28000001,0xFC0000,0xFC0000,0x28000001,0x28000001,0xFC0000, -0xFC0000,0x28000001,0x28000001,0x28000001,0x2600000,0x4580000,0x540000,0x2700000,0x8C0000,0xB00000,0xCC0000,0x1340000,0x680000,0x7C0000,0xB00000,0x28000001,0xB00000,0x7C0000,0xB80000,0x1740000,0x3C000001,0xB80000,0x1740000,0x3C000001,0x1740000,0x3C000001,0x3C000001,0xB80000,0x1740000,0x3C000001,0x1740000,0x3C000001, -0x3C000001,0x1740000,0x3C000001,0x3C000001,0x3C000001,0xB80000,0x1740000,0x3C000001,0x1740000,0x3C000001,0x3C000001,0x1740000,0x3C000001,0x3C000001,0x3C000001,0x1740000,0x3C000001,0x3C000001,0x3C000001,0x3C000001,0x9C0000,0x840000,0x840000,0xD00000,0x12C0000,0xDF80000,0x3C000001,0x3C000001,0xA80000,0xE80000,0x31F40000,0x3C000001, -0x1080000,0x400738,0x8A3002D4,0x503002D4,0x3C3002D5,0x801C0190,0x581C00D5,0x3E24013A,0x461C0190,0x3A1C00D5,0x321C0192,0x7A0802D3,0x5A0800A2,0x421400FF,0x4A0800D1,0x3C080002,0x320C00D7,0x3C0802D4,0x380000FD,0x3000013B,0x2A0802D3,0x600734,0x5200031F,0x3C0802D4,0x46000252,0x3A000107,0x320001A4,0x3A0003C9,0x340001AE,0x2E0001B3,0x2800032C,0xC40734, -0x2C000479,0x2C0003F2,0x260004DF,0x20000734,0xFC100021,0xF23403D0,0xF63C049C,0x92080000,0x64080001,0x4A080002,0x3E0C0012,0x3A080011,0xD000008E,0x7E000018,0x3E0000AB,0x2E0001B3,0x8C0734,0x5402D4,0x764400A4,0x4A4400A4,0x3C4000A5,0x6C3000C8,0x4E300001,0x3E340015,0x403000C8,0x3A2C0026,0x323000CA,0x8002D3,0x580C00A2,0x3C2800A3,0x4C0000C9,0x3C080002, -0x341400CA,0x10002D3,0x380000ED,0x3000012B,0x2A0002D3,0x8002D3,0x580C00A2,0x3C2800A3,0x4C0000C9,0x3C080002,0x341400CA,0x10002D3,0x380000ED,0x3000012B,0x2A0002D3,0x10002D3,0x380000ED,0x3000012B,0x2A0002D3,0x2A0002D3,0xFE140009,0xFA44014C,0xFE4C0138,0x92080000,0x64080001,0x4A080002,0x3E140002,0x3A00000A,0xD000003D,0x7E000008,0x3E0000AB,0x3000012B, -0xB402D3,0x3002D4,0x3002D4,0x3002D4,0x3002D4,0x621C00C8,0x621C00C8,0x621C00C8,0x361C00C8,0x361C00C8,0x281C00C9,0x5A0800A2,0x5A0800A2,0x5A0800A2,0x3C080001,0x3C080001,0x2C0C0026,0x2E0800A2,0x2E0800A2,0x28040015,0x200800A2,0x4402D3,0x4402D3,0x4402D3,0x340000FE,0x340000FE,0x2A0000C9,0x2E00011D,0x2E00011D,0x2400005A,0x1E0000CE,0x8802D3, -0x8802D3,0x20000199,0x1C0001A3,0x160002D3,0xFE0C0009,0xFA240139,0x3002D4,0x92080000,0x60080001,0x4A080001,0x440C0006,0x36080002,0xB400004B,0x76000012,0x3E0000A2,0x2400005A,0x6002D3,0x4000A4,0x4000A4,0x4000A4,0x4000A4,0x4E300000,0x4E300000,0x4E300000,0x32300000,0x32300000,0x28300001,0x6000A2,0x6000A2,0x6000A2,0x38100001,0x38100001, -0x28200001,0xC400A2,0xC400A2,0x2600000D,0x200000A2,0x6000A2,0x6000A2,0x6000A2,0x38100001,0x38100001,0x28200001,0xC400A2,0xC400A2,0x2600000D,0x200000A2,0xC400A2,0xC400A2,0x2600000D,0x200000A2,0x200000A2,0xEA140000,0xF2340008,0x4000A4,0x92080000,0x5E0C0000,0x480C0000,0x3E140001,0x38040000,0xC200000D,0x7C000002,0x8C00A2,0x2600000D, -0x8C00A2,0x6800C8,0x66540000,0x46540001,0x3C540001,0x29800C8,0x4E300001,0x3C400001,0x13800C8,0x3C040001,0x320000CA,0x29800C8,0x4E300001,0x3C400001,0x13800C8,0x3C040001,0x320000CA,0x13800C8,0x3C040001,0x320000CA,0x320000CA,0x29800C8,0x4E300001,0x3C400001,0x13800C8,0x3C040001,0x320000CA,0x13800C8,0x3C040001,0x320000CA,0x320000CA,0x13800C8, -0x3C040001,0x320000CA,0x320000CA,0x320000CA,0xFE140008,0xE6C00C8,0xF65C0019,0x8E0C0000,0x64040001,0x4C040000,0x3C1C0001,0x3A00000A,0xD6040012,0x7E040002,0x40200000,0x320000CA,0xDC00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x40080000,0x40080000,0x40080000,0x40080000,0x40080000, -0x40080000,0x20080001,0x20080001,0x20080001,0x16080001,0x2800C8,0x2800C8,0x2800C8,0x2800C8,0x2800C8,0x2800C8,0x1E00002D,0x1E00002D,0x1E00002D,0x16000011,0x5000C8,0x5000C8,0x5000C8,0x10000062,0xC0000CA,0xFE0C0008,0x1C00C8,0x1C00C8,0x8E080000,0x64080000,0x4E080000,0x4E080000,0x34080000,0x92000022,0x74000009,0x28040001,0x1E00002D, -0x3800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table69[] = { -0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x240000,0x240000,0x240000,0x240000,0x240000,0x240000,0x240000,0x240000,0x240000,0x240000,0x440000, -0x440000,0x440000,0x440000,0xA000001,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x4180000,0x4180000,0x4180000,0x240000,0x300000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000, -0x25C0000,0xBC0000,0xBC0000,0xBC0000,0x1E000001,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0xBC0000,0xBC0000,0xBC0000,0x1E000001,0xBC0000,0xBC0000,0xBC0000,0x1E000001,0x1E000001,0x440000,0x400000,0x400000,0x4480000,0x500000,0x2540000,0x2540000,0x6C0000,0x4480000,0x500000,0x880000,0xBC0000, -0x880000,0x640000,0x640000,0x640000,0x640000,0x940000,0x940000,0x940000,0x12C0000,0x12C0000,0x30000001,0x940000,0x940000,0x940000,0x12C0000,0x12C0000,0x30000001,0x12C0000,0x12C0000,0x30000001,0x30000001,0x940000,0x940000,0x940000,0x12C0000,0x12C0000,0x30000001,0x12C0000,0x12C0000,0x30000001,0x30000001,0x12C0000, -0x12C0000,0x30000001,0x30000001,0x30000001,0x740000,0xC680000,0x640000,0x880000,0xA80000,0xD40000,0xF40000,0x1700000,0x7C0000,0x940000,0xD40000,0x30000001,0xD40000,0x8C0000,0xD00000,0x1A40000,0x44000001,0xD00000,0x1A40000,0x44000001,0x1A40000,0x44000001,0x44000001,0xD00000,0x1A40000,0x44000001,0x1A40000,0x44000001, -0x44000001,0x1A40000,0x44000001,0x44000001,0x44000001,0xD00000,0x1A40000,0x44000001,0x1A40000,0x44000001,0x44000001,0x1A40000,0x44000001,0x44000001,0x44000001,0x1A40000,0x44000001,0x44000001,0x44000001,0x44000001,0xB00000,0x2940000,0x2940000,0xEC0000,0x1540000,0x17F80000,0x44000001,0x44000001,0x2BC0000,0x1040000,0x3BC80000,0x44000001, -0x1280000,0x500738,0x924002D4,0x584002D4,0x444002D5,0x882C0190,0x602C00D5,0x4634013A,0x4E2C0190,0x422C00D5,0x3A2C0192,0x821802D3,0x621800A2,0x4A2400FF,0x521800D1,0x44180002,0x3A1C00D7,0x441802D4,0x401000FD,0x3810013B,0x321802D3,0x780734,0x620002DD,0x441802D4,0x520001E2,0x460000CF,0x3A080192,0x46000361,0x4000011E,0x3800012F,0x300002EB,0xF40734, -0x38000401,0x32000352,0x2C000477,0x28000734,0xFE20002A,0xFA4403D0,0xFE4C049C,0x9A180000,0x6C180001,0x52180002,0x461C0012,0x42180011,0xF6000022,0x8E080002,0x480C00A9,0x3800012F,0xAC0734,0x6402D4,0x7E5400A4,0x525400A4,0x445000A5,0x744000C8,0x56400001,0x46440015,0x484000C8,0x423C0026,0x3A4000CA,0x9802D3,0x601C00A2,0x443800A3,0x541000C9,0x44180002, -0x3C2400CA,0x13002D3,0x420000CB,0x380000FE,0x320002D3,0x9802D3,0x601C00A2,0x443800A3,0x541000C9,0x44180002,0x3C2400CA,0x13002D3,0x420000CB,0x380000FE,0x320002D3,0x13002D3,0x420000CB,0x380000FE,0x320002D3,0x320002D3,0xF8280011,0xF254015E,0xF65C014D,0x9A180000,0x6C180001,0x52180002,0x46240002,0x440C0005,0xFA000013,0x8E080001,0x4A0400A2,0x380000FE, -0xD802D3,0x4002D4,0x4002D4,0x4002D4,0x4002D4,0x6A2C00C8,0x6A2C00C8,0x6A2C00C8,0x3E2C00C8,0x3E2C00C8,0x302C00C9,0x621800A2,0x621800A2,0x621800A2,0x44180001,0x44180001,0x341C0026,0x361800A2,0x361800A2,0x30140015,0x281800A2,0x5C02D3,0x5C02D3,0x5C02D3,0x460000CE,0x460000CE,0x321000C9,0x3A0000D5,0x3A0000D5,0x2E000012,0x280000A3,0xB802D3, -0xB802D3,0x2A00015B,0x2400016B,0x1E0002D3,0xFC1C0011,0xF234014C,0x4002D4,0x9A180000,0x68180001,0x52180001,0x4C1C0006,0x3E180002,0xE400000D,0x8E080001,0x461000A2,0x2E000012,0x8002D3,0x5000A4,0x5000A4,0x5000A4,0x5000A4,0x56400000,0x56400000,0x56400000,0x3A400000,0x3A400000,0x30400001,0x7800A2,0x7800A2,0x7800A2,0x40200001,0x40200001, -0x30300001,0xF400A2,0xF400A2,0x30000002,0x280000A2,0x7800A2,0x7800A2,0x7800A2,0x40200001,0x40200001,0x30300001,0xF400A2,0xF400A2,0x30000002,0x280000A2,0xF400A2,0xF400A2,0x30000002,0x280000A2,0x280000A2,0xF2240000,0xFA440008,0x5000A4,0x9A180000,0x661C0000,0x501C0000,0x46240001,0x40140000,0xF4000004,0x8E080000,0xAC00A2,0x30000002, -0xAC00A2,0x7800C8,0x6E640000,0x4E640001,0x44640001,0x2B000C8,0x56400001,0x44500001,0x16800C8,0x44140001,0x3A0000CA,0x2B000C8,0x56400001,0x44500001,0x16800C8,0x44140001,0x3A0000CA,0x16800C8,0x44140001,0x3A0000CA,0x3A0000CA,0x2B000C8,0x56400001,0x44500001,0x16800C8,0x44140001,0x3A0000CA,0x16800C8,0x44140001,0x3A0000CA,0x3A0000CA,0x16800C8, -0x44140001,0x3A0000CA,0x3A0000CA,0x3A0000CA,0xF828000D,0x8000C8,0xFE6C0019,0x961C0000,0x6C140001,0x54140000,0x442C0001,0x44000002,0xF8040008,0x90080000,0x48300000,0x3A0000CA,0xFC00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x2C00C8,0x48180000,0x48180000,0x48180000,0x48180000,0x48180000, -0x48180000,0x28180001,0x28180001,0x28180001,0x1E180001,0x4000C8,0x4000C8,0x4000C8,0x4000C8,0x4000C8,0x4000C8,0x2E000009,0x2E000009,0x2E000009,0x1E040001,0x8000C8,0x8000C8,0x8000C8,0x1C00003A,0x140000CA,0xF61C000D,0x2C00C8,0x2C00C8,0x96180000,0x6C180000,0x56180000,0x56180000,0x3C180000,0xE0000004,0x86080001,0x30140001,0x2E000009, -0x5C00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table70[] = { -0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x740000, -0x740000,0x740000,0x740000,0x12000001,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0xC280000,0xC280000,0xC280000,0x3C0000,0x540000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000, -0x2740000,0xF00000,0xF00000,0xF00000,0x26000001,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0xF00000,0xF00000,0xF00000,0x26000001,0xF00000,0xF00000,0xF00000,0x26000001,0x26000001,0x2540000,0x500000,0x500000,0x5C0000,0x640000,0x6C0000,0x6C0000,0x840000,0x5C0000,0x640000,0xA80000,0xF00000, -0xA80000,0x740000,0x740000,0x740000,0x740000,0xAC0000,0xAC0000,0xAC0000,0x15C0000,0x15C0000,0x38000001,0xAC0000,0xAC0000,0xAC0000,0x15C0000,0x15C0000,0x38000001,0x15C0000,0x15C0000,0x38000001,0x38000001,0xAC0000,0xAC0000,0xAC0000,0x15C0000,0x15C0000,0x38000001,0x15C0000,0x15C0000,0x38000001,0x38000001,0x15C0000, -0x15C0000,0x38000001,0x38000001,0x38000001,0x6840000,0x7C0000,0x740000,0x9C0000,0x2C00000,0xF40000,0x11C0000,0x1AC0000,0x900000,0xAC0000,0xF40000,0x38000001,0xF40000,0x9C0000,0xE80000,0x1D80000,0x4C000001,0xE80000,0x1D80000,0x4C000001,0x1D80000,0x4C000001,0x4C000001,0xE80000,0x1D80000,0x4C000001,0x1D80000,0x4C000001, -0x4C000001,0x1D80000,0x4C000001,0x4C000001,0x4C000001,0xE80000,0x1D80000,0x4C000001,0x1D80000,0x4C000001,0x4C000001,0x1D80000,0x4C000001,0x4C000001,0x4C000001,0x1D80000,0x4C000001,0x4C000001,0x4C000001,0x4C000001,0xC40000,0xAA40000,0xAA40000,0x1040000,0x17C0000,0x21FC0000,0x4C000001,0x4C000001,0xD40000,0x1240000,0x43D80000,0x4C000001, -0x14C0000,0x600738,0x9A5002D4,0x605002D4,0x4C5002D5,0x903C0190,0x683C00D5,0x4E44013A,0x563C0190,0x4A3C00D5,0x423C0192,0x8A2802D3,0x6A2800A2,0x523400FF,0x5A2800D1,0x4C280002,0x422C00D7,0x4C2802D4,0x482000FD,0x4020013B,0x3A2802D3,0x900734,0x720402D3,0x4C2802D4,0x6200019A,0x4E1000CF,0x42180192,0x52000319,0x4A0000C6,0x420000E7,0x3A0002D4,0x1240734, -0x440003A9,0x3E0002E2,0x3600041C,0x30000734,0xFE34003D,0xF2540402,0xF65C04C1,0xA2280000,0x74280001,0x5A280002,0x4E2C0012,0x4A280011,0xFE100022,0x96180002,0x501C00A9,0x420000E7,0xD00734,0x7402D4,0x866400A4,0x5A6400A4,0x4C6000A5,0x7C5000C8,0x5E500001,0x4E540015,0x505000C8,0x4A4C0026,0x425000CA,0xB002D3,0x682C00A2,0x4C4800A3,0x5C2000C9,0x4C280002, -0x443400CA,0x16402D3,0x4A0000AD,0x420000E3,0x3A0002D3,0xB002D3,0x682C00A2,0x4C4800A3,0x5C2000C9,0x4C280002,0x443400CA,0x16402D3,0x4A0000AD,0x420000E3,0x3A0002D3,0x16402D3,0x4A0000AD,0x420000E3,0x3A0002D3,0x3A0002D3,0xFC380018,0xFA64015E,0xFE6C014D,0xA2280000,0x74280001,0x5A280002,0x4E340002,0x4C1C0005,0xFE100019,0x96180001,0x521400A2,0x420000E3, -0xF802D3,0x5002D4,0x5002D4,0x5002D4,0x5002D4,0x723C00C8,0x723C00C8,0x723C00C8,0x463C00C8,0x463C00C8,0x383C00C9,0x6A2800A2,0x6A2800A2,0x6A2800A2,0x4C280001,0x4C280001,0x3C2C0026,0x3E2800A2,0x3E2800A2,0x38240015,0x302800A2,0x7402D3,0x7402D3,0x7402D3,0x520800C9,0x520800C9,0x3A2000C9,0x460000AD,0x460000AD,0x38040002,0x300C00A2,0xE802D3, -0xE802D3,0x32000119,0x2C000126,0x260002D3,0xFE2C0016,0xFA44014C,0x5002D4,0xA2280000,0x70280001,0x5A280001,0x542C0006,0x46280002,0xFA0C0005,0x96180001,0x4E2000A2,0x38040002,0xA402D3,0x6000A4,0x6000A4,0x6000A4,0x6000A4,0x5E500000,0x5E500000,0x5E500000,0x42500000,0x42500000,0x38500001,0x9000A2,0x9000A2,0x9000A2,0x48300001,0x48300001, -0x38400001,0x12400A2,0x12400A2,0x38080001,0x300000A2,0x9000A2,0x9000A2,0x9000A2,0x48300001,0x48300001,0x38400001,0x12400A2,0x12400A2,0x38080001,0x300000A2,0x12400A2,0x12400A2,0x38080001,0x300000A2,0x300000A2,0xFA340000,0xF254000D,0x6000A4,0xA2280000,0x6E2C0000,0x582C0000,0x4E340001,0x48240000,0xFE0C0002,0x96180000,0xD000A2,0x38080001, -0xD000A2,0x8800C8,0x76740000,0x56740001,0x4C740001,0xC800C8,0x5E500001,0x4C600001,0x19800C8,0x4C240001,0x420000CA,0xC800C8,0x5E500001,0x4C600001,0x19800C8,0x4C240001,0x420000CA,0x19800C8,0x4C240001,0x420000CA,0x420000CA,0xC800C8,0x5E500001,0x4C600001,0x19800C8,0x4C240001,0x420000CA,0x19800C8,0x4C240001,0x420000CA,0x420000CA,0x19800C8, -0x4C240001,0x420000CA,0x420000CA,0x420000CA,0xFA3C0012,0x9000C8,0xF67C0020,0x9E2C0000,0x74240001,0x5C240000,0x4C3C0001,0x4C080001,0xFC14000A,0x98180000,0x50400000,0x420000CA,0x12000C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x3C00C8,0x50280000,0x50280000,0x50280000,0x50280000,0x50280000, -0x50280000,0x30280001,0x30280001,0x30280001,0x26280001,0x5800C8,0x5800C8,0x5800C8,0x5800C8,0x5800C8,0x5800C8,0x38040001,0x38040001,0x38040001,0x26140001,0xB000C8,0xB000C8,0xB000C8,0x24000022,0x1C0000CA,0xFE2C000D,0x3C00C8,0x3C00C8,0x9E280000,0x74280000,0x5E280000,0x5E280000,0x44280000,0xF40C0001,0x8E180001,0x38240001,0x38040001, -0x7C00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table71[] = { -0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0xA40000, -0xA40000,0xA40000,0xA40000,0x1A000001,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x3C0000,0x3C0000,0x3C0000,0x2500000,0x740000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000, -0x28C0000,0x1200000,0x1200000,0x1200000,0x2E000001,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x1200000,0x1200000,0x1200000,0x2E000001,0x1200000,0x1200000,0x1200000,0x2E000001,0x2E000001,0xA640000,0x600000,0x600000,0x700000,0x780000,0x2800000,0x2800000,0xA00000,0x700000,0x780000,0xCC0000,0x1200000, -0xCC0000,0x840000,0x840000,0x840000,0x840000,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x40000001,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x40000001,0x18C0000,0x18C0000,0x40000001,0x40000001,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x40000001,0x18C0000,0x18C0000,0x40000001,0x40000001,0x18C0000, -0x18C0000,0x40000001,0x40000001,0x40000001,0x2980000,0x8C0000,0x840000,0xB40000,0xDC0000,0x1180000,0x1400000,0x1E80000,0xA40000,0xC40000,0x1180000,0x40000001,0x1180000,0xAC0000,0x1000000,0x5F80000,0x54000001,0x1000000,0x5F80000,0x54000001,0x5F80000,0x54000001,0x54000001,0x1000000,0x5F80000,0x54000001,0x5F80000,0x54000001, -0x54000001,0x5F80000,0x54000001,0x54000001,0x54000001,0x1000000,0x5F80000,0x54000001,0x5F80000,0x54000001,0x54000001,0x5F80000,0x54000001,0x54000001,0x54000001,0x5F80000,0x54000001,0x54000001,0x54000001,0x54000001,0xD80000,0xB80000,0xB80000,0x1200000,0x1A40000,0x2BFC0000,0x54000001,0x54000001,0x2E80000,0x3400000,0x4BE80000,0x54000001, -0x16C0000,0x700738,0xA26002D4,0x686002D4,0x546002D5,0x984C0190,0x704C00D5,0x5654013A,0x5E4C0190,0x524C00D5,0x4A4C0192,0x923802D3,0x723800A2,0x5A4400FF,0x623800D1,0x54380002,0x4A3C00D7,0x543802D4,0x503000FD,0x4830013B,0x423802D3,0xA80734,0x7A1402D3,0x543802D4,0x6C080192,0x562000CF,0x4A280192,0x620002E3,0x540000A3,0x4A0000D7,0x421002D4,0x1580734, -0x4A000361,0x44000272,0x3E0003BF,0x38000734,0xFE440056,0xFA640402,0xFE6C04C1,0xAA380000,0x7C380001,0x62380002,0x563C0012,0x52380011,0xFE24003B,0x9E280002,0x582C00A9,0x4A0000D7,0xF00734,0x8402D4,0x8E7400A4,0x627400A4,0x547000A5,0x846000C8,0x66600001,0x56640015,0x586000C8,0x525C0026,0x4A6000CA,0x2C402D3,0x703C00A2,0x545800A3,0x643000C9,0x54380002, -0x4C4400CA,0x19402D3,0x540000A3,0x4A0000CE,0x420002D3,0x2C402D3,0x703C00A2,0x545800A3,0x643000C9,0x54380002,0x4C4400CA,0x19402D3,0x540000A3,0x4A0000CE,0x420002D3,0x19402D3,0x540000A3,0x4A0000CE,0x420002D3,0x420002D3,0xFC4C001D,0xF2740174,0xF67C0164,0xAA380000,0x7C380001,0x62380002,0x56440002,0x542C0005,0xFE240022,0x9E280001,0x5A2400A2,0x4A0000CE, -0x11C02D3,0x6002D4,0x6002D4,0x6002D4,0x6002D4,0x7A4C00C8,0x7A4C00C8,0x7A4C00C8,0x4E4C00C8,0x4E4C00C8,0x404C00C9,0x723800A2,0x723800A2,0x723800A2,0x54380001,0x54380001,0x443C0026,0x463800A2,0x463800A2,0x40340015,0x383800A2,0x8C02D3,0x8C02D3,0x8C02D3,0x5A1800C9,0x5A1800C9,0x423000C9,0x540000A2,0x540000A2,0x40140002,0x381C00A2,0x11802D3, -0x11802D3,0x3E0000F1,0x360000FB,0x2E0002D3,0xF840001D,0xF2540161,0x6002D4,0xAA380000,0x78380001,0x62380001,0x5C3C0006,0x4E380002,0xFC1C000A,0x9E280001,0x563000A2,0x40140002,0xC802D3,0x7000A4,0x7000A4,0x7000A4,0x7000A4,0x66600000,0x66600000,0x66600000,0x4A600000,0x4A600000,0x40600001,0xA800A2,0xA800A2,0xA800A2,0x50400001,0x50400001, -0x40500001,0x15800A2,0x15800A2,0x40180001,0x380000A2,0xA800A2,0xA800A2,0xA800A2,0x50400001,0x50400001,0x40500001,0x15800A2,0x15800A2,0x40180001,0x380000A2,0x15800A2,0x15800A2,0x40180001,0x380000A2,0x380000A2,0xF6480001,0xFA64000D,0x7000A4,0xAA380000,0x763C0000,0x603C0000,0x56440001,0x50340000,0xF6240005,0x9E280000,0xF000A2,0x40180001, -0xF000A2,0x9800C8,0x7E840000,0x5E840001,0x54840001,0xE000C8,0x66600001,0x54700001,0x1CC00C8,0x54340001,0x4A0000CA,0xE000C8,0x66600001,0x54700001,0x1CC00C8,0x54340001,0x4A0000CA,0x1CC00C8,0x54340001,0x4A0000CA,0x4A0000CA,0xE000C8,0x66600001,0x54700001,0x1CC00C8,0x54340001,0x4A0000CA,0x1CC00C8,0x54340001,0x4A0000CA,0x4A0000CA,0x1CC00C8, -0x54340001,0x4A0000CA,0x4A0000CA,0x4A0000CA,0xFC4C0014,0x8A000C8,0xFE8C0020,0xA63C0000,0x7C340001,0x64340000,0x544C0001,0x54180001,0xFE2C000D,0xA0280000,0x58500000,0x4A0000CA,0x14000C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x4C00C8,0x58380000,0x58380000,0x58380000,0x58380000,0x58380000, -0x58380000,0x38380001,0x38380001,0x38380001,0x2E380001,0x7000C8,0x7000C8,0x7000C8,0x7000C8,0x7000C8,0x7000C8,0x40140001,0x40140001,0x40140001,0x2E240001,0xE400C8,0xE400C8,0xE400C8,0x2C00000D,0x240000CA,0xF63C0014,0x4C00C8,0x4C00C8,0xA6380000,0x7C380000,0x66380000,0x66380000,0x4C380000,0xFC1C0001,0x96280001,0x40340001,0x40140001, -0xA000C8,}; -static const uint32_t g_etc1_to_bc7_m6_table72[] = { -0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0xDC0000, -0xDC0000,0xDC0000,0xDC0000,0x24000000,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0x480001,0xE4C0000,0xE4C0000,0xE4C0000,0x6C0000,0x9C0000,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000, -0xA80000,0x1580000,0x1580000,0x1580000,0x38000000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x1580000,0x38000000,0x1580000,0x1580000,0x1580000,0x38000000,0x38000000,0x4780000,0x700001,0x700001,0x840000,0x48C0000,0x2980000,0x2980000,0xC00000,0x840000,0x48C0000,0xF00000,0x1580000, -0xF00000,0x940001,0x940001,0x940001,0x940001,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x4A000000,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x4A000000,0x1C40000,0x1C40000,0x4A000000,0x4A000000,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x4A000000,0x1C40000,0x1C40000,0x4A000000,0x4A000000,0x1C40000, -0x1C40000,0x4A000000,0x4A000000,0x4A000000,0x4AC0000,0xA00000,0x940001,0xCC0000,0xFC0000,0x13C0000,0x16C0000,0xBF80000,0xBC0000,0x2DC0000,0x13C0000,0x4A000000,0x13C0000,0xBC0001,0x3180000,0x11FC0000,0x5E000000,0x3180000,0x11FC0000,0x5E000000,0x11FC0000,0x5E000000,0x5E000000,0x3180000,0x11FC0000,0x5E000000,0x11FC0000,0x5E000000, -0x5E000000,0x11FC0000,0x5E000000,0x5E000000,0x5E000000,0x3180000,0x11FC0000,0x5E000000,0x11FC0000,0x5E000000,0x5E000000,0x11FC0000,0x5E000000,0x5E000000,0x5E000000,0x11FC0000,0x5E000000,0x5E000000,0x5E000000,0x5E000000,0x2EC0000,0xCC80000,0xCC80000,0x1400000,0x1D00000,0x37F40000,0x5E000000,0x5E000000,0x3000000,0x1640000,0x55DC0000,0x5E000000, -0x1940000,0x840734,0xAE7002D3,0x707002D4,0x5E7002D3,0xA0600192,0x786000D7,0x6264013B,0x66600192,0x5C5C00D7,0x545C0192,0x984C02D4,0x7A4C00A3,0x625400FD,0x6A4800CF,0x5E4C0002,0x545000D5,0x5E4802D4,0x584000FF,0x5048013A,0x4A4802D5,0xC40734,0x822802D3,0x5E4C02D4,0x76180192,0x5E3000D1,0x54380190,0x700002D3,0x5E1000A2,0x541400D5,0x4A2402D4,0x18C0734, -0x5600031B,0x50000212,0x48000378,0x40000738,0xFE580072,0xF4780434,0xF67C04EC,0xB24C0002,0x84480003,0x6C4C0002,0x5E500011,0x5C440012,0xFE34004E,0xAA380000,0x604000A9,0x541400D5,0x1180734,0x9802D3,0x988400A2,0x6C8400A2,0x5E8400A2,0x8A7400C9,0x6E740002,0x60740015,0x627000C9,0x5C6C0026,0x547000C9,0xE002D3,0x7A4C00A2,0x5E6800A2,0x6E4000C9,0x5E4C0001, -0x545800C8,0x1CC02D3,0x5E0C00A2,0x540000C8,0x4A0002D4,0xE002D3,0x7A4C00A2,0x5E6800A2,0x6E4000C9,0x5E4C0001,0x545800C8,0x1CC02D3,0x5E0C00A2,0x540000C8,0x4A0002D4,0x1CC02D3,0x5E0C00A2,0x540000C8,0x4A0002D4,0x4A0002D4,0xFC60002D,0xFC880173,0xFE8C016B,0xB24C0001,0x84480002,0x6C4C0001,0x5E580002,0x5C3C0006,0xFE3C0033,0xAA380000,0x623800A2,0x540000C8, -0x14002D3,0x7002D3,0x7002D3,0x7002D3,0x7002D3,0x806000CA,0x806000CA,0x806000CA,0x585C00CA,0x585C00CA,0x4A5C00CA,0x7A4C00A3,0x7A4C00A3,0x7A4C00A3,0x5C4C0002,0x5C4C0002,0x4C500026,0x4E4C00A3,0x4E4C00A3,0x48480015,0x404800A5,0x2A402D3,0x2A402D3,0x2A402D3,0x622C00C9,0x622C00C9,0x4A4400C8,0x5C1400A2,0x5C1400A2,0x4A240001,0x403000A4,0x15002D3, -0x15002D3,0x480000D4,0x3E0000CD,0x380002D4,0xFC540026,0xFC68015E,0x7002D3,0xB6480001,0x804C0002,0x6A4C0002,0x644C0005,0x58480002,0xFE34000E,0xAA380000,0x5E4400A3,0x4A240001,0xEC02D3,0x8400A2,0x8400A2,0x8400A2,0x8400A2,0x6C740001,0x6C740001,0x6C740001,0x52740001,0x52740001,0x4A700001,0xC400A2,0xC400A2,0xC400A2,0x58540001,0x58540001, -0x4A600000,0x18C00A2,0x18C00A2,0x4A280000,0x400000A4,0xC400A2,0xC400A2,0xC400A2,0x58540001,0x58540001,0x4A600000,0x18C00A2,0x18C00A2,0x4A280000,0x400000A4,0x18C00A2,0x18C00A2,0x4A280000,0x400000A4,0x400000A4,0xFE580001,0xF4780012,0x8400A2,0xB6480000,0x7C500000,0x68500000,0x60540000,0x58480001,0xFE340005,0xAA380000,0x11800A2,0x4A280000, -0x11800A2,0xA800CA,0x84980001,0x68940001,0x5E940001,0xFC00C8,0x6E740001,0x5E800001,0x3F800C8,0x5E440000,0x540000C8,0xFC00C8,0x6E740001,0x5E800001,0x3F800C8,0x5E440000,0x540000C8,0x3F800C8,0x5E440000,0x540000C8,0x540000C8,0xFC00C8,0x6E740001,0x5E800001,0x3F800C8,0x5E440000,0x540000C8,0x3F800C8,0x5E440000,0x540000C8,0x540000C8,0x3F800C8, -0x5E440000,0x540000C8,0x540000C8,0x540000C8,0xF6680019,0x2B400C8,0xF8A00029,0xB24C0000,0x84480001,0x6E440000,0x5E5C0000,0x5E240000,0xF4480012,0xAA380000,0x60640001,0x540000C8,0x16800C8,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5C00CA,0x5E4C0001,0x5E4C0001,0x5E4C0001,0x5E4C0001,0x5E4C0001, -0x5E4C0001,0x42480001,0x42480001,0x42480001,0x38480001,0x8C00C8,0x8C00C8,0x8C00C8,0x8C00C8,0x8C00C8,0x8C00C8,0x48280001,0x48280001,0x48280001,0x38340001,0x11800C8,0x11800C8,0x11800C8,0x38000001,0x2E0000C8,0xF0500019,0x5C00CA,0x5C00CA,0xAA4C0001,0x824C0001,0x6C4C0001,0x6C4C0001,0x544C0001,0xF8300002,0xA8380000,0x4C440000,0x48280001, -0xC800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table73[] = { -0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x10C0000, -0x10C0000,0x10C0000,0x10C0000,0x2C000000,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x580001,0x600000,0x600000,0x600000,0x840000,0xBC0000,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000, -0xC00000,0x1880000,0x1880000,0x1880000,0x40000000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0x1880000,0x1880000,0x1880000,0x40000000,0x1880000,0x1880000,0x1880000,0x40000000,0x40000000,0xC880000,0x800001,0x800001,0x980000,0x4A00000,0xB00000,0xB00000,0xD80000,0x980000,0x4A00000,0x1140000,0x1880000, -0x1140000,0xA40001,0xA40001,0xA40001,0xA40001,0x2F40000,0x2F40000,0x2F40000,0x1F40000,0x1F40000,0x52000000,0x2F40000,0x2F40000,0x2F40000,0x1F40000,0x1F40000,0x52000000,0x1F40000,0x1F40000,0x52000000,0x52000000,0x2F40000,0x2F40000,0x2F40000,0x1F40000,0x1F40000,0x52000000,0x1F40000,0x1F40000,0x52000000,0x52000000,0x1F40000, -0x1F40000,0x52000000,0x52000000,0x52000000,0xC00000,0xB00000,0xA40001,0xE00000,0x3140000,0x1600000,0x1940000,0x15FC0000,0xD00000,0x2F40000,0x1600000,0x52000000,0x1600000,0xCC0001,0x3300000,0x1DFC0000,0x66000000,0x3300000,0x1DFC0000,0x66000000,0x1DFC0000,0x66000000,0x66000000,0x3300000,0x1DFC0000,0x66000000,0x1DFC0000,0x66000000, -0x66000000,0x1DFC0000,0x66000000,0x66000000,0x66000000,0x3300000,0x1DFC0000,0x66000000,0x1DFC0000,0x66000000,0x66000000,0x1DFC0000,0x66000000,0x66000000,0x66000000,0x1DFC0000,0x66000000,0x66000000,0x66000000,0x66000000,0x3000000,0xDC0000,0xDC0000,0x1580000,0x1F80000,0x41F40000,0x66000000,0x66000000,0x1180000,0x3800000,0x5DEC0000,0x66000000, -0x1B40000,0x940734,0xB68002D3,0x788002D4,0x668002D3,0xA8700192,0x807000D7,0x6A74013B,0x6E700192,0x646C00D7,0x5C6C0192,0xA05C02D4,0x825C00A3,0x6A6400FD,0x725800CF,0x665C0002,0x5C6000D5,0x665802D4,0x605000FF,0x5858013A,0x525802D5,0xDC0734,0x8A3802D3,0x665C02D4,0x7E280192,0x664000D1,0x5C480190,0x781002D3,0x662000A2,0x5C2400D5,0x523402D4,0x1BC0734, -0x600002F8,0x5A0001E0,0x50000339,0x48000738,0xFE6C0096,0xFC880434,0xFE8C04EC,0xBA5C0002,0x8C580003,0x745C0002,0x66600011,0x64540012,0xFC4C006E,0xB2480000,0x685000A9,0x5C2400D5,0x1380734,0xA802D3,0xA09400A2,0x749400A2,0x669400A2,0x928400C9,0x76840002,0x68840015,0x6A8000C9,0x647C0026,0x5C8000C9,0xF802D3,0x825C00A2,0x667800A2,0x765000C9,0x665C0001, -0x5C6800C8,0x1FC02D3,0x661C00A2,0x5C1000C8,0x520002D4,0xF802D3,0x825C00A2,0x667800A2,0x765000C9,0x665C0001,0x5C6800C8,0x1FC02D3,0x661C00A2,0x5C1000C8,0x520002D4,0x1FC02D3,0x661C00A2,0x5C1000C8,0x520002D4,0x520002D4,0xFC740036,0xF4980189,0xF8A0017A,0xBA5C0001,0x8C580002,0x745C0001,0x66680002,0x644C0006,0xFC50003D,0xB2480000,0x6A4800A2,0x5C1000C8, -0x16402D3,0x8002D3,0x8002D3,0x8002D3,0x8002D3,0x887000CA,0x887000CA,0x887000CA,0x606C00CA,0x606C00CA,0x526C00CA,0x825C00A3,0x825C00A3,0x825C00A3,0x645C0002,0x645C0002,0x54600026,0x565C00A3,0x565C00A3,0x50580015,0x485800A5,0x2BC02D3,0x2BC02D3,0x2BC02D3,0x6A3C00C9,0x6A3C00C9,0x525400C8,0x642400A2,0x642400A2,0x52340001,0x484000A4,0x18002D3, -0x18002D3,0x520000CA,0x480000B4,0x400002D4,0xFC64002D,0xF4780173,0x8002D3,0xBE580001,0x885C0002,0x725C0002,0x6C5C0005,0x60580002,0xFE440019,0xB2480000,0x665400A3,0x52340001,0x11002D3,0x9400A2,0x9400A2,0x9400A2,0x9400A2,0x74840001,0x74840001,0x74840001,0x5A840001,0x5A840001,0x52800001,0xDC00A2,0xDC00A2,0xDC00A2,0x60640001,0x60640001, -0x52700000,0x1BC00A2,0x1BC00A2,0x52380000,0x480000A4,0xDC00A2,0xDC00A2,0xDC00A2,0x60640001,0x60640001,0x52700000,0x1BC00A2,0x1BC00A2,0x52380000,0x480000A4,0x1BC00A2,0x1BC00A2,0x52380000,0x480000A4,0x480000A4,0xFA6C0002,0xFC880012,0x9400A2,0xBE580000,0x84600000,0x70600000,0x68640000,0x60580001,0xFA48000A,0xB2480000,0x13800A2,0x52380000, -0x13800A2,0xB800CA,0x8CA80001,0x70A40001,0x66A40001,0x11400C8,0x76840001,0x66900001,0xFF800C8,0x66540000,0x5C0000C8,0x11400C8,0x76840001,0x66900001,0xFF800C8,0x66540000,0x5C0000C8,0xFF800C8,0x66540000,0x5C0000C8,0x5C0000C8,0x11400C8,0x76840001,0x66900001,0xFF800C8,0x66540000,0x5C0000C8,0xFF800C8,0x66540000,0x5C0000C8,0x5C0000C8,0xFF800C8, -0x66540000,0x5C0000C8,0x5C0000C8,0x5C0000C8,0xFE780019,0xAC400C8,0xFEAC002D,0xBA5C0000,0x8C580001,0x76540000,0x666C0000,0x66340000,0xFC580012,0xB2480000,0x68740001,0x5C0000C8,0x18C00C8,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x6C00CA,0x665C0001,0x665C0001,0x665C0001,0x665C0001,0x665C0001, -0x665C0001,0x4A580001,0x4A580001,0x4A580001,0x40580001,0xA400C8,0xA400C8,0xA400C8,0xA400C8,0xA400C8,0xA400C8,0x50380001,0x50380001,0x50380001,0x40440001,0x14C00C8,0x14C00C8,0x14C00C8,0x40080000,0x360000C8,0xF8600019,0x6C00CA,0x6C00CA,0xB25C0001,0x8A5C0001,0x745C0001,0x745C0001,0x5C5C0001,0xF4440005,0xB0480000,0x54540000,0x50380001, -0xE800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table74[] = { -0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x13C0000, -0x13C0000,0x13C0000,0x13C0000,0x34000000,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x700000,0x700000,0x700000,0x9C0000,0xE00000,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000, -0xD80000,0x1B80000,0x1B80000,0x1B80000,0x48000000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0x1B80000,0x1B80000,0x1B80000,0x48000000,0x1B80000,0x1B80000,0x1B80000,0x48000000,0x48000000,0x9C0000,0x900001,0x900001,0x2A80000,0x4B40000,0x2C40000,0x2C40000,0xF40000,0x2A80000,0x4B40000,0x1340000,0x1B80000, -0x1340000,0xB40001,0xB40001,0xB40001,0xB40001,0x30C0000,0x30C0000,0x30C0000,0xBFC0000,0xBFC0000,0x5A000000,0x30C0000,0x30C0000,0x30C0000,0xBFC0000,0xBFC0000,0x5A000000,0xBFC0000,0xBFC0000,0x5A000000,0x5A000000,0x30C0000,0x30C0000,0x30C0000,0xBFC0000,0xBFC0000,0x5A000000,0xBFC0000,0xBFC0000,0x5A000000,0x5A000000,0xBFC0000, -0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0xD40000,0x8C00000,0xB40001,0xF80000,0x1300000,0x1800000,0x1BC0000,0x21F40000,0xE40000,0x30C0000,0x1800000,0x5A000000,0x1800000,0xDC0001,0x1480000,0x29FC0000,0x6E000000,0x1480000,0x29FC0000,0x6E000000,0x29FC0000,0x6E000000,0x6E000000,0x1480000,0x29FC0000,0x6E000000,0x29FC0000,0x6E000000, -0x6E000000,0x29FC0000,0x6E000000,0x6E000000,0x6E000000,0x1480000,0x29FC0000,0x6E000000,0x29FC0000,0x6E000000,0x6E000000,0x29FC0000,0x6E000000,0x6E000000,0x6E000000,0x29FC0000,0x6E000000,0x6E000000,0x6E000000,0x6E000000,0x3140000,0xEC0000,0xEC0000,0x1740000,0xDFC0000,0x4BF40000,0x6E000000,0x6E000000,0x32C0000,0x1A00000,0x65FC0000,0x6E000000, -0x1D80000,0xA40734,0xBE9002D3,0x809002D4,0x6E9002D3,0xB0800192,0x888000D7,0x7284013B,0x76800192,0x6C7C00D7,0x647C0192,0xA86C02D4,0x8A6C00A3,0x727400FD,0x7A6800CF,0x6E6C0002,0x647000D5,0x6E6802D4,0x686000FF,0x6068013A,0x5A6802D5,0xF40734,0x924802D3,0x6E6C02D4,0x86380192,0x6E5000D1,0x64580190,0x802002D3,0x6E3000A2,0x643400D5,0x5A4402D4,0x1F00734, -0x6C0002D8,0x600001B8,0x5A000314,0x50000738,0xFC7C00BA,0xF498046A,0xF8A00513,0xC26C0002,0x94680003,0x7C6C0002,0x6E700011,0x6C640012,0xFE5C0082,0xBA580000,0x706000A9,0x643400D5,0x15C0734,0xB802D3,0xA8A400A2,0x7CA400A2,0x6EA400A2,0x9A9400C9,0x7E940002,0x70940015,0x729000C9,0x6C8C0026,0x649000C9,0x11002D3,0x8A6C00A2,0x6E8800A2,0x7E6000C9,0x6E6C0001, -0x647800C8,0xDFC02D3,0x6E2C00A2,0x642000C8,0x5A0002D4,0x11002D3,0x8A6C00A2,0x6E8800A2,0x7E6000C9,0x6E6C0001,0x647800C8,0xDFC02D3,0x6E2C00A2,0x642000C8,0x5A0002D4,0xDFC02D3,0x6E2C00A2,0x642000C8,0x5A0002D4,0x5A0002D4,0xFE800049,0xFCA80189,0xFEAC0186,0xC26C0001,0x94680002,0x7C6C0001,0x6E780002,0x6C5C0006,0xFE600051,0xBA580000,0x725800A2,0x642000C8, -0x18802D3,0x9002D3,0x9002D3,0x9002D3,0x9002D3,0x908000CA,0x908000CA,0x908000CA,0x687C00CA,0x687C00CA,0x5A7C00CA,0x8A6C00A3,0x8A6C00A3,0x8A6C00A3,0x6C6C0002,0x6C6C0002,0x5C700026,0x5E6C00A3,0x5E6C00A3,0x58680015,0x506800A5,0xD402D3,0xD402D3,0xD402D3,0x724C00C9,0x724C00C9,0x5A6400C8,0x6C3400A2,0x6C3400A2,0x5A440001,0x505000A4,0x1B002D3, -0x1B002D3,0x5A0C00C8,0x500000A5,0x480002D4,0xFE74003B,0xFC880173,0x9002D3,0xC6680001,0x906C0002,0x7A6C0002,0x746C0005,0x68680002,0xFE580021,0xBA580000,0x6E6400A3,0x5A440001,0x13002D3,0xA400A2,0xA400A2,0xA400A2,0xA400A2,0x7C940001,0x7C940001,0x7C940001,0x62940001,0x62940001,0x5A900001,0xF400A2,0xF400A2,0xF400A2,0x68740001,0x68740001, -0x5A800000,0x1F000A2,0x1F000A2,0x5A480000,0x500000A4,0xF400A2,0xF400A2,0xF400A2,0x68740001,0x68740001,0x5A800000,0x1F000A2,0x1F000A2,0x5A480000,0x500000A4,0x1F000A2,0x1F000A2,0x5A480000,0x500000A4,0x500000A4,0xF6800005,0xF4980019,0xA400A2,0xC6680000,0x8C700000,0x78700000,0x70740000,0x68680001,0xF260000D,0xBA580000,0x15C00A2,0x5A480000, -0x15C00A2,0xC800CA,0x94B80001,0x78B40001,0x6EB40001,0x12C00C8,0x7E940001,0x6EA00001,0x1BF800C8,0x6E640000,0x640000C8,0x12C00C8,0x7E940001,0x6EA00001,0x1BF800C8,0x6E640000,0x640000C8,0x1BF800C8,0x6E640000,0x640000C8,0x640000C8,0x12C00C8,0x7E940001,0x6EA00001,0x1BF800C8,0x6E640000,0x640000C8,0x1BF800C8,0x6E640000,0x640000C8,0x640000C8,0x1BF800C8, -0x6E640000,0x640000C8,0x640000C8,0x640000C8,0xFA8C0020,0xD800C8,0xF8C00032,0xC26C0000,0x94680001,0x7E640000,0x6E7C0000,0x6E440000,0xFC6C0019,0xBA580000,0x70840001,0x640000C8,0x1AC00C8,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x7C00CA,0x6E6C0001,0x6E6C0001,0x6E6C0001,0x6E6C0001,0x6E6C0001, -0x6E6C0001,0x52680001,0x52680001,0x52680001,0x48680001,0xBC00C8,0xBC00C8,0xBC00C8,0xBC00C8,0xBC00C8,0xBC00C8,0x58480001,0x58480001,0x58480001,0x48540001,0x17C00C8,0x17C00C8,0x17C00C8,0x48180000,0x3E0000C8,0xF0700022,0x7C00CA,0x7C00CA,0xBA6C0001,0x926C0001,0x7C6C0001,0x7C6C0001,0x646C0001,0xFC540005,0xB8580000,0x5C640000,0x58480001, -0x10C00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table75[] = { -0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x1700000, -0x1700000,0x1700000,0x1700000,0x3C000000,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x8800000,0x8800000,0x8800000,0xB40000,0x1000000,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000, -0xF00000,0x1E80000,0x1E80000,0x1E80000,0x50000000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0x1E80000,0x1E80000,0x1E80000,0x50000000,0x1E80000,0x1E80000,0x1E80000,0x50000000,0x50000000,0xAC0000,0xA00001,0xA00001,0xBC0000,0xCC0000,0xDC0000,0xDC0000,0x1100000,0xBC0000,0xCC0000,0x1580000,0x1E80000, -0x1580000,0xC40001,0xC40001,0xC40001,0xC40001,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x62000000,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x62000000,0x17FC0000,0x17FC0000,0x62000000,0x62000000,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x62000000,0x17FC0000,0x17FC0000,0x62000000,0x62000000,0x17FC0000, -0x17FC0000,0x62000000,0x62000000,0x62000000,0x4E40000,0xD40000,0xC40001,0x10C0000,0x14C0000,0x1A40000,0x1E40000,0x2BF80000,0xF80000,0x3240000,0x1A40000,0x62000000,0x1A40000,0xEC0001,0x1600000,0x35FC0000,0x76000000,0x1600000,0x35FC0000,0x76000000,0x35FC0000,0x76000000,0x76000000,0x1600000,0x35FC0000,0x76000000,0x35FC0000,0x76000000, -0x76000000,0x35FC0000,0x76000000,0x76000000,0x76000000,0x1600000,0x35FC0000,0x76000000,0x35FC0000,0x76000000,0x76000000,0x35FC0000,0x76000000,0x76000000,0x76000000,0x35FC0000,0x76000000,0x76000000,0x76000000,0x76000000,0x3280000,0x6FC0000,0x6FC0000,0x1900000,0x1BFC0000,0x55F40000,0x76000000,0x76000000,0x1440000,0x1C00000,0x6FD00000,0x76000000, -0x1F80000,0xB40734,0xC6A002D3,0x88A002D4,0x76A002D3,0xB8900192,0x909000D7,0x7A94013B,0x7E900192,0x748C00D7,0x6C8C0192,0xB07C02D4,0x927C00A3,0x7A8400FD,0x827800CF,0x767C0002,0x6C8000D5,0x767802D4,0x707000FF,0x6878013A,0x627802D5,0x10C0734,0x9A5802D3,0x767C02D4,0x8E480192,0x766000D1,0x6C680190,0x883002D3,0x764000A2,0x6C4400D5,0x625402D4,0xBF80734, -0x760002D3,0x6C000198,0x600002F8,0x58000738,0xFE8C00D4,0xFCA8046A,0xFEAC0517,0xCA7C0002,0x9C780003,0x847C0002,0x76800011,0x74740012,0xFE7000A7,0xC2680000,0x787000A9,0x6C4400D5,0x17C0734,0xC802D3,0xB0B400A2,0x84B400A2,0x76B400A2,0xA2A400C9,0x86A40002,0x78A40015,0x7AA000C9,0x749C0026,0x6CA000C9,0x12802D3,0x927C00A2,0x769800A2,0x867000C9,0x767C0001, -0x6C8800C8,0x19FC02D3,0x763C00A2,0x6C3000C8,0x620002D4,0x12802D3,0x927C00A2,0x769800A2,0x867000C9,0x767C0001,0x6C8800C8,0x19FC02D3,0x763C00A2,0x6C3000C8,0x620002D4,0x19FC02D3,0x763C00A2,0x6C3000C8,0x620002D4,0x620002D4,0xFE940050,0xF4B801A3,0xF8C00193,0xCA7C0001,0x9C780002,0x847C0001,0x76880002,0x746C0006,0xFE780056,0xC2680000,0x7A6800A2,0x6C3000C8, -0x1A802D3,0xA002D3,0xA002D3,0xA002D3,0xA002D3,0x989000CA,0x989000CA,0x989000CA,0x708C00CA,0x708C00CA,0x628C00CA,0x927C00A3,0x927C00A3,0x927C00A3,0x747C0002,0x747C0002,0x64800026,0x667C00A3,0x667C00A3,0x60780015,0x587800A5,0xEC02D3,0xEC02D3,0xEC02D3,0x7A5C00C9,0x7A5C00C9,0x627400C8,0x744400A2,0x744400A2,0x62540001,0x586000A4,0x1E402D3, -0x1E402D3,0x621C00C8,0x580800A4,0x500002D4,0xFE840046,0xF498018A,0xA002D3,0xCE780001,0x987C0002,0x827C0002,0x7C7C0005,0x70780002,0xF86C0032,0xC2680000,0x767400A3,0x62540001,0x15402D3,0xB400A2,0xB400A2,0xB400A2,0xB400A2,0x84A40001,0x84A40001,0x84A40001,0x6AA40001,0x6AA40001,0x62A00001,0x10C00A2,0x10C00A2,0x10C00A2,0x70840001,0x70840001, -0x62900000,0xBF800A2,0xBF800A2,0x62580000,0x580000A4,0x10C00A2,0x10C00A2,0x10C00A2,0x70840001,0x70840001,0x62900000,0xBF800A2,0xBF800A2,0x62580000,0x580000A4,0xBF800A2,0xBF800A2,0x62580000,0x580000A4,0x580000A4,0xFE900005,0xFCA80019,0xB400A2,0xCE780000,0x94800000,0x80800000,0x78840000,0x70780001,0xFA70000D,0xC2680000,0x17C00A2,0x62580000, -0x17C00A2,0xD800CA,0x9CC80001,0x80C40001,0x76C40001,0x14400C8,0x86A40001,0x76B00001,0x27F800C8,0x76740000,0x6C0000C8,0x14400C8,0x86A40001,0x76B00001,0x27F800C8,0x76740000,0x6C0000C8,0x27F800C8,0x76740000,0x6C0000C8,0x6C0000C8,0x14400C8,0x86A40001,0x76B00001,0x27F800C8,0x76740000,0x6C0000C8,0x27F800C8,0x76740000,0x6C0000C8,0x6C0000C8,0x27F800C8, -0x76740000,0x6C0000C8,0x6C0000C8,0x6C0000C8,0xF4A00029,0xE800C8,0xFECC003A,0xCA7C0000,0x9C780001,0x86740000,0x768C0000,0x76540000,0xF8840020,0xC2680000,0x78940001,0x6C0000C8,0x1D000C8,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x8C00CA,0x767C0001,0x767C0001,0x767C0001,0x767C0001,0x767C0001, -0x767C0001,0x5A780001,0x5A780001,0x5A780001,0x50780001,0x2D000C8,0x2D000C8,0x2D000C8,0x2D000C8,0x2D000C8,0x2D000C8,0x60580001,0x60580001,0x60580001,0x50640001,0x1AC00C8,0x1AC00C8,0x1AC00C8,0x50280000,0x460000C8,0xF8800022,0x8C00CA,0x8C00CA,0xC27C0001,0x9A7C0001,0x847C0001,0x847C0001,0x6C7C0001,0xFC640008,0xC0680000,0x64740000,0x60580001, -0x12C00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table76[] = { -0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0xD00000,0xD00000,0xD00000,0xD00000,0xD00000,0xD00000,0xD00000,0xD00000,0xD00000,0xD00000,0x1A40000, -0x1A40000,0x1A40000,0x1A40000,0x44000001,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x2940000,0x2940000,0x2940000,0xD00000,0x1280000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000, -0x10C0000,0xBF80000,0xBF80000,0xBF80000,0x58000001,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0xBF80000,0x58000001,0xBF80000,0xBF80000,0xBF80000,0x58000001,0x58000001,0xC00000,0xB40000,0xB40000,0xD00000,0x2E00000,0xF40000,0xF40000,0x32C0000,0xD00000,0x2E00000,0x17C0000,0xBF80000, -0x17C0000,0xD80000,0xD80000,0xD80000,0xD80000,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x6A000001,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x6A000001,0x25F80000,0x25F80000,0x6A000001,0x6A000001,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x6A000001,0x25F80000,0x25F80000,0x6A000001,0x6A000001,0x25F80000, -0x25F80000,0x6A000001,0x6A000001,0x6A000001,0xFC0000,0xAE40000,0xD80000,0x3240000,0x16C0000,0x1CC0000,0x9F80000,0x37FC0000,0x50C0000,0x1400000,0x1CC0000,0x6A000001,0x1CC0000,0x1000000,0x17C0000,0x43F80000,0x7E000001,0x17C0000,0x43F80000,0x7E000001,0x43F80000,0x7E000001,0x7E000001,0x17C0000,0x43F80000,0x7E000001,0x43F80000,0x7E000001, -0x7E000001,0x43F80000,0x7E000001,0x7E000001,0x7E000001,0x17C0000,0x43F80000,0x7E000001,0x43F80000,0x7E000001,0x7E000001,0x43F80000,0x7E000001,0x7E000001,0x7E000001,0x43F80000,0x7E000001,0x7E000001,0x7E000001,0x7E000001,0x1400000,0x1100000,0x1100000,0x3AC0000,0x29FC0000,0x61F00000,0x7E000001,0x7E000001,0x15C0000,0x1E00000,0x79C40000,0x7E000001, -0x11FC0000,0xC40738,0xCCB402D4,0x92B402D4,0x7EB402D5,0xC2A00190,0x9AA000D5,0x80A8013A,0x88A00190,0x7CA000D5,0x74A00192,0xBC8C02D3,0x9C8C00A2,0x849800FF,0x8C8C00D1,0x7E8C0002,0x749000D7,0x7E8C02D4,0x7A8400FD,0x7284013B,0x6C8C02D3,0x3240734,0xA46802D3,0x7E8C02D4,0x965C0192,0x807400CF,0x747C0192,0x904402D3,0x7E5400A3,0x745400D7,0x6C6402D4,0x17FC0734, -0x7E1402D4,0x74080192,0x6C0002DC,0x62000734,0xFEA000FE,0xF6BC04A0,0xF8C0053C,0xD48C0000,0xA68C0001,0x8C8C0002,0x80900012,0x7C8C0011,0xFE8800DC,0xC87C0002,0x828000A9,0x745400D7,0x1A40734,0xD802D4,0xB8C800A4,0x8CC800A4,0x7EC400A5,0xAEB400C8,0x90B40001,0x80B80015,0x82B400C8,0x7CB00026,0x74B400CA,0x14402D3,0x9A9000A2,0x7EAC00A3,0x8E8400C9,0x7E8C0002, -0x769800CA,0x27F802D3,0x7E5400A3,0x744400CA,0x6C0002D3,0x14402D3,0x9A9000A2,0x7EAC00A3,0x8E8400C9,0x7E8C0002,0x769800CA,0x27F802D3,0x7E5400A3,0x744400CA,0x6C0002D3,0x27F802D3,0x7E5400A3,0x744400CA,0x6C0002D3,0x6C0002D3,0xFAAC0065,0xFECC01A0,0xF2D401AD,0xD48C0000,0xA68C0001,0x8C8C0002,0x80980002,0x7E800005,0xFA900071,0xC87C0001,0x847800A2,0x744400CA, -0x1D002D3,0xB402D4,0xB402D4,0xB402D4,0xB402D4,0xA4A000C8,0xA4A000C8,0xA4A000C8,0x78A000C8,0x78A000C8,0x6AA000C9,0x9C8C00A2,0x9C8C00A2,0x9C8C00A2,0x7E8C0001,0x7E8C0001,0x6E900026,0x708C00A2,0x708C00A2,0x6A880015,0x628C00A2,0x10802D3,0x10802D3,0x10802D3,0x846C00C9,0x846C00C9,0x6C8400C9,0x7E5400A2,0x7E5400A2,0x6A680002,0x627000A2,0x9F802D3, -0x9F802D3,0x6A3000C9,0x621400A2,0x580002D3,0xFC9C005A,0xFEAC0189,0xB402D4,0xD48C0000,0xA28C0001,0x8C8C0001,0x86900006,0x788C0002,0xFE80003E,0xC87C0001,0x808400A2,0x6A680002,0x17802D3,0xC400A4,0xC400A4,0xC400A4,0xC400A4,0x90B40000,0x90B40000,0x90B40000,0x74B40000,0x74B40000,0x6AB40001,0x32400A2,0x32400A2,0x32400A2,0x7A940001,0x7A940001, -0x6AA40001,0x17FC00A2,0x17FC00A2,0x6A6C0001,0x620000A2,0x32400A2,0x32400A2,0x32400A2,0x7A940001,0x7A940001,0x6AA40001,0x17FC00A2,0x17FC00A2,0x6A6C0001,0x620000A2,0x17FC00A2,0x17FC00A2,0x6A6C0001,0x620000A2,0x620000A2,0xFEA0000A,0xF6BC0020,0xC400A4,0xD48C0000,0xA0900000,0x8A900000,0x80980001,0x7A880000,0xF8880012,0xC87C0000,0x1A400A2,0x6A6C0001, -0x1A400A2,0xEC00C8,0xA8D80000,0x88D80001,0x7ED80001,0x35C00C8,0x90B40001,0x7EC40001,0x33FC00C8,0x7E880001,0x740000CA,0x35C00C8,0x90B40001,0x7EC40001,0x33FC00C8,0x7E880001,0x740000CA,0x33FC00C8,0x7E880001,0x740000CA,0x740000CA,0x35C00C8,0x90B40001,0x7EC40001,0x33FC00C8,0x7E880001,0x740000CA,0x33FC00C8,0x7E880001,0x740000CA,0x740000CA,0x33FC00C8, -0x7E880001,0x740000CA,0x740000CA,0x740000CA,0xFEB40029,0xFC00C8,0xFAE4003D,0xD0900000,0xA6880001,0x8E880000,0x7EA00001,0x7E6C0001,0xFE980020,0xCA7C0000,0x82A40000,0x740000CA,0x1F400C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0xA000C8,0x828C0000,0x828C0000,0x828C0000,0x828C0000,0x828C0000, -0x828C0000,0x628C0001,0x628C0001,0x628C0001,0x588C0001,0xEC00C8,0xEC00C8,0xEC00C8,0xEC00C8,0xEC00C8,0xEC00C8,0x6A680001,0x6A680001,0x6A680001,0x58780001,0x1E400C8,0x1E400C8,0x1E400C8,0x583C0001,0x4E0000CA,0xF2940029,0xA000C8,0xA000C8,0xD08C0000,0xA68C0000,0x908C0000,0x908C0000,0x768C0000,0xF47C000D,0xC07C0001,0x6A880001,0x6A680001, -0x15400C8,}; -static const uint32_t g_etc1_to_bc7_m6_table77[] = { -0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0x1D80000, -0x1D80000,0x1D80000,0x1D80000,0x4C000001,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0xAA40000,0xAA40000,0xAA40000,0xE80000,0x14C0000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000, -0x1240000,0x17F80000,0x17F80000,0x17F80000,0x60000001,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x17F80000,0x17F80000,0x17F80000,0x60000001,0x17F80000,0x17F80000,0x17F80000,0x60000001,0x60000001,0xD00000,0xC40000,0xC40000,0xE40000,0x2F40000,0x3080000,0x3080000,0x1480000,0xE40000,0x2F40000,0x1A00000,0x17F80000, -0x1A00000,0xE80000,0xE80000,0xE80000,0xE80000,0x1580000,0x1580000,0x1580000,0x31F80000,0x31F80000,0x72000001,0x1580000,0x1580000,0x1580000,0x31F80000,0x31F80000,0x72000001,0x31F80000,0x31F80000,0x72000001,0x72000001,0x1580000,0x1580000,0x1580000,0x31F80000,0x31F80000,0x72000001,0x31F80000,0x31F80000,0x72000001,0x72000001,0x31F80000, -0x31F80000,0x72000001,0x72000001,0x72000001,0x30C0000,0xF80000,0xE80000,0x13C0000,0x1840000,0x1EC0000,0x15FC0000,0x43F40000,0x5200000,0x1580000,0x1EC0000,0x72000001,0x1EC0000,0x1100000,0x1940000,0x4FF80000,0x86000001,0x1940000,0x4FF80000,0x86000001,0x4FF80000,0x86000001,0x86000001,0x1940000,0x4FF80000,0x86000001,0x4FF80000,0x86000001, -0x86000001,0x4FF80000,0x86000001,0x86000001,0x86000001,0x1940000,0x4FF80000,0x86000001,0x4FF80000,0x86000001,0x86000001,0x4FF80000,0x86000001,0x86000001,0x86000001,0x4FF80000,0x86000001,0x86000001,0x86000001,0x86000001,0x1540000,0x9200000,0x9200000,0x1C80000,0x37FC0000,0x6BF00000,0x86000001,0x86000001,0x3700000,0x3FC0000,0x81D40000,0x86000001, -0x1FFC0000,0xD40738,0xD4C402D4,0x9AC402D4,0x86C402D5,0xCAB00190,0xA2B000D5,0x88B8013A,0x90B00190,0x84B000D5,0x7CB00192,0xC49C02D3,0xA49C00A2,0x8CA800FF,0x949C00D1,0x869C0002,0x7CA000D7,0x869C02D4,0x829400FD,0x7A94013B,0x749C02D3,0x33C0734,0xAC7802D3,0x869C02D4,0x9E6C0192,0x888400CF,0x7C8C0192,0x985402D3,0x866400A3,0x7C6400D7,0x747402D4,0x23FC0734, -0x862402D4,0x7C180192,0x740002D4,0x6A000734,0xFEB4012D,0xFECC04A0,0xFECC0554,0xDC9C0000,0xAE9C0001,0x949C0002,0x88A00012,0x849C0011,0xFE9800FA,0xD08C0002,0x8A9000A9,0x7C6400D7,0x1C80734,0xE802D4,0xC0D800A4,0x94D800A4,0x86D400A5,0xB6C400C8,0x98C40001,0x88C80015,0x8AC400C8,0x84C00026,0x7CC400CA,0x15C02D3,0xA2A000A2,0x86BC00A3,0x969400C9,0x869C0002, -0x7EA800CA,0x33F802D3,0x866400A3,0x7C5400CA,0x740002D3,0x15C02D3,0xA2A000A2,0x86BC00A3,0x969400C9,0x869C0002,0x7EA800CA,0x33F802D3,0x866400A3,0x7C5400CA,0x740002D3,0x33F802D3,0x866400A3,0x7C5400CA,0x740002D3,0x740002D3,0xFEBC0076,0xF6DC01BA,0xFAE401AD,0xDC9C0000,0xAE9C0001,0x949C0002,0x88A80002,0x86900005,0xFEA40081,0xD08C0001,0x8C8800A2,0x7C5400CA, -0x1F002D3,0xC402D4,0xC402D4,0xC402D4,0xC402D4,0xACB000C8,0xACB000C8,0xACB000C8,0x80B000C8,0x80B000C8,0x72B000C9,0xA49C00A2,0xA49C00A2,0xA49C00A2,0x869C0001,0x869C0001,0x76A00026,0x789C00A2,0x789C00A2,0x72980015,0x6A9C00A2,0x12002D3,0x12002D3,0x12002D3,0x8C7C00C9,0x8C7C00C9,0x749400C9,0x866400A2,0x866400A2,0x72780002,0x6A8000A2,0x15F802D3, -0x15F802D3,0x724000C9,0x6A2400A2,0x600002D3,0xFEAC0065,0xF6BC01A0,0xC402D4,0xDC9C0000,0xAA9C0001,0x949C0001,0x8EA00006,0x809C0002,0xFE90004A,0xD08C0001,0x889400A2,0x72780002,0x19C02D3,0xD400A4,0xD400A4,0xD400A4,0xD400A4,0x98C40000,0x98C40000,0x98C40000,0x7CC40000,0x7CC40000,0x72C40001,0x33C00A2,0x33C00A2,0x33C00A2,0x82A40001,0x82A40001, -0x72B40001,0x23FC00A2,0x23FC00A2,0x727C0001,0x6A0000A2,0x33C00A2,0x33C00A2,0x33C00A2,0x82A40001,0x82A40001,0x72B40001,0x23FC00A2,0x23FC00A2,0x727C0001,0x6A0000A2,0x23FC00A2,0x23FC00A2,0x727C0001,0x6A0000A2,0x6A0000A2,0xFAB4000D,0xFECC0020,0xD400A4,0xDC9C0000,0xA8A00000,0x92A00000,0x88A80001,0x82980000,0xF29C0019,0xD08C0000,0x1C800A2,0x727C0001, -0x1C800A2,0xFC00C8,0xB0E80000,0x90E80001,0x86E80001,0x37400C8,0x98C40001,0x86D40001,0x3FFC00C8,0x86980001,0x7C0000CA,0x37400C8,0x98C40001,0x86D40001,0x3FFC00C8,0x86980001,0x7C0000CA,0x3FFC00C8,0x86980001,0x7C0000CA,0x7C0000CA,0x37400C8,0x98C40001,0x86D40001,0x3FFC00C8,0x86980001,0x7C0000CA,0x3FFC00C8,0x86980001,0x7C0000CA,0x7C0000CA,0x3FFC00C8, -0x86980001,0x7C0000CA,0x7C0000CA,0x7C0000CA,0xFAC80032,0x10C00C8,0xF2F40048,0xD8A00000,0xAE980001,0x96980000,0x86B00001,0x867C0001,0xFEAC0029,0xD28C0000,0x8AB40000,0x7C0000CA,0xDFC00C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0xB000C8,0x8A9C0000,0x8A9C0000,0x8A9C0000,0x8A9C0000,0x8A9C0000, -0x8A9C0000,0x6A9C0001,0x6A9C0001,0x6A9C0001,0x609C0001,0x10400C8,0x10400C8,0x10400C8,0x10400C8,0x10400C8,0x10400C8,0x72780001,0x72780001,0x72780001,0x60880001,0x7FC00C8,0x7FC00C8,0x7FC00C8,0x604C0001,0x560000CA,0xFAA40029,0xB000C8,0xB000C8,0xD89C0000,0xAE9C0000,0x989C0000,0x989C0000,0x7E9C0000,0xFC8C000D,0xC88C0001,0x72980001,0x72780001, -0x17400C8,}; -static const uint32_t g_etc1_to_bc7_m6_table78[] = { -0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x5F80000, -0x5F80000,0x5F80000,0x5F80000,0x54000001,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xB80000,0xB80000,0xB80000,0x1000000,0x16C0000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000, -0x13C0000,0x21FC0000,0x21FC0000,0x21FC0000,0x68000001,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x21FC0000,0x21FC0000,0x21FC0000,0x68000001,0x21FC0000,0x21FC0000,0x21FC0000,0x68000001,0x68000001,0x8E00000,0xD40000,0xD40000,0x4F40000,0x3080000,0x1200000,0x1200000,0x1640000,0x4F40000,0x3080000,0x1C00000,0x21FC0000, -0x1C00000,0xF80000,0xF80000,0xF80000,0xF80000,0x1700000,0x1700000,0x1700000,0x3DF80000,0x3DF80000,0x7A000001,0x1700000,0x1700000,0x1700000,0x3DF80000,0x3DF80000,0x7A000001,0x3DF80000,0x3DF80000,0x7A000001,0x7A000001,0x1700000,0x1700000,0x1700000,0x3DF80000,0x3DF80000,0x7A000001,0x3DF80000,0x3DF80000,0x7A000001,0x7A000001,0x3DF80000, -0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x1200000,0x1080000,0xF80000,0x1500000,0x1A00000,0x9FC0000,0x23FC0000,0x4DFC0000,0x5340000,0x1700000,0x9FC0000,0x7A000001,0x9FC0000,0x1200000,0x1AC0000,0x5BF80000,0x8E000001,0x1AC0000,0x5BF80000,0x8E000001,0x5BF80000,0x8E000001,0x8E000001,0x1AC0000,0x5BF80000,0x8E000001,0x5BF80000,0x8E000001, -0x8E000001,0x5BF80000,0x8E000001,0x8E000001,0x8E000001,0x1AC0000,0x5BF80000,0x8E000001,0x5BF80000,0x8E000001,0x8E000001,0x5BF80000,0x8E000001,0x8E000001,0x8E000001,0x5BF80000,0x8E000001,0x8E000001,0x8E000001,0x8E000001,0x1680000,0x1340000,0x1340000,0x1E40000,0x45FC0000,0x75F00000,0x8E000001,0x8E000001,0x1880000,0x13FC0000,0x89E40000,0x8E000001, -0x2FFC0000,0xE40738,0xDCD402D4,0xA2D402D4,0x8ED402D5,0xD2C00190,0xAAC000D5,0x90C8013A,0x98C00190,0x8CC000D5,0x84C00192,0xCCAC02D3,0xACAC00A2,0x94B800FF,0x9CAC00D1,0x8EAC0002,0x84B000D7,0x8EAC02D4,0x8AA400FD,0x82A4013B,0x7CAC02D3,0x1540734,0xB48802D3,0x8EAC02D4,0xA67C0192,0x909400CF,0x849C0192,0xA06402D3,0x8E7400A3,0x847400D7,0x7C8402D4,0x2FFC0734, -0x8E3402D4,0x84280192,0x7C0C02D3,0x72000734,0xFEC40168,0xF6DC04DA,0xF8E00569,0xE4AC0000,0xB6AC0001,0x9CAC0002,0x90B00012,0x8CAC0011,0xFCB0013D,0xD89C0002,0x92A000A9,0x847400D7,0x1E80734,0xF802D4,0xC8E800A4,0x9CE800A4,0x8EE400A5,0xBED400C8,0xA0D40001,0x90D80015,0x92D400C8,0x8CD00026,0x84D400CA,0x17402D3,0xAAB000A2,0x8ECC00A3,0x9EA400C9,0x8EAC0002, -0x86B800CA,0x3FF802D3,0x8E7400A3,0x846400CA,0x7C0002D3,0x17402D3,0xAAB000A2,0x8ECC00A3,0x9EA400C9,0x8EAC0002,0x86B800CA,0x3FF802D3,0x8E7400A3,0x846400CA,0x7C0002D3,0x3FF802D3,0x8E7400A3,0x846400CA,0x7C0002D3,0x7C0002D3,0xFED00083,0xFEEC01BA,0xF2F401C8,0xE4AC0000,0xB6AC0001,0x9CAC0002,0x90B80002,0x8EA00005,0xFEB40095,0xD89C0001,0x949800A2,0x846400CA, -0xBFC02D3,0xD402D4,0xD402D4,0xD402D4,0xD402D4,0xB4C000C8,0xB4C000C8,0xB4C000C8,0x88C000C8,0x88C000C8,0x7AC000C9,0xACAC00A2,0xACAC00A2,0xACAC00A2,0x8EAC0001,0x8EAC0001,0x7EB00026,0x80AC00A2,0x80AC00A2,0x7AA80015,0x72AC00A2,0x13802D3,0x13802D3,0x13802D3,0x948C00C9,0x948C00C9,0x7CA400C9,0x8E7400A2,0x8E7400A2,0x7A880002,0x729000A2,0x21F802D3, -0x21F802D3,0x7A5000C9,0x723400A2,0x680002D3,0xFEBC0075,0xFECC01A0,0xD402D4,0xE4AC0000,0xB2AC0001,0x9CAC0001,0x96B00006,0x88AC0002,0xFEA0005A,0xD89C0001,0x90A400A2,0x7A880002,0x1BC02D3,0xE400A4,0xE400A4,0xE400A4,0xE400A4,0xA0D40000,0xA0D40000,0xA0D40000,0x84D40000,0x84D40000,0x7AD40001,0x15400A2,0x15400A2,0x15400A2,0x8AB40001,0x8AB40001, -0x7AC40001,0x2FFC00A2,0x2FFC00A2,0x7A8C0001,0x720000A2,0x15400A2,0x15400A2,0x15400A2,0x8AB40001,0x8AB40001,0x7AC40001,0x2FFC00A2,0x2FFC00A2,0x7A8C0001,0x720000A2,0x2FFC00A2,0x2FFC00A2,0x7A8C0001,0x720000A2,0x720000A2,0xF6C80012,0xF6DC0029,0xE400A4,0xE4AC0000,0xB0B00000,0x9AB00000,0x90B80001,0x8AA80000,0xFAAC0019,0xD89C0000,0x1E800A2,0x7A8C0001, -0x1E800A2,0x10C00C8,0xB8F80000,0x98F80001,0x8EF80001,0x38C00C8,0xA0D40001,0x8EE40001,0x4BFC00C8,0x8EA80001,0x840000CA,0x38C00C8,0xA0D40001,0x8EE40001,0x4BFC00C8,0x8EA80001,0x840000CA,0x4BFC00C8,0x8EA80001,0x840000CA,0x840000CA,0x38C00C8,0xA0D40001,0x8EE40001,0x4BFC00C8,0x8EA80001,0x840000CA,0x4BFC00C8,0x8EA80001,0x840000CA,0x840000CA,0x4BFC00C8, -0x8EA80001,0x840000CA,0x840000CA,0x840000CA,0xF2E0003D,0x71C00C8,0xFB040048,0xE0B00000,0xB6A80001,0x9EA80000,0x8EC00001,0x8E8C0001,0xF4C80032,0xDA9C0000,0x92C40000,0x840000CA,0x1DF800C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0xC000C8,0x92AC0000,0x92AC0000,0x92AC0000,0x92AC0000,0x92AC0000, -0x92AC0000,0x72AC0001,0x72AC0001,0x72AC0001,0x68AC0001,0x11C00C8,0x11C00C8,0x11C00C8,0x11C00C8,0x11C00C8,0x11C00C8,0x7A880001,0x7A880001,0x7A880001,0x68980001,0x13FC00C8,0x13FC00C8,0x13FC00C8,0x685C0001,0x5E0000CA,0xF2B40034,0xC000C8,0xC000C8,0xE0AC0000,0xB6AC0000,0xA0AC0000,0xA0AC0000,0x86AC0000,0xFC9C0012,0xD09C0001,0x7AA80001,0x7A880001, -0x19800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table79[] = { -0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x11F80000, -0x11F80000,0x11F80000,0x11F80000,0x5C000001,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xC80000,0xC80000,0xC80000,0x1180000,0x1900000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000, -0x3500000,0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0x70000001,0xF40000,0xE40000,0xE40000,0x1080000,0x31C0000,0x3340000,0x3340000,0x1800000,0x1080000,0x31C0000,0x1E40000,0x2DFC0000, -0x1E40000,0x1080000,0x1080000,0x1080000,0x1080000,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x82000001,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x82000001,0x49F80000,0x49F80000,0x82000001,0x82000001,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x82000001,0x49F80000,0x49F80000,0x82000001,0x82000001,0x49F80000, -0x49F80000,0x82000001,0x82000001,0x82000001,0x1340000,0x5180000,0x1080000,0x1680000,0x1BC0000,0x19FC0000,0x31FC0000,0x59F40000,0x14C0000,0x1880000,0x19FC0000,0x82000001,0x19FC0000,0x1300000,0x1C40000,0x67F80000,0x96000001,0x1C40000,0x67F80000,0x96000001,0x67F80000,0x96000001,0x96000001,0x1C40000,0x67F80000,0x96000001,0x67F80000,0x96000001, -0x96000001,0x67F80000,0x96000001,0x96000001,0x96000001,0x1C40000,0x67F80000,0x96000001,0x67F80000,0x96000001,0x96000001,0x67F80000,0x96000001,0x96000001,0x96000001,0x67F80000,0x96000001,0x96000001,0x96000001,0x96000001,0x17C0000,0x1440000,0x1440000,0x3FC0000,0x53F80000,0x7FF00000,0x96000001,0x96000001,0x19C0000,0x25FC0000,0x91F40000,0x96000001, -0x3FF80000,0xF40738,0xE4E402D4,0xAAE402D4,0x96E402D5,0xDAD00190,0xB2D000D5,0x98D8013A,0xA0D00190,0x94D000D5,0x8CD00192,0xD4BC02D3,0xB4BC00A2,0x9CC800FF,0xA4BC00D1,0x96BC0002,0x8CC000D7,0x96BC02D4,0x92B400FD,0x8AB4013B,0x84BC02D3,0x16C0734,0xBC9802D3,0x96BC02D4,0xAE8C0192,0x98A400CF,0x8CAC0192,0xA87402D3,0x968400A3,0x8C8400D7,0x849402D4,0x3BFC0734, -0x964402D4,0x8C380192,0x841C02D3,0x7A000734,0xFED8019A,0xFEEC04DA,0xFEEC0585,0xECBC0000,0xBEBC0001,0xA4BC0002,0x98C00012,0x94BC0011,0xFEBC0167,0xE0AC0002,0x9AB000A9,0x8C8400D7,0x7FC0734,0x10802D4,0xD0F800A4,0xA4F800A4,0x96F400A5,0xC6E400C8,0xA8E40001,0x98E80015,0x9AE400C8,0x94E00026,0x8CE400CA,0x18C02D3,0xB2C000A2,0x96DC00A3,0xA6B400C9,0x96BC0002, -0x8EC800CA,0x4BF802D3,0x968400A3,0x8C7400CA,0x840002D3,0x18C02D3,0xB2C000A2,0x96DC00A3,0xA6B400C9,0x96BC0002,0x8EC800CA,0x4BF802D3,0x968400A3,0x8C7400CA,0x840002D3,0x4BF802D3,0x968400A3,0x8C7400CA,0x840002D3,0x840002D3,0xFEE400A6,0xF90001D4,0xFB0401C8,0xECBC0000,0xBEBC0001,0xA4BC0002,0x98C80002,0x96B00005,0xFACC00B5,0xE0AC0001,0x9CA800A2,0x8C7400CA, -0x1BFC02D3,0xE402D4,0xE402D4,0xE402D4,0xE402D4,0xBCD000C8,0xBCD000C8,0xBCD000C8,0x90D000C8,0x90D000C8,0x82D000C9,0xB4BC00A2,0xB4BC00A2,0xB4BC00A2,0x96BC0001,0x96BC0001,0x86C00026,0x88BC00A2,0x88BC00A2,0x82B80015,0x7ABC00A2,0x15002D3,0x15002D3,0x15002D3,0x9C9C00C9,0x9C9C00C9,0x84B400C9,0x968400A2,0x968400A2,0x82980002,0x7AA000A2,0x2DF802D3, -0x2DF802D3,0x826000C9,0x7A4400A2,0x700002D3,0xFECC0084,0xF6DC01B9,0xE402D4,0xECBC0000,0xBABC0001,0xA4BC0001,0x9EC00006,0x90BC0002,0xFEB40065,0xE0AC0001,0x98B400A2,0x82980002,0x1E002D3,0xF400A4,0xF400A4,0xF400A4,0xF400A4,0xA8E40000,0xA8E40000,0xA8E40000,0x8CE40000,0x8CE40000,0x82E40001,0x16C00A2,0x16C00A2,0x16C00A2,0x92C40001,0x92C40001, -0x82D40001,0x3BFC00A2,0x3BFC00A2,0x829C0001,0x7A0000A2,0x16C00A2,0x16C00A2,0x16C00A2,0x92C40001,0x92C40001,0x82D40001,0x3BFC00A2,0x3BFC00A2,0x829C0001,0x7A0000A2,0x3BFC00A2,0x3BFC00A2,0x829C0001,0x7A0000A2,0x7A0000A2,0xFED80012,0xFEEC0029,0xF400A4,0xECBC0000,0xB8C00000,0xA2C00000,0x98C80001,0x92B80000,0xFCC00020,0xE0AC0000,0x7FC00A2,0x829C0001, -0x7FC00A2,0x11C00C8,0xC1080000,0xA1080001,0x97080001,0x3A400C8,0xA8E40001,0x96F40001,0x57FC00C8,0x96B80001,0x8C0000CA,0x3A400C8,0xA8E40001,0x96F40001,0x57FC00C8,0x96B80001,0x8C0000CA,0x57FC00C8,0x96B80001,0x8C0000CA,0x8C0000CA,0x3A400C8,0xA8E40001,0x96F40001,0x57FC00C8,0x96B80001,0x8C0000CA,0x57FC00C8,0x96B80001,0x8C0000CA,0x8C0000CA,0x57FC00C8, -0x96B80001,0x8C0000CA,0x8C0000CA,0x8C0000CA,0xFAF0003D,0xF2C00C8,0xF3140055,0xE8C00000,0xBEB80001,0xA6B80000,0x96D00001,0x969C0001,0xFCD80032,0xE2AC0000,0x9AD40000,0x8C0000CA,0x2BFC00C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0xD000C8,0x9ABC0000,0x9ABC0000,0x9ABC0000,0x9ABC0000,0x9ABC0000, -0x9ABC0000,0x7ABC0001,0x7ABC0001,0x7ABC0001,0x70BC0001,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x82980001,0x82980001,0x82980001,0x70A80001,0x1FF800C8,0x1FF800C8,0x1FF800C8,0x706C0001,0x660000CA,0xFAC40034,0xD000C8,0xD000C8,0xE8BC0000,0xBEBC0000,0xA8BC0000,0xA8BC0000,0x8EBC0000,0xF8B00019,0xD8AC0001,0x82B80001,0x82980001, -0x1B800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table80[] = { -0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0x3300000,0x3300000,0x3300000,0x3300000,0x3300000,0x3300000,0x3300000,0x3300000,0x3300000,0x3300000,0x1DFC0000, -0x1DFC0000,0x1DFC0000,0x1DFC0000,0x66000000,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0xDC0000,0xDC0000,0xDC0000,0x3300000,0x1B40000,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000, -0x16C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x7A000000,0xB040000,0xF40001,0xF40001,0x51C0000,0x1340000,0x34C0000,0x34C0000,0x19C0000,0x51C0000,0x1340000,0x7FC0000,0x3BFC0000, -0x7FC0000,0x1180001,0x1180001,0x1180001,0x1180001,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x8C000000,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x8C000000,0x57F80000,0x57F80000,0x8C000000,0x8C000000,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x8C000000,0x57F80000,0x57F80000,0x8C000000,0x8C000000,0x57F80000, -0x57F80000,0x8C000000,0x8C000000,0x8C000000,0x1480000,0x12C0000,0x1180001,0x1800000,0x1D80000,0x29FC0000,0x3FFC0000,0x65F40000,0x3600000,0x1A40000,0x29FC0000,0x8C000000,0x29FC0000,0x1400001,0x3DC0000,0x73FC0000,0xA0000000,0x3DC0000,0x73FC0000,0xA0000000,0x73FC0000,0xA0000000,0xA0000000,0x3DC0000,0x73FC0000,0xA0000000,0x73FC0000,0xA0000000, -0xA0000000,0x73FC0000,0xA0000000,0xA0000000,0xA0000000,0x3DC0000,0x73FC0000,0xA0000000,0x73FC0000,0xA0000000,0xA0000000,0x73FC0000,0xA0000000,0xA0000000,0xA0000000,0x73FC0000,0xA0000000,0xA0000000,0xA0000000,0xA0000000,0x1940000,0x1580000,0x1580000,0x19FC0000,0x61FC0000,0x89FC0000,0xA0000000,0xA0000000,0x3B40000,0x37FC0000,0x9BE80000,0xA0000000, -0x4FFC0000,0x1080734,0xF0F402D3,0xB2F402D4,0xA0F402D3,0xE2E40192,0xBAE400D7,0xA4E8013B,0xA8E40192,0x9EE000D7,0x96E00192,0xDAD002D4,0xBCD000A3,0xA4D800FD,0xACCC00CF,0xA0D00002,0x96D400D5,0xA0CC02D4,0x9AC400FF,0x92CC013A,0x8CCC02D5,0x1880734,0xC4AC02D3,0xA0D002D4,0xB89C0192,0xA0B400D1,0x96BC0190,0xB28402D3,0xA09400A2,0x969800D5,0x8CA802D4,0x49F80734, -0xA05402D3,0x96440190,0x8C3002D4,0x82000738,0xFEEC01E2,0xF9000514,0xFB040594,0xF4D00002,0xC6CC0003,0xAED00002,0xA0D40011,0x9EC80012,0xFED4019A,0xECBC0000,0xA2C400A9,0x969800D5,0x19FC0734,0x11C02D3,0xDB0800A2,0xAF0800A2,0xA10800A2,0xCCF800C9,0xB0F80002,0xA2F80015,0xA4F400C9,0x9EF00026,0x96F400C9,0x3A402D3,0xBCD000A2,0xA0EC00A2,0xB0C400C9,0xA0D00001, -0x96DC00C8,0x57FC02D3,0xA09000A2,0x968400C8,0x8C0002D4,0x3A402D3,0xBCD000A2,0xA0EC00A2,0xB0C400C9,0xA0D00001,0x96DC00C8,0x57FC02D3,0xA09000A2,0x968400C8,0x8C0002D4,0x57FC02D3,0xA09000A2,0x968400C8,0x8C0002D4,0x8C0002D4,0xFEF800B6,0xFF0C01E3,0xF51801E6,0xF4D00001,0xC6CC0002,0xAED00001,0xA0DC0002,0x9EC00006,0xFEDC00C5,0xECBC0000,0xA4BC00A2,0x968400C8, -0x2BFC02D3,0xF402D3,0xF402D3,0xF402D3,0xF402D3,0xC2E400CA,0xC2E400CA,0xC2E400CA,0x9AE000CA,0x9AE000CA,0x8CE000CA,0xBCD000A3,0xBCD000A3,0xBCD000A3,0x9ED00002,0x9ED00002,0x8ED40026,0x90D000A3,0x90D000A3,0x8ACC0015,0x82CC00A5,0x36802D3,0x36802D3,0x36802D3,0xA4B000C9,0xA4B000C9,0x8CC800C8,0x9E9800A2,0x9E9800A2,0x8CA80001,0x82B400A4,0x39FC02D3, -0x39FC02D3,0x8C7000C8,0x825C00A4,0x7A0002D4,0xFAE000A6,0xFEEC01BE,0xF402D3,0xF8CC0001,0xC2D00002,0xACD00002,0xA6D00005,0x9ACC0002,0xFCCC0080,0xECBC0000,0xA0C800A3,0x8CA80001,0x5FC02D3,0x10800A2,0x10800A2,0x10800A2,0x10800A2,0xAEF80001,0xAEF80001,0xAEF80001,0x94F80001,0x94F80001,0x8CF40001,0x18800A2,0x18800A2,0x18800A2,0x9AD80001,0x9AD80001, -0x8CE40000,0x49F800A2,0x49F800A2,0x8CAC0000,0x820000A4,0x18800A2,0x18800A2,0x18800A2,0x9AD80001,0x9AD80001,0x8CE40000,0x49F800A2,0x49F800A2,0x8CAC0000,0x820000A4,0x49F800A2,0x49F800A2,0x8CAC0000,0x820000A4,0x820000A4,0xFAEC0019,0xF9000032,0x10800A2,0xF8CC0000,0xBED40000,0xAAD40000,0xA2D80000,0x9ACC0001,0xFED00028,0xECBC0000,0x19FC00A2,0x8CAC0000, -0x19FC00A2,0x12C00CA,0xC71C0001,0xAB180001,0xA1180001,0x1C000C8,0xB0F80001,0xA1040001,0x65F800C8,0xA0C80000,0x960000C8,0x1C000C8,0xB0F80001,0xA1040001,0x65F800C8,0xA0C80000,0x960000C8,0x65F800C8,0xA0C80000,0x960000C8,0x960000C8,0x1C000C8,0xB0F80001,0xA1040001,0x65F800C8,0xA0C80000,0x960000C8,0x65F800C8,0xA0C80000,0x960000C8,0x960000C8,0x65F800C8, -0xA0C80000,0x960000C8,0x960000C8,0x960000C8,0xFB040048,0x94000C8,0xFD280055,0xF4D00000,0xC6CC0001,0xB0C80000,0xA0E00000,0xA0A80000,0xFEF0003D,0xECBC0000,0xA2E80001,0x960000C8,0x3DF800C8,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xE000CA,0xA0D00001,0xA0D00001,0xA0D00001,0xA0D00001,0xA0D00001, -0xA0D00001,0x84CC0001,0x84CC0001,0x84CC0001,0x7ACC0001,0x15000C8,0x15000C8,0x15000C8,0x15000C8,0x15000C8,0x15000C8,0x8AAC0001,0x8AAC0001,0x8AAC0001,0x7AB80001,0x2DF800C8,0x2DF800C8,0x2DF800C8,0x7A7C0000,0x700000C8,0xF4D8003D,0xE000CA,0xE000CA,0xECD00001,0xC4D00001,0xAED00001,0xAED00001,0x96D00001,0xF4C40020,0xEABC0000,0x8EC80000,0x8AAC0001, -0x1E000C8,}; -static const uint32_t g_etc1_to_bc7_m6_table81[] = { -0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x29FC0000, -0x29FC0000,0x29FC0000,0x29FC0000,0x6E000000,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xDC0001,0xEC0000,0xEC0000,0xEC0000,0x1480000,0x1D80000,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000, -0x1840000,0x47FC0000,0x47FC0000,0x47FC0000,0x82000000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x47FC0000,0x47FC0000,0x47FC0000,0x82000000,0x47FC0000,0x47FC0000,0x47FC0000,0x82000000,0x82000000,0x1180000,0x1040001,0x1040001,0x1300000,0x1480000,0x1640000,0x1640000,0x1B80000,0x1300000,0x1480000,0x17FC0000,0x47FC0000, -0x17FC0000,0x1280001,0x1280001,0x1280001,0x1280001,0x1BC0000,0x1BC0000,0x1BC0000,0x61FC0000,0x61FC0000,0x94000000,0x1BC0000,0x1BC0000,0x1BC0000,0x61FC0000,0x61FC0000,0x94000000,0x61FC0000,0x61FC0000,0x94000000,0x94000000,0x1BC0000,0x1BC0000,0x1BC0000,0x61FC0000,0x61FC0000,0x94000000,0x61FC0000,0x61FC0000,0x94000000,0x94000000,0x61FC0000, -0x61FC0000,0x94000000,0x94000000,0x94000000,0x5580000,0x73C0000,0x1280001,0x1940000,0x1F40000,0x39FC0000,0x4DFC0000,0x6FFC0000,0x3740000,0x1BC0000,0x39FC0000,0x94000000,0x39FC0000,0x1500001,0x3F40000,0x7FFC0000,0xA8000000,0x3F40000,0x7FFC0000,0xA8000000,0x7FFC0000,0xA8000000,0xA8000000,0x3F40000,0x7FFC0000,0xA8000000,0x7FFC0000,0xA8000000, -0xA8000000,0x7FFC0000,0xA8000000,0xA8000000,0xA8000000,0x3F40000,0x7FFC0000,0xA8000000,0x7FFC0000,0xA8000000,0xA8000000,0x7FFC0000,0xA8000000,0xA8000000,0xA8000000,0x7FFC0000,0xA8000000,0xA8000000,0xA8000000,0xA8000000,0x1A80000,0x1680000,0x1680000,0x2DFC0000,0x6FFC0000,0x93FC0000,0xA8000000,0xA8000000,0x1CC0000,0x49FC0000,0xA3F80000,0xA8000000, -0x5FF80000,0x1180734,0xF90402D3,0xBB0402D4,0xA90402D3,0xEAF40192,0xC2F400D7,0xACF8013B,0xB0F40192,0xA6F000D7,0x9EF00192,0xE2E002D4,0xC4E000A3,0xACE800FD,0xB4DC00CF,0xA8E00002,0x9EE400D5,0xA8DC02D4,0xA2D400FF,0x9ADC013A,0x94DC02D5,0x1A00734,0xCCBC02D3,0xA8E002D4,0xC0AC0192,0xA8C400D1,0x9ECC0190,0xBA9402D3,0xA8A400A2,0x9EA800D5,0x94B802D4,0x55F80734, -0xA86402D3,0x9E540190,0x944002D4,0x8A000738,0xFEF8021F,0xFF0C0524,0xF31405C3,0xFCE00002,0xCEDC0003,0xB6E00002,0xA8E40011,0xA6D80012,0xFCEC01F6,0xF4CC0000,0xAAD400A9,0x9EA800D5,0x27FC0734,0x12C02D3,0xE31800A2,0xB71800A2,0xA91800A2,0xD50800C9,0xB9080002,0xAB080015,0xAD0400C9,0xA7000026,0x9F0400C9,0x3BC02D3,0xC4E000A2,0xA8FC00A2,0xB8D400C9,0xA8E00001, -0x9EEC00C8,0x63FC02D3,0xA8A000A2,0x9E9400C8,0x940002D4,0x3BC02D3,0xC4E000A2,0xA8FC00A2,0xB8D400C9,0xA8E00001,0x9EEC00C8,0x63FC02D3,0xA8A000A2,0x9E9400C8,0x940002D4,0x63FC02D3,0xA8A000A2,0x9E9400C8,0x940002D4,0x940002D4,0xFB0C00DE,0xF92001F1,0xFD2801E6,0xFCE00001,0xCEDC0002,0xB6E00001,0xA8EC0002,0xA6D00006,0xFCF400E1,0xF4CC0000,0xACCC00A2,0x9E9400C8, -0x3BFC02D3,0x10402D3,0x10402D3,0x10402D3,0x10402D3,0xCAF400CA,0xCAF400CA,0xCAF400CA,0xA2F000CA,0xA2F000CA,0x94F000CA,0xC4E000A3,0xC4E000A3,0xC4E000A3,0xA6E00002,0xA6E00002,0x96E40026,0x98E000A3,0x98E000A3,0x92DC0015,0x8ADC00A5,0x38002D3,0x38002D3,0x38002D3,0xACC000C9,0xACC000C9,0x94D800C8,0xA6A800A2,0xA6A800A2,0x94B80001,0x8AC400A4,0x45FC02D3, -0x45FC02D3,0x948000C8,0x8A6C00A4,0x820002D4,0xFEF400BA,0xF90001D3,0x10402D3,0xF6E00002,0xCAE00002,0xB4E00002,0xAEE00005,0xA2DC0002,0xFED800A1,0xF4CC0000,0xA8D800A3,0x94B80001,0x15FC02D3,0x11800A2,0x11800A2,0x11800A2,0x11800A2,0xB7080001,0xB7080001,0xB7080001,0x9D080001,0x9D080001,0x95040001,0x1A000A2,0x1A000A2,0x1A000A2,0xA2E80001,0xA2E80001, -0x94F40000,0x55F800A2,0x55F800A2,0x94BC0000,0x8A0000A4,0x1A000A2,0x1A000A2,0x1A000A2,0xA2E80001,0xA2E80001,0x94F40000,0x55F800A2,0x55F800A2,0x94BC0000,0x8A0000A4,0x55F800A2,0x55F800A2,0x94BC0000,0x8A0000A4,0x8A0000A4,0xF7000020,0xFF0C003A,0x11800A2,0xF4E40001,0xC6E40000,0xB2E40000,0xAAE80000,0xA2DC0001,0xF8EC0029,0xF4CC0000,0x27FC00A2,0x94BC0000, -0x27FC00A2,0x13C00CA,0xCF2C0001,0xB3280001,0xA9280001,0x1D800C8,0xB9080001,0xA9140001,0x71F800C8,0xA8D80000,0x9E0000C8,0x1D800C8,0xB9080001,0xA9140001,0x71F800C8,0xA8D80000,0x9E0000C8,0x71F800C8,0xA8D80000,0x9E0000C8,0x9E0000C8,0x1D800C8,0xB9080001,0xA9140001,0x71F800C8,0xA8D80000,0x9E0000C8,0x71F800C8,0xA8D80000,0x9E0000C8,0x9E0000C8,0x71F800C8, -0xA8D80000,0x9E0000C8,0x9E0000C8,0x9E0000C8,0xFF14004A,0x15400C8,0xF5380062,0xFCE00000,0xCEDC0001,0xB8D80000,0xA8F00000,0xA8B80000,0xF7080048,0xF4CC0000,0xAAF80001,0x9E0000C8,0x4BFC00C8,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xF000CA,0xA8E00001,0xA8E00001,0xA8E00001,0xA8E00001,0xA8E00001, -0xA8E00001,0x8CDC0001,0x8CDC0001,0x8CDC0001,0x82DC0001,0x16800C8,0x16800C8,0x16800C8,0x16800C8,0x16800C8,0x16800C8,0x92BC0001,0x92BC0001,0x92BC0001,0x82C80001,0x39F800C8,0x39F800C8,0x39F800C8,0x828C0000,0x780000C8,0xFCE8003D,0xF000CA,0xF000CA,0xF4E00001,0xCCE00001,0xB6E00001,0xB6E00001,0x9EE00001,0xFCD40020,0xF2CC0000,0x96D80000,0x92BC0001, -0x3FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table82[] = { -0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x35FC0000, -0x35FC0000,0x35FC0000,0x35FC0000,0x76000000,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0x6FC0000,0x6FC0000,0x6FC0000,0x1600000,0x1F80000,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000, -0x19C0000,0x53FC0000,0x53FC0000,0x53FC0000,0x8A000000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x53FC0000,0x53FC0000,0x53FC0000,0x8A000000,0x53FC0000,0x53FC0000,0x53FC0000,0x8A000000,0x8A000000,0x1280000,0x1140001,0x1140001,0x1440000,0x15C0000,0x3780000,0x3780000,0x1D40000,0x1440000,0x15C0000,0x25FC0000,0x53FC0000, -0x25FC0000,0x1380001,0x1380001,0x1380001,0x1380001,0x3D00000,0x3D00000,0x3D00000,0x6DFC0000,0x6DFC0000,0x9C000000,0x3D00000,0x3D00000,0x3D00000,0x6DFC0000,0x6DFC0000,0x9C000000,0x6DFC0000,0x6DFC0000,0x9C000000,0x9C000000,0x3D00000,0x3D00000,0x3D00000,0x6DFC0000,0x6DFC0000,0x9C000000,0x6DFC0000,0x6DFC0000,0x9C000000,0x9C000000,0x6DFC0000, -0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x16C0000,0xF4C0000,0x1380001,0x1AC0000,0xFFC0000,0x47FC0000,0x5BFC0000,0x7BF40000,0x3880000,0x3D00000,0x47FC0000,0x9C000000,0x47FC0000,0x1600001,0x13FC0000,0x8BFC0000,0xB0000000,0x13FC0000,0x8BFC0000,0xB0000000,0x8BFC0000,0xB0000000,0xB0000000,0x13FC0000,0x8BFC0000,0xB0000000,0x8BFC0000,0xB0000000, -0xB0000000,0x8BFC0000,0xB0000000,0xB0000000,0xB0000000,0x13FC0000,0x8BFC0000,0xB0000000,0x8BFC0000,0xB0000000,0xB0000000,0x8BFC0000,0xB0000000,0xB0000000,0xB0000000,0x8BFC0000,0xB0000000,0xB0000000,0xB0000000,0xB0000000,0x1BC0000,0x5780000,0x5780000,0x41FC0000,0x7DF80000,0x9DFC0000,0xB0000000,0xB0000000,0x1E00000,0x59FC0000,0xADCC0000,0xB0000000, -0x6DFC0000,0x1280734,0xFD1402D4,0xC31402D4,0xB11402D3,0xF3040192,0xCB0400D7,0xB508013B,0xB9040192,0xAF0000D7,0xA7000192,0xEAF002D4,0xCCF000A3,0xB4F800FD,0xBCEC00CF,0xB0F00002,0xA6F400D5,0xB0EC02D4,0xAAE400FF,0xA2EC013A,0x9CEC02D5,0x1B80734,0xD4CC02D3,0xB0F002D4,0xC8BC0192,0xB0D400D1,0xA6DC0190,0xC2A402D3,0xB0B400A2,0xA6B800D5,0x9CC802D4,0x61F80734, -0xB07402D3,0xA6640190,0x9C5002D4,0x92000738,0xFF0C0252,0xF9200552,0xFB2405C3,0xFEF00006,0xD6EC0003,0xBEF00002,0xB0F40011,0xAEE80012,0xFEF8021A,0xFCDC0000,0xB2E400A9,0xA6B800D5,0x37FC0734,0x13C02D3,0xEB2800A2,0xBF2800A2,0xB12800A2,0xDD1800C9,0xC1180002,0xB3180015,0xB51400C9,0xAF100026,0xA71400C9,0x1D402D3,0xCCF000A2,0xB10C00A2,0xC0E400C9,0xB0F00001, -0xA6FC00C8,0x6FFC02D3,0xB0B000A2,0xA6A400C8,0x9C0002D4,0x1D402D3,0xCCF000A2,0xB10C00A2,0xC0E400C9,0xB0F00001,0xA6FC00C8,0x6FFC02D3,0xB0B000A2,0xA6A400C8,0x9C0002D4,0x6FFC02D3,0xB0B000A2,0xA6A400C8,0x9C0002D4,0x9C0002D4,0xFD1C00F5,0xFF2C0209,0xF5380203,0xFCF40005,0xD6EC0002,0xBEF00001,0xB0FC0002,0xAEE00006,0xFF0800F4,0xFCDC0000,0xB4DC00A2,0xA6A400C8, -0x49FC02D3,0x11402D3,0x11402D3,0x11402D3,0x11402D3,0xD30400CA,0xD30400CA,0xD30400CA,0xAB0000CA,0xAB0000CA,0x9D0000CA,0xCCF000A3,0xCCF000A3,0xCCF000A3,0xAEF00002,0xAEF00002,0x9EF40026,0xA0F000A3,0xA0F000A3,0x9AEC0015,0x92EC00A5,0x39802D3,0x39802D3,0x39802D3,0xB4D000C9,0xB4D000C9,0x9CE800C8,0xAEB800A2,0xAEB800A2,0x9CC80001,0x92D400A4,0x51FC02D3, -0x51FC02D3,0x9C9000C8,0x927C00A4,0x8A0002D4,0xFF0400CB,0xFF0C01DB,0x11402D3,0xFEF00002,0xD2F00002,0xBCF00002,0xB6F00005,0xAAEC0002,0xFCF000B5,0xFCDC0000,0xB0E800A3,0x9CC80001,0x23FC02D3,0x12800A2,0x12800A2,0x12800A2,0x12800A2,0xBF180001,0xBF180001,0xBF180001,0xA5180001,0xA5180001,0x9D140001,0x1B800A2,0x1B800A2,0x1B800A2,0xAAF80001,0xAAF80001, -0x9D040000,0x61F800A2,0x61F800A2,0x9CCC0000,0x920000A4,0x1B800A2,0x1B800A2,0x1B800A2,0xAAF80001,0xAAF80001,0x9D040000,0x61F800A2,0x61F800A2,0x9CCC0000,0x920000A4,0x61F800A2,0x61F800A2,0x9CCC0000,0x920000A4,0x920000A4,0xFF100020,0xF920003D,0x12800A2,0xFCF40001,0xCEF40000,0xBAF40000,0xB2F80000,0xAAEC0001,0xFEF8002D,0xFCDC0000,0x37FC00A2,0x9CCC0000, -0x37FC00A2,0x14C00CA,0xD73C0001,0xBB380001,0xB1380001,0x1F000C8,0xC1180001,0xB1240001,0x7DF800C8,0xB0E80000,0xA60000C8,0x1F000C8,0xC1180001,0xB1240001,0x7DF800C8,0xB0E80000,0xA60000C8,0x7DF800C8,0xB0E80000,0xA60000C8,0xA60000C8,0x1F000C8,0xC1180001,0xB1240001,0x7DF800C8,0xB0E80000,0xA60000C8,0x7DF800C8,0xB0E80000,0xA60000C8,0xA60000C8,0x7DF800C8, -0xB0E80000,0xA60000C8,0xA60000C8,0xA60000C8,0xFB2C0055,0x16400C8,0xFD480062,0xFEF40001,0xD6EC0001,0xC0E80000,0xB1000000,0xB0C80000,0xFF180048,0xFCDC0000,0xB3080001,0xA60000C8,0x5BFC00C8,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0x10000CA,0xB0F00001,0xB0F00001,0xB0F00001,0xB0F00001,0xB0F00001, -0xB0F00001,0x94EC0001,0x94EC0001,0x94EC0001,0x8AEC0001,0x18000C8,0x18000C8,0x18000C8,0x18000C8,0x18000C8,0x18000C8,0x9ACC0001,0x9ACC0001,0x9ACC0001,0x8AD80001,0x45F800C8,0x45F800C8,0x45F800C8,0x8A9C0000,0x800000C8,0xF4F8004A,0x10000CA,0x10000CA,0xFCF00001,0xD4F00001,0xBEF00001,0xBEF00001,0xA6F00001,0xF8E80029,0xFADC0000,0x9EE80000,0x9ACC0001, -0x13FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table83[] = { -0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x41FC0000, -0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xF0C0000,0xF0C0000,0xF0C0000,0x1780000,0xFFC0000,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000, -0x1B40000,0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x92000000,0x5380000,0x1240001,0x1240001,0x3540000,0x1700000,0x1900000,0x1900000,0x3EC0000,0x3540000,0x1700000,0x35FC0000,0x5FF80000, -0x35FC0000,0x1480001,0x1480001,0x1480001,0x1480001,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0xA4000000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0xA4000000,0x79FC0000,0x79FC0000,0xA4000000,0xA4000000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0xA4000000,0x79FC0000,0x79FC0000,0xA4000000,0xA4000000,0x79FC0000, -0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0x1800000,0x1600000,0x1480001,0x1C00000,0x23FC0000,0x57FC0000,0x69F80000,0x85F80000,0x39C0000,0x3E80000,0x57FC0000,0xA4000000,0x57FC0000,0x1700001,0x2BFC0000,0x97FC0000,0xB8000000,0x2BFC0000,0x97FC0000,0xB8000000,0x97FC0000,0xB8000000,0xB8000000,0x2BFC0000,0x97FC0000,0xB8000000,0x97FC0000,0xB8000000, -0xB8000000,0x97FC0000,0xB8000000,0xB8000000,0xB8000000,0x2BFC0000,0x97FC0000,0xB8000000,0x97FC0000,0xB8000000,0xB8000000,0x97FC0000,0xB8000000,0xB8000000,0xB8000000,0x97FC0000,0xB8000000,0xB8000000,0xB8000000,0xB8000000,0x1D00000,0xD880000,0xD880000,0x53FC0000,0x89FC0000,0xA7FC0000,0xB8000000,0xB8000000,0x1F80000,0x6BFC0000,0xB5DC0000,0xB8000000, -0x7DF80000,0x1380734,0xFF2402DC,0xCB2402D4,0xB92402D3,0xFB140192,0xD31400D7,0xBD18013B,0xC1140192,0xB71000D7,0xAF100192,0xF30002D4,0xD50000A3,0xBD0800FD,0xC4FC00CF,0xB9000002,0xAF0400D5,0xB8FC02D4,0xB2F400FF,0xAAFC013A,0xA4FC02D5,0x1D00734,0xDCDC02D3,0xB90002D4,0xD0CC0192,0xB8E400D1,0xAEEC0190,0xCAB402D3,0xB8C400A2,0xAEC800D5,0xA4D802D4,0x6DF80734, -0xB88402D3,0xAE740190,0xA46002D4,0x9A000738,0xFF20028E,0xFF2C056A,0xF33405F4,0xFF04001E,0xDEFC0003,0xC7000002,0xB9040011,0xB6F80012,0xFF10025E,0xFEF00006,0xBAF400A9,0xAEC800D5,0x45FC0734,0x14C02D3,0xF33800A2,0xC73800A2,0xB93800A2,0xE52800C9,0xC9280002,0xBB280015,0xBD2400C9,0xB7200026,0xAF2400C9,0x1EC02D3,0xD50000A2,0xB91C00A2,0xC8F400C9,0xB9000001, -0xAF0C00C8,0x7BFC02D3,0xB8C000A2,0xAEB400C8,0xA40002D4,0x1EC02D3,0xD50000A2,0xB91C00A2,0xC8F400C9,0xB9000001,0xAF0C00C8,0x7BFC02D3,0xB8C000A2,0xAEB400C8,0xA40002D4,0x7BFC02D3,0xB8C000A2,0xAEB400C8,0xA40002D4,0xA40002D4,0xFD30010A,0xFB44020B,0xFD480203,0xFF08000D,0xDEFC0002,0xC7000001,0xB90C0002,0xB6F00006,0xFF180119,0xFEF00005,0xBCEC00A2,0xAEB400C8, -0x59FC02D3,0x12402D3,0x12402D3,0x12402D3,0x12402D3,0xDB1400CA,0xDB1400CA,0xDB1400CA,0xB31000CA,0xB31000CA,0xA51000CA,0xD50000A3,0xD50000A3,0xD50000A3,0xB7000002,0xB7000002,0xA7040026,0xA90000A3,0xA90000A3,0xA2FC0015,0x9AFC00A5,0x3B002D3,0x3B002D3,0x3B002D3,0xBCE000C9,0xBCE000C9,0xA4F800C8,0xB6C800A2,0xB6C800A2,0xA4D80001,0x9AE400A4,0x5DFC02D3, -0x5DFC02D3,0xA4A000C8,0x9A8C00A4,0x920002D4,0xFF1000F1,0xF92001EE,0x12402D3,0xFF00000B,0xDB000002,0xC5000002,0xBF000005,0xB2FC0002,0xFF0000CB,0xFCF00002,0xB8F800A3,0xA4D80001,0x33FC02D3,0x13800A2,0x13800A2,0x13800A2,0x13800A2,0xC7280001,0xC7280001,0xC7280001,0xAD280001,0xAD280001,0xA5240001,0x1D000A2,0x1D000A2,0x1D000A2,0xB3080001,0xB3080001, -0xA5140000,0x6DF800A2,0x6DF800A2,0xA4DC0000,0x9A0000A4,0x1D000A2,0x1D000A2,0x1D000A2,0xB3080001,0xB3080001,0xA5140000,0x6DF800A2,0x6DF800A2,0xA4DC0000,0x9A0000A4,0x6DF800A2,0x6DF800A2,0xA4DC0000,0x9A0000A4,0x9A0000A4,0xFF200029,0xFF2C0049,0x13800A2,0xFD040004,0xD7040000,0xC3040000,0xBB080000,0xB2FC0001,0xFD100032,0xFCF00001,0x45FC00A2,0xA4DC0000, -0x45FC00A2,0x15C00CA,0xDF4C0001,0xC3480001,0xB9480001,0xDFC00C8,0xC9280001,0xB9340001,0x89F800C8,0xB8F80000,0xAE0000C8,0xDFC00C8,0xC9280001,0xB9340001,0x89F800C8,0xB8F80000,0xAE0000C8,0x89F800C8,0xB8F80000,0xAE0000C8,0xAE0000C8,0xDFC00C8,0xC9280001,0xB9340001,0x89F800C8,0xB8F80000,0xAE0000C8,0x89F800C8,0xB8F80000,0xAE0000C8,0xAE0000C8,0x89F800C8, -0xB8F80000,0xAE0000C8,0xAE0000C8,0xAE0000C8,0xF7400062,0x37400C8,0xF5580071,0xFD100005,0xDEFC0001,0xC8F80000,0xB9100000,0xB8D80000,0xFF2C0055,0xF8FC0002,0xBB180001,0xAE0000C8,0x69FC00C8,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0x11000CA,0xB9000001,0xB9000001,0xB9000001,0xB9000001,0xB9000001, -0xB9000001,0x9CFC0001,0x9CFC0001,0x9CFC0001,0x92FC0001,0x19800C8,0x19800C8,0x19800C8,0x19800C8,0x19800C8,0x19800C8,0xA2DC0001,0xA2DC0001,0xA2DC0001,0x92E80001,0x51F800C8,0x51F800C8,0x51F800C8,0x92AC0000,0x880000C8,0xFD08004A,0x11000CA,0x11000CA,0xFD000002,0xDD000001,0xC7000001,0xC7000001,0xAF000001,0xFEF4002D,0xFAF00001,0xA6F80000,0xA2DC0001, -0x21FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table84[] = { -0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x4FF80000, -0x4FF80000,0x4FF80000,0x4FF80000,0x86000001,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x9200000,0x9200000,0x9200000,0x1940000,0x1FFC0000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000, -0x1D00000,0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x9A000001,0x14C0000,0x1380000,0x1380000,0x7680000,0x1880000,0x1A80000,0x1A80000,0xDFC0000,0x7680000,0x1880000,0x45FC0000,0x6DF80000, -0x45FC0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x87FC0000,0x87FC0000,0xAC000001,0xAC000001,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x87FC0000,0x87FC0000,0xAC000001,0xAC000001,0x87FC0000, -0x87FC0000,0xAC000001,0xAC000001,0xAC000001,0x1940000,0x1740000,0x15C0000,0x1D80000,0x39FC0000,0x67FC0000,0x77FC0000,0x91FC0000,0x1B40000,0x9FC0000,0x67FC0000,0xAC000001,0x67FC0000,0x1840000,0x47FC0000,0xA5F80000,0xC0000001,0x47FC0000,0xA5F80000,0xC0000001,0xA5F80000,0xC0000001,0xC0000001,0x47FC0000,0xA5F80000,0xC0000001,0xA5F80000,0xC0000001, -0xC0000001,0xA5F80000,0xC0000001,0xC0000001,0xC0000001,0x47FC0000,0xA5F80000,0xC0000001,0xA5F80000,0xC0000001,0xC0000001,0xA5F80000,0xC0000001,0xC0000001,0xC0000001,0xA5F80000,0xC0000001,0xC0000001,0xC0000001,0xC0000001,0x5E40000,0x79C0000,0x79C0000,0x6BFC0000,0x99FC0000,0xB3F80000,0xC0000001,0xC0000001,0x1BFC0000,0x7DFC0000,0xBFD00000,0xC0000001, -0x8DFC0000,0x1480738,0xFD3802F8,0xD53802D4,0xC13802D5,0xFF240198,0xDD2400D5,0xC32C013A,0xCB240190,0xBF2400D5,0xB7240192,0xFF1002D3,0xDF1000A2,0xC71C00FF,0xCF1000D1,0xC1100002,0xB71400D7,0xC11002D4,0xBD0800FD,0xB508013B,0xAF1002D3,0x3E80734,0xE6EC02D3,0xC11002D4,0xD8E00192,0xC2F800CF,0xB7000192,0xD2C802D3,0xC0D800A3,0xB6D800D7,0xAEE802D4,0x79FC0734, -0xC09802D4,0xB68C0192,0xAE7002D3,0xA4000734,0xFF3402E1,0xFB440590,0xFD4805F4,0xFF18004A,0xE9100001,0xCF100002,0xC3140012,0xBF100011,0xFF2402C7,0xFF080025,0xC50400A9,0xB6D800D7,0x57FC0734,0x15C02D4,0xFB4C00A4,0xCF4C00A4,0xC14800A5,0xF13800C8,0xD3380001,0xC33C0015,0xC53800C8,0xBF340026,0xB73800CA,0xDFC02D3,0xDD1400A2,0xC13000A3,0xD10800C9,0xC1100002, -0xB91C00CA,0x89F802D3,0xC0D800A3,0xB6C800CA,0xAE0002D3,0xDFC02D3,0xDD1400A2,0xC13000A3,0xD10800C9,0xC1100002,0xB91C00CA,0x89F802D3,0xC0D800A3,0xB6C800CA,0xAE0002D3,0x89F802D3,0xC0D800A3,0xB6C800CA,0xAE0002D3,0xAE0002D3,0xFF44013A,0xF558022A,0xF5580225,0xFF20002A,0xE9100001,0xCF100002,0xC31C0002,0xC1040005,0xFF34013A,0xFF0C0012,0xC6FC00A2,0xB6C800CA, -0x69FC02D3,0x13802D4,0x13802D4,0x13802D4,0x13802D4,0xE72400C8,0xE72400C8,0xE72400C8,0xBB2400C8,0xBB2400C8,0xAD2400C9,0xDF1000A2,0xDF1000A2,0xDF1000A2,0xC1100001,0xC1100001,0xB1140026,0xB31000A2,0xB31000A2,0xAD0C0015,0xA51000A2,0x1CC02D3,0x1CC02D3,0x1CC02D3,0xC6F000C9,0xC6F000C9,0xAF0800C9,0xC0D800A2,0xC0D800A2,0xACEC0002,0xA4F400A2,0x6BF802D3, -0x6BF802D3,0xACB400C9,0xA49800A2,0x9A0002D3,0xFD28010A,0xFF2C0209,0x13802D4,0xFF180019,0xE5100001,0xCF100001,0xC9140006,0xBB100002,0xFF1400E5,0xFF04000E,0xC30800A2,0xACEC0002,0x43FC02D3,0x14800A4,0x14800A4,0x14800A4,0x14800A4,0xD3380000,0xD3380000,0xD3380000,0xB7380000,0xB7380000,0xAD380001,0x3E800A2,0x3E800A2,0x3E800A2,0xBD180001,0xBD180001, -0xAD280001,0x79FC00A2,0x79FC00A2,0xACF00001,0xA40000A2,0x3E800A2,0x3E800A2,0x3E800A2,0xBD180001,0xBD180001,0xAD280001,0x79FC00A2,0x79FC00A2,0xACF00001,0xA40000A2,0x79FC00A2,0x79FC00A2,0xACF00001,0xA40000A2,0xA40000A2,0xFB340034,0xFB440048,0x14800A4,0xFF1C0008,0xE3140000,0xCD140000,0xC31C0001,0xBD0C0000,0xF928003D,0xF90C0005,0x57FC00A2,0xACF00001, -0x57FC00A2,0x17000C8,0xEB5C0000,0xCB5C0001,0xC15C0001,0x29FC00C8,0xD3380001,0xC1480001,0x97F800C8,0xC10C0001,0xB60000CA,0x29FC00C8,0xD3380001,0xC1480001,0x97F800C8,0xC10C0001,0xB60000CA,0x97F800C8,0xC10C0001,0xB60000CA,0xB60000CA,0x29FC00C8,0xD3380001,0xC1480001,0x97F800C8,0xC10C0001,0xB60000CA,0x97F800C8,0xC10C0001,0xB60000CA,0xB60000CA,0x97F800C8, -0xC10C0001,0xB60000CA,0xB60000CA,0xB60000CA,0xFF500064,0x18800C8,0xFF6C0071,0xFD28000D,0xE90C0001,0xD10C0000,0xC1240001,0xC0F00001,0xF9480062,0xFF100005,0xC5280000,0xB60000CA,0x7BFC00C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0x12400C8,0xC5100000,0xC5100000,0xC5100000,0xC5100000,0xC5100000, -0xC5100000,0xA5100001,0xA5100001,0xA5100001,0x9B100001,0x3B000C8,0x3B000C8,0x3B000C8,0x3B000C8,0x3B000C8,0x3B000C8,0xACEC0001,0xACEC0001,0xACEC0001,0x9AFC0001,0x5DFC00C8,0x5DFC00C8,0x5DFC00C8,0x9AC00001,0x900000CA,0xF71C0055,0x12400C8,0x12400C8,0xF9140008,0xE9100000,0xD3100000,0xD3100000,0xB9100000,0xFD0C0032,0xFF000004,0xAD0C0001,0xACEC0001, -0x33FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table85[] = { -0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x5BF80000, -0x5BF80000,0x5BF80000,0x5BF80000,0x8E000001,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1340000,0x1340000,0x1340000,0x1AC0000,0x2FFC0000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000, -0x1E80000,0x79F80000,0x79F80000,0x79F80000,0xA2000001,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x79F80000,0x79F80000,0x79F80000,0xA2000001,0x79F80000,0x79F80000,0x79F80000,0xA2000001,0xA2000001,0x75C0000,0x1480000,0x1480000,0x37C0000,0x19C0000,0x3BC0000,0x3BC0000,0x21FC0000,0x37C0000,0x19C0000,0x55FC0000,0x79F80000, -0x55FC0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x23FC0000,0x23FC0000,0x23FC0000,0x93FC0000,0x93FC0000,0xB4000001,0x23FC0000,0x23FC0000,0x23FC0000,0x93FC0000,0x93FC0000,0xB4000001,0x93FC0000,0x93FC0000,0xB4000001,0xB4000001,0x23FC0000,0x23FC0000,0x23FC0000,0x93FC0000,0x93FC0000,0xB4000001,0x93FC0000,0x93FC0000,0xB4000001,0xB4000001,0x93FC0000, -0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0x1A80000,0x1840000,0x16C0000,0x1F00000,0x4DFC0000,0x77FC0000,0x85FC0000,0x9DF40000,0x1C80000,0x23FC0000,0x77FC0000,0xB4000001,0x77FC0000,0x1940000,0x5FFC0000,0xB1F80000,0xC8000001,0x5FFC0000,0xB1F80000,0xC8000001,0xB1F80000,0xC8000001,0xC8000001,0x5FFC0000,0xB1F80000,0xC8000001,0xB1F80000,0xC8000001, -0xC8000001,0xB1F80000,0xC8000001,0xC8000001,0xC8000001,0x5FFC0000,0xB1F80000,0xC8000001,0xB1F80000,0xC8000001,0xC8000001,0xB1F80000,0xC8000001,0xC8000001,0xC8000001,0xB1F80000,0xC8000001,0xC8000001,0xC8000001,0xC8000001,0x5F80000,0xFAC0000,0xFAC0000,0x7DFC0000,0xA7F80000,0xBDF80000,0xC8000001,0xC8000001,0x39FC0000,0x8FFC0000,0xC7E00000,0xC8000001, -0x9DF80000,0x1580738,0xFF480314,0xDD4802D4,0xC94802D5,0xFD3801B8,0xE53400D5,0xCB3C013A,0xD3340190,0xC73400D5,0xBF340192,0xFF2402D8,0xE72000A2,0xCF2C00FF,0xD72000D1,0xC9200002,0xBF2400D7,0xC92002D4,0xC51800FD,0xBD18013B,0xB72002D3,0x7FC0734,0xEEFC02D3,0xC92002D4,0xE0F00192,0xCB0800CF,0xBF100192,0xDAD802D3,0xC8E800A3,0xBEE800D7,0xB6F802D4,0x85FC0734, -0xC8A802D4,0xBE9C0192,0xB68002D3,0xAC000734,0xFF44033E,0xFF4C05D0,0xF5580625,0xFF2C0086,0xF1200001,0xD7200002,0xCB240012,0xC7200011,0xFF3402FA,0xFF1C0054,0xCD1400A9,0xBEE800D7,0x65FC0734,0x16C02D4,0xFF5C00A5,0xD75C00A4,0xC95800A5,0xF94800C8,0xDB480001,0xCB4C0015,0xCD4800C8,0xC7440026,0xBF4800CA,0x25FC02D3,0xE52400A2,0xC94000A3,0xD91800C9,0xC9200002, -0xC12C00CA,0x95F802D3,0xC8E800A3,0xBED800CA,0xB60002D3,0x25FC02D3,0xE52400A2,0xC94000A3,0xD91800C9,0xC9200002,0xC12C00CA,0x95F802D3,0xC8E800A3,0xBED800CA,0xB60002D3,0x95F802D3,0xC8E800A3,0xBED800CA,0xB60002D3,0xB60002D3,0xFF500155,0xFD68022A,0xFD680225,0xFF340042,0xF1200001,0xD7200002,0xCB2C0002,0xC9140005,0xFF44015B,0xFF240029,0xCF0C00A2,0xBED800CA, -0x79FC02D3,0x14802D4,0x14802D4,0x14802D4,0x14802D4,0xEF3400C8,0xEF3400C8,0xEF3400C8,0xC33400C8,0xC33400C8,0xB53400C9,0xE72000A2,0xE72000A2,0xE72000A2,0xC9200001,0xC9200001,0xB9240026,0xBB2000A2,0xBB2000A2,0xB51C0015,0xAD2000A2,0x1E402D3,0x1E402D3,0x1E402D3,0xCF0000C9,0xCF0000C9,0xB71800C9,0xC8E800A2,0xC8E800A2,0xB4FC0002,0xAD0400A2,0x77F802D3, -0x77F802D3,0xB4C400C9,0xACA800A2,0xA20002D3,0xFD380124,0xFB44020C,0x14802D4,0xFD280035,0xED200001,0xD7200001,0xD1240006,0xC3200002,0xFB2C0109,0xFF18001A,0xCB1800A2,0xB4FC0002,0x53FC02D3,0x15800A4,0x15800A4,0x15800A4,0x15800A4,0xDB480000,0xDB480000,0xDB480000,0xBF480000,0xBF480000,0xB5480001,0x7FC00A2,0x7FC00A2,0x7FC00A2,0xC5280001,0xC5280001, -0xB5380001,0x85FC00A2,0x85FC00A2,0xB5000001,0xAC0000A2,0x7FC00A2,0x7FC00A2,0x7FC00A2,0xC5280001,0xC5280001,0xB5380001,0x85FC00A2,0x85FC00A2,0xB5000001,0xAC0000A2,0x85FC00A2,0x85FC00A2,0xB5000001,0xAC0000A2,0xAC0000A2,0xF748003D,0xF3540055,0x15800A4,0xFD30000D,0xEB240000,0xD5240000,0xCB2C0001,0xC51C0000,0xFF340041,0xFD200008,0x65FC00A2,0xB5000001, -0x65FC00A2,0x18000C8,0xF36C0000,0xD36C0001,0xC96C0001,0x41FC00C8,0xDB480001,0xC9580001,0xA1FC00C8,0xC91C0001,0xBE0000CA,0x41FC00C8,0xDB480001,0xC9580001,0xA1FC00C8,0xC91C0001,0xBE0000CA,0xA1FC00C8,0xC91C0001,0xBE0000CA,0xBE0000CA,0x41FC00C8,0xDB480001,0xC9580001,0xA1FC00C8,0xC91C0001,0xBE0000CA,0xA1FC00C8,0xC91C0001,0xBE0000CA,0xBE0000CA,0xA1FC00C8, -0xC91C0001,0xBE0000CA,0xBE0000CA,0xBE0000CA,0xF7680071,0x59800C8,0xF77C0080,0xFF400012,0xF11C0001,0xD91C0000,0xC9340001,0xC9000001,0xFD580064,0xFF2C000D,0xCD380000,0xBE0000CA,0x89FC00C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0x13400C8,0xCD200000,0xCD200000,0xCD200000,0xCD200000,0xCD200000, -0xCD200000,0xAD200001,0xAD200001,0xAD200001,0xA3200001,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0xB4FC0001,0xB4FC0001,0xB4FC0001,0xA30C0001,0x69FC00C8,0x69FC00C8,0x69FC00C8,0xA2D00001,0x980000CA,0xFF2C0055,0x13400C8,0x13400C8,0xFB24000D,0xF1200000,0xDB200000,0xDB200000,0xC1200000,0xFD1C003D,0xFF140005,0xB51C0001,0xB4FC0001, -0x41FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table86[] = { -0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x67F80000, -0x67F80000,0x67F80000,0x67F80000,0x96000001,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1440000,0x1440000,0x1440000,0x1C40000,0x3FF80000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000, -0x3FC0000,0x85F80000,0x85F80000,0x85F80000,0xAA000001,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x85F80000,0x85F80000,0x85F80000,0xAA000001,0x85F80000,0x85F80000,0x85F80000,0xAA000001,0xAA000001,0xF6C0000,0x1580000,0x1580000,0x1900000,0x1B00000,0x1D40000,0x1D40000,0x33FC0000,0x1900000,0x1B00000,0x63FC0000,0x85F80000, -0x63FC0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x9FF80000,0x9FF80000,0xBC000001,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x9FF80000,0x9FF80000,0xBC000001,0x9FF80000,0x9FF80000,0xBC000001,0xBC000001,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x9FF80000,0x9FF80000,0xBC000001,0x9FF80000,0x9FF80000,0xBC000001,0xBC000001,0x9FF80000, -0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0x5B80000,0x3940000,0x17C0000,0xDFC0000,0x61FC0000,0x85FC0000,0x93F80000,0xA7F80000,0x1DC0000,0x3BFC0000,0x85FC0000,0xBC000001,0x85FC0000,0x1A40000,0x77FC0000,0xBDF80000,0xD0000001,0x77FC0000,0xBDF80000,0xD0000001,0xBDF80000,0xD0000001,0xD0000001,0x77FC0000,0xBDF80000,0xD0000001,0xBDF80000,0xD0000001, -0xD0000001,0xBDF80000,0xD0000001,0xD0000001,0xD0000001,0x77FC0000,0xBDF80000,0xD0000001,0xBDF80000,0xD0000001,0xD0000001,0xBDF80000,0xD0000001,0xD0000001,0xD0000001,0xBDF80000,0xD0000001,0xD0000001,0xD0000001,0xD0000001,0x23FC0000,0x1C00000,0x1C00000,0x91FC0000,0xB5F80000,0xC7F80000,0xD0000001,0xD0000001,0x57FC0000,0xA1FC0000,0xCFF00000,0xD0000001, -0xABFC0000,0x1680738,0xFF5C0339,0xE55802D4,0xD15802D5,0xFF4801E0,0xED4400D5,0xD34C013A,0xDB440190,0xCF4400D5,0xC7440192,0xFD3802F8,0xEF3000A2,0xD73C00FF,0xDF3000D1,0xD1300002,0xC73400D7,0xD13002D4,0xCD2800FD,0xC528013B,0xBF3002D3,0x1FFC0734,0xF70C02D3,0xD13002D4,0xE9000192,0xD31800CF,0xC7200192,0xE2E802D3,0xD0F800A3,0xC6F800D7,0xBF0802D4,0x91FC0734, -0xD0B802D4,0xC6AC0192,0xBE9002D3,0xB4000734,0xFF580386,0xFB6405D2,0xFD680625,0xFF4000D3,0xF9300001,0xDF300002,0xD3340012,0xCF300011,0xFD4C036A,0xFF300099,0xD52400A9,0xC6F800D7,0x75FC0734,0x17C02D4,0xFF6C00B4,0xDF6C00A4,0xD16800A5,0xFD5800CA,0xE3580001,0xD35C0015,0xD55800C8,0xCF540026,0xC75800CA,0x3DFC02D3,0xED3400A2,0xD15000A3,0xE12800C9,0xD1300002, -0xC93C00CA,0xA1F802D3,0xD0F800A3,0xC6E800CA,0xBE0002D3,0x3DFC02D3,0xED3400A2,0xD15000A3,0xE12800C9,0xD1300002,0xC93C00CA,0xA1F802D3,0xD0F800A3,0xC6E800CA,0xBE0002D3,0xA1F802D3,0xD0F800A3,0xC6E800CA,0xBE0002D3,0xBE0002D3,0xFF64017A,0xF5780248,0xF77C0244,0xFF48006E,0xF9300001,0xDF300002,0xD33C0002,0xD1240005,0xFF5C016D,0xFD3C004B,0xD71C00A2,0xC6E800CA, -0x87FC02D3,0x15802D4,0x15802D4,0x15802D4,0x15802D4,0xF74400C8,0xF74400C8,0xF74400C8,0xCB4400C8,0xCB4400C8,0xBD4400C9,0xEF3000A2,0xEF3000A2,0xEF3000A2,0xD1300001,0xD1300001,0xC1340026,0xC33000A2,0xC33000A2,0xBD2C0015,0xB53000A2,0x1FC02D3,0x1FC02D3,0x1FC02D3,0xD71000C9,0xD71000C9,0xBF2800C9,0xD0F800A2,0xD0F800A2,0xBD0C0002,0xB51400A2,0x83F802D3, -0x83F802D3,0xBCD400C9,0xB4B800A2,0xAA0002D3,0xFD48013D,0xF3540229,0x15802D4,0xFF3C0048,0xF5300001,0xDF300001,0xD9340006,0xCB300002,0xFF3C0120,0xFD2C0035,0xD32800A2,0xBD0C0002,0x61FC02D3,0x16800A4,0x16800A4,0x16800A4,0x16800A4,0xE3580000,0xE3580000,0xE3580000,0xC7580000,0xC7580000,0xBD580001,0x1FFC00A2,0x1FFC00A2,0x1FFC00A2,0xCD380001,0xCD380001, -0xBD480001,0x91FC00A2,0x91FC00A2,0xBD100001,0xB40000A2,0x1FFC00A2,0x1FFC00A2,0x1FFC00A2,0xCD380001,0xCD380001,0xBD480001,0x91FC00A2,0x91FC00A2,0xBD100001,0xB40000A2,0x91FC00A2,0x91FC00A2,0xBD100001,0xB40000A2,0xB40000A2,0xFF58003D,0xFB640055,0x16800A4,0xFF440012,0xF3340000,0xDD340000,0xD33C0001,0xCD2C0000,0xFD4C0048,0xFF300011,0x75FC00A2,0xBD100001, -0x75FC00A2,0x19000C8,0xFB7C0000,0xDB7C0001,0xD17C0001,0x59FC00C8,0xE3580001,0xD1680001,0xADFC00C8,0xD12C0001,0xC60000CA,0x59FC00C8,0xE3580001,0xD1680001,0xADFC00C8,0xD12C0001,0xC60000CA,0xADFC00C8,0xD12C0001,0xC60000CA,0xC60000CA,0x59FC00C8,0xE3580001,0xD1680001,0xADFC00C8,0xD12C0001,0xC60000CA,0xADFC00C8,0xD12C0001,0xC60000CA,0xC60000CA,0xADFC00C8, -0xD12C0001,0xC60000CA,0xC60000CA,0xC60000CA,0xFF780071,0xDA800C8,0xFF8C0080,0xFF580020,0xF92C0001,0xE12C0000,0xD1440001,0xD1100001,0xFF700071,0xFD480019,0xD5480000,0xC60000CA,0x99FC00C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0x14400C8,0xD5300000,0xD5300000,0xD5300000,0xD5300000,0xD5300000, -0xD5300000,0xB5300001,0xB5300001,0xB5300001,0xAB300001,0x1E000C8,0x1E000C8,0x1E000C8,0x1E000C8,0x1E000C8,0x1E000C8,0xBD0C0001,0xBD0C0001,0xBD0C0001,0xAB1C0001,0x75FC00C8,0x75FC00C8,0x75FC00C8,0xAAE00001,0xA00000CA,0xF73C0064,0x14400C8,0x14400C8,0xFB340014,0xF9300000,0xE3300000,0xE3300000,0xC9300000,0xF9300048,0xF928000D,0xBD2C0001,0xBD0C0001, -0x51FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table87[] = { -0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x73F80000, -0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x3540000,0x3540000,0x3540000,0x1DC0000,0x4DFC0000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000, -0x1BFC0000,0x91F80000,0x91F80000,0x91F80000,0xB2000001,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x91F80000,0x91F80000,0x91F80000,0xB2000001,0x91F80000,0x91F80000,0x91F80000,0xB2000001,0xB2000001,0x1800000,0x1680000,0x1680000,0x7A00000,0x1C40000,0x3E80000,0x3E80000,0x47FC0000,0x7A00000,0x1C40000,0x73FC0000,0x91F80000, -0x73FC0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xC4000001,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xC4000001,0xABF80000,0xABF80000,0xC4000001,0xC4000001,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xC4000001,0xABF80000,0xABF80000,0xC4000001,0xC4000001,0xABF80000, -0xABF80000,0xC4000001,0xC4000001,0xC4000001,0x1CC0000,0xBA40000,0x18C0000,0x2BFC0000,0x73FC0000,0x95FC0000,0x9FFC0000,0xB3F40000,0x1F00000,0x53FC0000,0x95FC0000,0xC4000001,0x95FC0000,0x1B40000,0x8FFC0000,0xC9F80000,0xD8000001,0x8FFC0000,0xC9F80000,0xD8000001,0xC9F80000,0xD8000001,0xD8000001,0x8FFC0000,0xC9F80000,0xD8000001,0xC9F80000,0xD8000001, -0xD8000001,0xC9F80000,0xD8000001,0xD8000001,0xD8000001,0x8FFC0000,0xC9F80000,0xD8000001,0xC9F80000,0xD8000001,0xD8000001,0xC9F80000,0xD8000001,0xD8000001,0xD8000001,0xC9F80000,0xD8000001,0xD8000001,0xD8000001,0xD8000001,0x4BFC0000,0x1D00000,0x1D00000,0xA5FC0000,0xC1FC0000,0xD1F80000,0xD8000001,0xD8000001,0x75FC0000,0xB1FC0000,0xD9C40000,0xD8000001, -0xBBFC0000,0x1780738,0xFF6C0378,0xED6802D4,0xD96802D5,0xFF5C0212,0xF55400D5,0xDB5C013A,0xE3540190,0xD75400D5,0xCF540192,0xFF50031B,0xF74000A2,0xDF4C00FF,0xE74000D1,0xD9400002,0xCF4400D7,0xD94002D4,0xD53800FD,0xCD38013B,0xC74002D3,0x37FC0734,0xFF1C02D3,0xD94002D4,0xF1100192,0xDB2800CF,0xCF300192,0xEAF802D3,0xD90800A3,0xCF0800D7,0xC71802D4,0x9DFC0734, -0xD8C802D4,0xCEBC0192,0xC6A002D3,0xBC000734,0xFF6403DE,0xF3740618,0xF5780658,0xFF54012A,0xFF400006,0xE7400002,0xDB440012,0xD7400011,0xFF5C03A6,0xFF4400EA,0xDD3400A9,0xCF0800D7,0x83FC0734,0x18C02D4,0xFF8000CD,0xE77C00A4,0xD97800A5,0xFF6C00D4,0xEB680001,0xDB6C0015,0xDD6800C8,0xD7640026,0xCF6800CA,0x55FC02D3,0xF54400A2,0xD96000A3,0xE93800C9,0xD9400002, -0xD14C00CA,0xADF802D3,0xD90800A3,0xCEF800CA,0xC60002D3,0x55FC02D3,0xF54400A2,0xD96000A3,0xE93800C9,0xD9400002,0xD14C00CA,0xADF802D3,0xD90800A3,0xCEF800CA,0xC60002D3,0xADF802D3,0xD90800A3,0xCEF800CA,0xC60002D3,0xC60002D3,0xFF780191,0xFD880248,0xFF8C0244,0xFF5C0096,0xFD440005,0xE7400002,0xDB4C0002,0xD9340005,0xFF7001A5,0xFF540071,0xDF2C00A2,0xCEF800CA, -0x97FC02D3,0x16802D4,0x16802D4,0x16802D4,0x16802D4,0xFF5400C8,0xFF5400C8,0xFF5400C8,0xD35400C8,0xD35400C8,0xC55400C9,0xF74000A2,0xF74000A2,0xF74000A2,0xD9400001,0xD9400001,0xC9440026,0xCB4000A2,0xCB4000A2,0xC53C0015,0xBD4000A2,0x19FC02D3,0x19FC02D3,0x19FC02D3,0xDF2000C9,0xDF2000C9,0xC73800C9,0xD90800A2,0xD90800A2,0xC51C0002,0xBD2400A2,0x8FF802D3, -0x8FF802D3,0xC4E400C9,0xBCC800A2,0xB20002D3,0xFF58015D,0xFB640229,0x16802D4,0xFF4C0065,0xFD400001,0xE7400001,0xE1440006,0xD3400002,0xFF500139,0xFF3C0054,0xDB3800A2,0xC51C0002,0x71FC02D3,0x17800A4,0x17800A4,0x17800A4,0x17800A4,0xEB680000,0xEB680000,0xEB680000,0xCF680000,0xCF680000,0xC5680001,0x37FC00A2,0x37FC00A2,0x37FC00A2,0xD5480001,0xD5480001, -0xC5580001,0x9DFC00A2,0x9DFC00A2,0xC5200001,0xBC0000A2,0x37FC00A2,0x37FC00A2,0x37FC00A2,0xD5480001,0xD5480001,0xC5580001,0x9DFC00A2,0x9DFC00A2,0xC5200001,0xBC0000A2,0x9DFC00A2,0x9DFC00A2,0xC5200001,0xBC0000A2,0xBC0000A2,0xFB6C0048,0xF3740064,0x17800A4,0xFD580019,0xFB440000,0xE5440000,0xDB4C0001,0xD53C0000,0xF5640055,0xFF480014,0x83FC00A2,0xC5200001, -0x83FC00A2,0x1A000C8,0xFF8C0001,0xE38C0001,0xD98C0001,0x71FC00C8,0xEB680001,0xD9780001,0xB9FC00C8,0xD93C0001,0xCE0000CA,0x71FC00C8,0xEB680001,0xD9780001,0xB9FC00C8,0xD93C0001,0xCE0000CA,0xB9FC00C8,0xD93C0001,0xCE0000CA,0xCE0000CA,0x71FC00C8,0xEB680001,0xD9780001,0xB9FC00C8,0xD93C0001,0xCE0000CA,0xB9FC00C8,0xD93C0001,0xCE0000CA,0xCE0000CA,0xB9FC00C8, -0xD93C0001,0xCE0000CA,0xCE0000CA,0xCE0000CA,0xFF8C0080,0x1BC00C8,0xF79C0091,0xFF6C002D,0xFB480005,0xE93C0000,0xD9540001,0xD9200001,0xF7880080,0xFF600029,0xDD580000,0xCE0000CA,0xA7FC00C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0x15400C8,0xDD400000,0xDD400000,0xDD400000,0xDD400000,0xDD400000, -0xDD400000,0xBD400001,0xBD400001,0xBD400001,0xB3400001,0x1F800C8,0x1F800C8,0x1F800C8,0x1F800C8,0x1F800C8,0x1F800C8,0xC51C0001,0xC51C0001,0xC51C0001,0xB32C0001,0x81FC00C8,0x81FC00C8,0x81FC00C8,0xB2F00001,0xA80000CA,0xFF4C0064,0x15400C8,0x15400C8,0xFD480019,0xFD400001,0xEB400000,0xEB400000,0xD1400000,0xFF3C0050,0xFD380014,0xC53C0001,0xC51C0001, -0x5FFC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table88[] = { -0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x3F40000,0x7FFC0000, -0x7FFC0000,0x7FFC0000,0x7FFC0000,0xA8000000,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1500001,0x1680000,0x1680000,0x1680000,0x3F40000,0x5FF80000,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000, -0x37FC0000,0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0xBC000000,0x1940000,0x1780001,0x1780001,0x1B80000,0x3D80000,0x9FC0000,0x9FC0000,0x5DFC0000,0x1B80000,0x3D80000,0x83FC0000,0x9DFC0000, -0x83FC0000,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xCE000000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xCE000000,0xB9F80000,0xB9F80000,0xCE000000,0xCE000000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xCE000000,0xB9F80000,0xB9F80000,0xCE000000,0xCE000000,0xB9F80000, -0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0x3E00000,0x5B80000,0x19C0001,0x4DFC0000,0x8BFC0000,0xA5FC0000,0xAFFC0000,0xBFF40000,0x15FC0000,0x6FFC0000,0xA5FC0000,0xCE000000,0xA5FC0000,0x1C40001,0xABFC0000,0xD7F80000,0xE2000000,0xABFC0000,0xD7F80000,0xE2000000,0xD7F80000,0xE2000000,0xE2000000,0xABFC0000,0xD7F80000,0xE2000000,0xD7F80000,0xE2000000, -0xE2000000,0xD7F80000,0xE2000000,0xE2000000,0xE2000000,0xABFC0000,0xD7F80000,0xE2000000,0xD7F80000,0xE2000000,0xE2000000,0xD7F80000,0xE2000000,0xE2000000,0xE2000000,0xD7F80000,0xE2000000,0xE2000000,0xE2000000,0xE2000000,0x77FC0000,0x1E40000,0x1E40000,0xBBFC0000,0xD1FC0000,0xDDF40000,0xE2000000,0xE2000000,0x97FC0000,0xC5FC0000,0xE1F40000,0xE2000000, -0xCBFC0000,0x18C0734,0xFF8003BF,0xF57802D4,0xE37802D3,0xFF740272,0xFD6800D7,0xE76C013B,0xEB680192,0xE16400D7,0xD9640192,0xFF680361,0xFF5400A3,0xE75C00FD,0xEF5000CF,0xE3540002,0xD95800D5,0xE35002D4,0xDD4800FF,0xD550013A,0xCF5002D5,0x53FC0734,0xFF3802E3,0xE35402D4,0xFB200192,0xE33800D1,0xD9400190,0xF50802D3,0xE31800A2,0xD91C00D5,0xCF2C02D4,0xABF80734, -0xE2D802D3,0xD8C80190,0xCEB402D4,0xC4000738,0xFF780447,0xFD880614,0xFD88065C,0xFF6C01B7,0xFF58002E,0xF1540002,0xE3580011,0xE14C0012,0xFF700413,0xFF5C016A,0xE54800A9,0xD91C00D5,0x95FC0734,0x1A002D3,0xFF9000FB,0xF18C00A2,0xE38C00A2,0xFF8000F1,0xF37C0002,0xE57C0015,0xE77800C9,0xE1740026,0xD97800C9,0x71FC02D3,0xFF5400A2,0xE37000A2,0xF34800C9,0xE3540001, -0xD96000C8,0xB9FC02D3,0xE31400A2,0xD90800C8,0xCE0002D4,0x71FC02D3,0xFF5400A2,0xE37000A2,0xF34800C9,0xE3540001,0xD96000C8,0xB9FC02D3,0xE31400A2,0xD90800C8,0xCE0002D4,0xB9FC02D3,0xE31400A2,0xD90800C8,0xCE0002D4,0xCE0002D4,0xFD9001C4,0xF79C0269,0xF79C0266,0xFF7800D1,0xFF5C001A,0xF1540001,0xE3600002,0xE1440006,0xFF8801C3,0xFF7000B5,0xE74000A2,0xD90800C8, -0xA7FC02D3,0x17802D3,0x17802D3,0x17802D3,0x17802D3,0xFF6800CE,0xFF6800CE,0xFF6800CE,0xDD6400CA,0xDD6400CA,0xCF6400CA,0xFF5400A3,0xFF5400A3,0xFF5400A3,0xE1540002,0xE1540002,0xD1580026,0xD35400A3,0xD35400A3,0xCD500015,0xC55000A5,0x35FC02D3,0x35FC02D3,0x35FC02D3,0xE73400C9,0xE73400C9,0xCF4C00C8,0xE11C00A2,0xE11C00A2,0xCF2C0001,0xC53800A4,0x9DF402D3, -0x9DF402D3,0xCEF400C8,0xC4E000A4,0xBC0002D4,0xFF680189,0xF374024B,0x17802D3,0xFF600099,0xFF540009,0xEF540002,0xE9540005,0xDD500002,0xFF64016D,0xFF540079,0xE34C00A3,0xCF2C0001,0x81FC02D3,0x18C00A2,0x18C00A2,0x18C00A2,0x18C00A2,0xF17C0001,0xF17C0001,0xF17C0001,0xD77C0001,0xD77C0001,0xCF780001,0x53FC00A2,0x53FC00A2,0x53FC00A2,0xDD5C0001,0xDD5C0001, -0xCF680000,0xABF800A2,0xABF800A2,0xCF300000,0xC40000A4,0x53FC00A2,0x53FC00A2,0x53FC00A2,0xDD5C0001,0xDD5C0001,0xCF680000,0xABF800A2,0xABF800A2,0xCF300000,0xC40000A4,0xABF800A2,0xABF800A2,0xCF300000,0xC40000A4,0xC40000A4,0xF7800055,0xFD880062,0x18C00A2,0xFB700029,0xFF580001,0xED580000,0xE55C0000,0xDD500001,0xFD740055,0xFD640020,0x95FC00A2,0xCF300000, -0x95FC00A2,0x1B000CA,0xFFA4000D,0xED9C0001,0xE39C0001,0x8DFC00C8,0xF37C0001,0xE3880001,0xC7FC00C8,0xE34C0000,0xD80000C8,0x8DFC00C8,0xF37C0001,0xE3880001,0xC7FC00C8,0xE34C0000,0xD80000C8,0xC7FC00C8,0xE34C0000,0xD80000C8,0xD80000C8,0x8DFC00C8,0xF37C0001,0xE3880001,0xC7FC00C8,0xE34C0000,0xD80000C8,0xC7FC00C8,0xE34C0000,0xD80000C8,0xD80000C8,0xC7FC00C8, -0xE34C0000,0xD80000C8,0xD80000C8,0xD80000C8,0xF7A40091,0xFCC00C8,0xFFAC0095,0xFF8C0048,0xFF64000D,0xF34C0000,0xE3640000,0xE32C0000,0xFF980082,0xFD80003D,0xE56C0001,0xD80000C8,0xB9FC00C8,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0x16400CA,0xE3540001,0xE3540001,0xE3540001,0xE3540001,0xE3540001, -0xE3540001,0xC7500001,0xC7500001,0xC7500001,0xBD500001,0x19FC00C8,0x19FC00C8,0x19FC00C8,0x19FC00C8,0x19FC00C8,0x19FC00C8,0xCD300001,0xCD300001,0xCD300001,0xBD3C0001,0x8FF800C8,0x8FF800C8,0x8FF800C8,0xBD000000,0xB20000C8,0xF9600071,0x16400CA,0x16400CA,0xFF580022,0xFD540005,0xF1540001,0xF1540001,0xD9540001,0xFD540055,0xFF500019,0xD14C0000,0xCD300001, -0x71FC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table89[] = { -0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x13FC0000,0x8BFC0000, -0x8BFC0000,0x8BFC0000,0x8BFC0000,0xB0000000,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x1600001,0x5780000,0x5780000,0x5780000,0x13FC0000,0x6DFC0000,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000, -0x4FFC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0xC4000000,0x1A40000,0x1880001,0x1880001,0x5C80000,0x3EC0000,0x27FC0000,0x27FC0000,0x71FC0000,0x5C80000,0x3EC0000,0x93FC0000,0xA9FC0000, -0x93FC0000,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x87FC0000,0x87FC0000,0x87FC0000,0xC5F80000,0xC5F80000,0xD6000000,0x87FC0000,0x87FC0000,0x87FC0000,0xC5F80000,0xC5F80000,0xD6000000,0xC5F80000,0xC5F80000,0xD6000000,0xD6000000,0x87FC0000,0x87FC0000,0x87FC0000,0xC5F80000,0xC5F80000,0xD6000000,0xC5F80000,0xC5F80000,0xD6000000,0xD6000000,0xC5F80000, -0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0x1F40000,0xDC80000,0x1AC0001,0x6BFC0000,0x9DFC0000,0xB5FC0000,0xBDF80000,0xC9F80000,0x3DFC0000,0x87FC0000,0xB5FC0000,0xD6000000,0xB5FC0000,0x1D40001,0xC3FC0000,0xE1FC0000,0xEA000000,0xC3FC0000,0xE1FC0000,0xEA000000,0xE1FC0000,0xEA000000,0xEA000000,0xC3FC0000,0xE1FC0000,0xEA000000,0xE1FC0000,0xEA000000, -0xEA000000,0xE1FC0000,0xEA000000,0xEA000000,0xEA000000,0xC3FC0000,0xE1FC0000,0xEA000000,0xE1FC0000,0xEA000000,0xEA000000,0xE1FC0000,0xEA000000,0xEA000000,0xEA000000,0xE1FC0000,0xEA000000,0xEA000000,0xEA000000,0xEA000000,0x9FFC0000,0x3F40000,0x3F40000,0xCFFC0000,0xDFF80000,0xE7F40000,0xEA000000,0xEA000000,0xB5FC0000,0xD5FC0000,0xEBC80000,0xEA000000, -0xDBFC0000,0x19C0734,0xFF90041C,0xFD8802D4,0xEB8802D3,0xFF8002E2,0xFF7800E7,0xEF7C013B,0xF3780192,0xE97400D7,0xE1740192,0xFF7403A9,0xFF6800C6,0xEF6C00FD,0xF76000CF,0xEB640002,0xE16800D5,0xEB6002D4,0xE55800FF,0xDD60013A,0xD76002D5,0x6BFC0734,0xFF580319,0xEB6402D4,0xFF38019A,0xEB4800D1,0xE1500190,0xFD1802D3,0xEB2800A2,0xE12C00D5,0xD73C02D4,0xB7F80734, -0xEAE802D3,0xE0D80190,0xD6C402D4,0xCC000738,0xFF8C0494,0xF598065A,0xF79C068F,0xFF800236,0xFF6C0086,0xF9640002,0xEB680011,0xE95C0012,0xFF800481,0xFF7401F7,0xED5800A9,0xE12C00D5,0xA3FC0734,0x1B002D3,0xFFA40126,0xF99C00A2,0xEB9C00A2,0xFF980119,0xFB8C0002,0xED8C0015,0xEF8800C9,0xE9840026,0xE18800C9,0x89FC02D3,0xFF7000AD,0xEB8000A2,0xFB5800C9,0xEB640001, -0xE17000C8,0xC5FC02D3,0xEB2400A2,0xE11800C8,0xD60002D4,0x89FC02D3,0xFF7000AD,0xEB8000A2,0xFB5800C9,0xEB640001,0xE17000C8,0xC5FC02D3,0xEB2400A2,0xE11800C8,0xD60002D4,0xC5FC02D3,0xEB2400A2,0xE11800C8,0xD60002D4,0xD60002D4,0xFFA001E5,0xFFAC0269,0xFFAC0266,0xFF94010A,0xFF7C0049,0xF9640001,0xEB700002,0xE9540006,0xFF9801E6,0xFF8400E2,0xEF5000A2,0xE11800C8, -0xB7FC02D3,0x18802D3,0x18802D3,0x18802D3,0x18802D3,0xFF7800E3,0xFF7800E3,0xFF7800E3,0xE57400CA,0xE57400CA,0xD77400CA,0xFF6800AD,0xFF6800AD,0xFF6800AD,0xE9640002,0xE9640002,0xD9680026,0xDB6400A3,0xDB6400A3,0xD5600015,0xCD6000A5,0x4DFC02D3,0x4DFC02D3,0x4DFC02D3,0xEF4400C9,0xEF4400C9,0xD75C00C8,0xE92C00A2,0xE92C00A2,0xD73C0001,0xCD4800A4,0xA7FC02D3, -0xA7FC02D3,0xD70400C8,0xCCF000A4,0xC40002D4,0xFD8001A6,0xFB84024B,0x18802D3,0xFF7000CA,0xFF6C0022,0xF7640002,0xF1640005,0xE5600002,0xFF78018A,0xFF6800A8,0xEB5C00A3,0xD73C0001,0x91FC02D3,0x19C00A2,0x19C00A2,0x19C00A2,0x19C00A2,0xF98C0001,0xF98C0001,0xF98C0001,0xDF8C0001,0xDF8C0001,0xD7880001,0x6BFC00A2,0x6BFC00A2,0x6BFC00A2,0xE56C0001,0xE56C0001, -0xD7780000,0xB7F800A2,0xB7F800A2,0xD7400000,0xCC0000A4,0x6BFC00A2,0x6BFC00A2,0x6BFC00A2,0xE56C0001,0xE56C0001,0xD7780000,0xB7F800A2,0xB7F800A2,0xD7400000,0xCC0000A4,0xB7F800A2,0xB7F800A2,0xD7400000,0xCC0000A4,0xCC0000A4,0xFF900055,0xF5980071,0x19C00A2,0xFD840032,0xFF700005,0xF5680000,0xED6C0000,0xE5600001,0xFD880062,0xFF74002D,0xA3FC00A2,0xD7400000, -0xA3FC00A2,0x1C000CA,0xFFB40022,0xF5AC0001,0xEBAC0001,0xA5FC00C8,0xFB8C0001,0xEB980001,0xD3FC00C8,0xEB5C0000,0xE00000C8,0xA5FC00C8,0xFB8C0001,0xEB980001,0xD3FC00C8,0xEB5C0000,0xE00000C8,0xD3FC00C8,0xEB5C0000,0xE00000C8,0xE00000C8,0xA5FC00C8,0xFB8C0001,0xEB980001,0xD3FC00C8,0xEB5C0000,0xE00000C8,0xD3FC00C8,0xEB5C0000,0xE00000C8,0xE00000C8,0xD3FC00C8, -0xEB5C0000,0xE00000C8,0xE00000C8,0xE00000C8,0xFFB40091,0x1E000C8,0xF9C000A2,0xFFA00059,0xFF7C0025,0xFB5C0000,0xEB740000,0xEB3C0000,0xFFAC0095,0xFF940050,0xED7C0001,0xE00000C8,0xC7FC00C8,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0x17400CA,0xEB640001,0xEB640001,0xEB640001,0xEB640001,0xEB640001, -0xEB640001,0xCF600001,0xCF600001,0xCF600001,0xC5600001,0x31FC00C8,0x31FC00C8,0x31FC00C8,0x31FC00C8,0x31FC00C8,0x31FC00C8,0xD5400001,0xD5400001,0xD5400001,0xC54C0001,0x9BF800C8,0x9BF800C8,0x9BF800C8,0xC5100000,0xBA0000C8,0xFF6C007D,0x17400CA,0x17400CA,0xFF68002D,0xFF64000A,0xF9640001,0xF9640001,0xE1640001,0xF9680062,0xFD600029,0xD95C0000,0xD5400001, -0x7FFC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table90[] = { -0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x97FC0000, -0x97FC0000,0x97FC0000,0x97FC0000,0xB8000000,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0xD880000,0xD880000,0xD880000,0x2BFC0000,0x7DF80000,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000, -0x69FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0xCC000000,0x3B40000,0x1980001,0x1980001,0x1DC0000,0xBFC0000,0x45FC0000,0x45FC0000,0x85FC0000,0x1DC0000,0xBFC0000,0xA1FC0000,0xB5FC0000, -0xA1FC0000,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x9FFC0000,0x9FFC0000,0x9FFC0000,0xD1F80000,0xD1F80000,0xDE000000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0xD1F80000,0xD1F80000,0xDE000000,0xD1F80000,0xD1F80000,0xDE000000,0xDE000000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0xD1F80000,0xD1F80000,0xDE000000,0xD1F80000,0xD1F80000,0xDE000000,0xDE000000,0xD1F80000, -0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0x1FFC0000,0x1DC0000,0x1BC0001,0x89FC0000,0xB1FC0000,0xC3FC0000,0xC9FC0000,0xD5F40000,0x63FC0000,0x9FFC0000,0xC3FC0000,0xDE000000,0xC3FC0000,0x1E40001,0xDBFC0000,0xEDFC0000,0xF2000000,0xDBFC0000,0xEDFC0000,0xF2000000,0xEDFC0000,0xF2000000,0xF2000000,0xDBFC0000,0xEDFC0000,0xF2000000,0xEDFC0000,0xF2000000, -0xF2000000,0xEDFC0000,0xF2000000,0xF2000000,0xF2000000,0xDBFC0000,0xEDFC0000,0xF2000000,0xEDFC0000,0xF2000000,0xF2000000,0xEDFC0000,0xF2000000,0xF2000000,0xF2000000,0xEDFC0000,0xF2000000,0xF2000000,0xF2000000,0xF2000000,0xC7FC0000,0x47FC0000,0x47FC0000,0xE3FC0000,0xEBFC0000,0xF1F40000,0xF2000000,0xF2000000,0xD3FC0000,0xE7FC0000,0xF3D80000,0xF2000000, -0xE9FC0000,0x1AC0734,0xFFA40477,0xFF9C02EB,0xF39802D3,0xFF980352,0xFF8C012F,0xF78C013B,0xFB880192,0xF18400D7,0xE9840192,0xFF8C0401,0xFF7C011E,0xF77C00FD,0xFF7000CF,0xF3740002,0xE97800D5,0xF37002D4,0xED6800FF,0xE570013A,0xDF7002D5,0x83FC0734,0xFF700361,0xF37402D4,0xFF5801E2,0xF35800D1,0xE9600190,0xFF3802DD,0xF33800A2,0xE93C00D5,0xDF4C02D4,0xC3F80734, -0xF2F802D3,0xE8E80190,0xDED402D4,0xD4000738,0xFFA004E6,0xFDA8065A,0xFFAC068F,0xFF9402CB,0xFF84010B,0xFF740006,0xF3780011,0xF16C0012,0xFF9804CA,0xFF84028B,0xF56800A9,0xE93C00D5,0xB3FC0734,0x1C002D3,0xFFB4016B,0xFFAC00A3,0xF3AC00A2,0xFFA8015B,0xFF9C0012,0xF59C0015,0xF79800C9,0xF1940026,0xE99800C9,0xA3FC02D3,0xFF8800D5,0xF39000A2,0xFF7000CE,0xF3740001, -0xE98000C8,0xD1FC02D3,0xF33400A2,0xE92800C8,0xDE0002D4,0xA3FC02D3,0xFF8800D5,0xF39000A2,0xFF7000CE,0xF3740001,0xE98000C8,0xD1FC02D3,0xF33400A2,0xE92800C8,0xDE0002D4,0xD1FC02D3,0xF33400A2,0xE92800C8,0xDE0002D4,0xDE0002D4,0xFFB40202,0xF7BC028B,0xF7BC028B,0xFFA4015B,0xFF900089,0xFF740005,0xF3800002,0xF1640006,0xFDB00222,0xFF980136,0xF76000A2,0xE92800C8, -0xC5FC02D3,0x19802D3,0x19802D3,0x19802D3,0x19802D3,0xFF8C00FE,0xFF8C00FE,0xFF8C00FE,0xED8400CA,0xED8400CA,0xDF8400CA,0xFF7800CB,0xFF7800CB,0xFF7800CB,0xF1740002,0xF1740002,0xE1780026,0xE37400A3,0xE37400A3,0xDD700015,0xD57000A5,0x65FC02D3,0x65FC02D3,0x65FC02D3,0xF75400C9,0xF75400C9,0xDF6C00C8,0xF13C00A2,0xF13C00A2,0xDF4C0001,0xD55800A4,0xB3FC02D3, -0xB3FC02D3,0xDF1400C8,0xD50000A4,0xCC0002D4,0xFF9001C6,0xF598026A,0x19802D3,0xFF8800F3,0xFF80004A,0xFF740002,0xF9740005,0xED700002,0xFD8801C3,0xFF8000DD,0xF36C00A3,0xDF4C0001,0x9FFC02D3,0x1AC00A2,0x1AC00A2,0x1AC00A2,0x1AC00A2,0xFF9C0002,0xFF9C0002,0xFF9C0002,0xE79C0001,0xE79C0001,0xDF980001,0x83FC00A2,0x83FC00A2,0x83FC00A2,0xED7C0001,0xED7C0001, -0xDF880000,0xC3F800A2,0xC3F800A2,0xDF500000,0xD40000A4,0x83FC00A2,0x83FC00A2,0x83FC00A2,0xED7C0001,0xED7C0001,0xDF880000,0xC3F800A2,0xC3F800A2,0xDF500000,0xD40000A4,0xC3F800A2,0xC3F800A2,0xDF500000,0xD40000A4,0xD40000A4,0xFFA00064,0xFDA80071,0x1AC00A2,0xFF98003D,0xFF840011,0xFD780000,0xF57C0000,0xED700001,0xF5A00071,0xFB90003D,0xB3FC00A2,0xDF500000, -0xB3FC00A2,0x1D000CA,0xFFC4003A,0xFDBC0001,0xF3BC0001,0xBDFC00C8,0xFFA00009,0xF3A80001,0xDFF800C8,0xF36C0000,0xE80000C8,0xBDFC00C8,0xFFA00009,0xF3A80001,0xDFF800C8,0xF36C0000,0xE80000C8,0xDFF800C8,0xF36C0000,0xE80000C8,0xE80000C8,0xBDFC00C8,0xFFA00009,0xF3A80001,0xDFF800C8,0xF36C0000,0xE80000C8,0xDFF800C8,0xF36C0000,0xE80000C8,0xE80000C8,0xDFF800C8, -0xF36C0000,0xE80000C8,0xE80000C8,0xE80000C8,0xFBC800A4,0x1F000C8,0xFFCC00AA,0xFDBC0071,0xFFA4003D,0xFF740004,0xF3840000,0xF34C0000,0xF9C800A2,0xFBB80071,0xF58C0001,0xE80000C8,0xD7FC00C8,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0x18400CA,0xF3740001,0xF3740001,0xF3740001,0xF3740001,0xF3740001, -0xF3740001,0xD7700001,0xD7700001,0xD7700001,0xCD700001,0x49FC00C8,0x49FC00C8,0x49FC00C8,0x49FC00C8,0x49FC00C8,0x49FC00C8,0xDD500001,0xDD500001,0xDD500001,0xCD5C0001,0xA7F800C8,0xA7F800C8,0xA7F800C8,0xCD200000,0xC20000C8,0xF9800082,0x18400CA,0x18400CA,0xFB7C003D,0xFF780012,0xFD740002,0xFD740002,0xE9740001,0xFF74006A,0xFD740032,0xE16C0000,0xDD500001, -0x8FFC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table91[] = { -0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0xA3FC0000, -0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x19C0000,0x19C0000,0x19C0000,0x43FC0000,0x8BFC0000,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000, -0x81FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0xD4000000,0xBC40000,0x1A80001,0x1A80001,0x1F00000,0x33FC0000,0x63FC0000,0x63FC0000,0x99FC0000,0x1F00000,0x33FC0000,0xB1FC0000,0xC1FC0000, -0xB1FC0000,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xE6000000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xE6000000,0xDDF40000,0xDDF40000,0xE6000000,0xE6000000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xE6000000,0xDDF40000,0xDDF40000,0xE6000000,0xE6000000,0xDDF40000, -0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0x57FC0000,0x1EC0000,0x1CC0001,0xA7FC0000,0xC5FC0000,0xD3FC0000,0xD7FC0000,0xDFF80000,0x8BFC0000,0xB7FC0000,0xD3FC0000,0xE6000000,0xD3FC0000,0x1F40001,0xF5FC0000,0xF9FC0000,0xFA000000,0xF5FC0000,0xF9FC0000,0xFA000000,0xF9FC0000,0xFA000000,0xFA000000,0xF5FC0000,0xF9FC0000,0xFA000000,0xF9FC0000,0xFA000000, -0xFA000000,0xF9FC0000,0xFA000000,0xFA000000,0xFA000000,0xF5FC0000,0xF9FC0000,0xFA000000,0xF9FC0000,0xFA000000,0xFA000000,0xF9FC0000,0xFA000000,0xFA000000,0xFA000000,0xF9FC0000,0xFA000000,0xFA000000,0xFA000000,0xFA000000,0xEDFC0000,0xC7FC0000,0xC7FC0000,0xF7FC0000,0xF9FC0000,0xFBF40000,0xFA000000,0xFA000000,0xF1FC0000,0xF7FC0000,0xFBE80000,0xFA000000, -0xF9FC0000,0x1BC0734,0xFFB004DF,0xFFAC032C,0xFBA802D3,0xFFA403F2,0xFFA001B3,0xFF9C013B,0xFF9401A4,0xF99400D7,0xF1940192,0xFFA40479,0xFF9401AE,0xFF8C00FD,0xFF880107,0xFB840002,0xF18800D5,0xFB8002D4,0xF57800FF,0xED80013A,0xE78002D5,0x9BFC0734,0xFF8803C9,0xFB8402D4,0xFF700252,0xFB6800D1,0xF1700190,0xFF58031F,0xFB4800A2,0xF14C00D5,0xE75C02D4,0xCFF80734, -0xFB0802D3,0xF0F80190,0xE6E402D4,0xDC000738,0xFFB40553,0xF5B806A4,0xF7BC06C4,0xFFA4038C,0xFF9801BF,0xFF88005A,0xFB880011,0xF97C0012,0xFDB00553,0xFF98033A,0xFD7800A9,0xF14C00D5,0xC1FC0734,0x1D002D3,0xFFC401A3,0xFFC000CE,0xFBBC00A2,0xFFBC0199,0xFFB4005A,0xFDAC0015,0xFFA800C9,0xF9A40026,0xF1A800C9,0xBBFC02D3,0xFFA0011D,0xFBA000A2,0xFF8800FE,0xFB840001, -0xF19000C8,0xDDFC02D3,0xFB4400A2,0xF13800C8,0xE60002D4,0xBBFC02D3,0xFFA0011D,0xFBA000A2,0xFF8800FE,0xFB840001,0xF19000C8,0xDDFC02D3,0xFB4400A2,0xF13800C8,0xE60002D4,0xDDFC02D3,0xFB4400A2,0xF13800C8,0xE60002D4,0xE60002D4,0xFBC80244,0xFFCC028B,0xFFCC028B,0xFFC001A6,0xFFA800ED,0xFF900039,0xFB900002,0xF9740006,0xFFC40243,0xFFB0019E,0xFF7000A2,0xF13800C8, -0xD5FC02D3,0x1A802D3,0x1A802D3,0x1A802D3,0x1A802D3,0xFF9C012B,0xFF9C012B,0xFF9C012B,0xF59400CA,0xF59400CA,0xE79400CA,0xFF8C00ED,0xFF8C00ED,0xFF8C00ED,0xF9840002,0xF9840002,0xE9880026,0xEB8400A3,0xEB8400A3,0xE5800015,0xDD8000A5,0x7DFC02D3,0x7DFC02D3,0x7DFC02D3,0xFF6400C9,0xFF6400C9,0xE77C00C8,0xF94C00A2,0xF94C00A2,0xE75C0001,0xDD6800A4,0xBFFC02D3, -0xBFFC02D3,0xE72400C8,0xDD1000A4,0xD40002D4,0xFFA001E5,0xFDA8026A,0x1A802D3,0xFF980126,0xFF940082,0xFF88001A,0xFF88000A,0xF5800002,0xFD9C01E1,0xFF900111,0xFB7C00A3,0xE75C0001,0xAFFC02D3,0x1BC00A2,0x1BC00A2,0x1BC00A2,0x1BC00A2,0xFFB0000D,0xFFB0000D,0xFFB0000D,0xEFAC0001,0xEFAC0001,0xE7A80001,0x9BFC00A2,0x9BFC00A2,0x9BFC00A2,0xF58C0001,0xF58C0001, -0xE7980000,0xCFF800A2,0xCFF800A2,0xE7600000,0xDC0000A4,0x9BFC00A2,0x9BFC00A2,0x9BFC00A2,0xF58C0001,0xF58C0001,0xE7980000,0xCFF800A2,0xCFF800A2,0xE7600000,0xDC0000A4,0xCFF800A2,0xCFF800A2,0xE7600000,0xDC0000A4,0xDC0000A4,0xFBB40071,0xF5B80082,0x1BC00A2,0xFBAC0055,0xFF9C0022,0xFF900008,0xFD8C0000,0xF5800001,0xFDB00071,0xFFA40048,0xC1FC00A2,0xE7600000, -0xC1FC00A2,0x1E000CA,0xFFDC0062,0xFFCC0011,0xFBCC0001,0xD5FC00C8,0xFFC0002D,0xFBB80001,0xEBF800C8,0xFB7C0000,0xF00000C8,0xD5FC00C8,0xFFC0002D,0xFBB80001,0xEBF800C8,0xFB7C0000,0xF00000C8,0xEBF800C8,0xFB7C0000,0xF00000C8,0xF00000C8,0xD5FC00C8,0xFFC0002D,0xFBB80001,0xEBF800C8,0xFB7C0000,0xF00000C8,0xEBF800C8,0xFB7C0000,0xF00000C8,0xF00000C8,0xEBF800C8, -0xFB7C0000,0xF00000C8,0xF00000C8,0xF00000C8,0xF3E000B5,0x27FC00C8,0xF9E000B5,0xFDD40091,0xFFBC0061,0xFFA40022,0xFB940000,0xFB5C0000,0xFDD800A4,0xFFCC0082,0xFD9C0001,0xF00000C8,0xE5FC00C8,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0x19400CA,0xFB840001,0xFB840001,0xFB840001,0xFB840001,0xFB840001, -0xFB840001,0xDF800001,0xDF800001,0xDF800001,0xD5800001,0x63FC00C8,0x63FC00C8,0x63FC00C8,0x63FC00C8,0x63FC00C8,0x63FC00C8,0xE5600001,0xE5600001,0xE5600001,0xD56C0001,0xB3F800C8,0xB3F800C8,0xB3F800C8,0xD5300000,0xCA0000C8,0xF3940091,0x19400CA,0x19400CA,0xFD8C004A,0xFD880022,0xFD84000A,0xFD84000A,0xF1840001,0xFD8C0071,0xF9880048,0xE97C0000,0xE5600001, -0x9FF800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table92[] = { -0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0xB1F80000, -0xB1F80000,0xB1F80000,0xB1F80000,0xC8000001,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0x1940000,0xFAC0000,0xFAC0000,0xFAC0000,0x5FFC0000,0x9DF80000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000, -0x9BFC0000,0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0xDC000001,0x5D80000,0x1BC0000,0x1BC0000,0x17FC0000,0x5FFC0000,0x85FC0000,0x85FC0000,0xAFFC0000,0x17FC0000,0x5FFC0000,0xC1FC0000,0xCFF80000, -0xC1FC0000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0xD3FC0000,0xD3FC0000,0xD3FC0000,0xE9FC0000,0xE9FC0000,0xEE000001,0xD3FC0000,0xD3FC0000,0xD3FC0000,0xE9FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xE9FC0000,0xEE000001,0xEE000001,0xD3FC0000,0xD3FC0000,0xD3FC0000,0xE9FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xE9FC0000,0xEE000001,0xEE000001,0xE9FC0000, -0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0x97FC0000,0x17FC0000,0x1E00000,0xC9FC0000,0xDBFC0000,0xE3FC0000,0xE7F80000,0xEBF80000,0xB7FC0000,0xD3FC0000,0xE3FC0000,0xEE000001,0xE3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1CC0625,0xFFC4047D,0xFFC0034C,0xFFBC02D4,0xFFBC03A5,0xFFB40204,0xFFB00161,0xFFAC017D,0xFFA800C9,0xF7A80139,0xFFB00412,0xFFAC01FB,0xFFA4012A,0xFFA00106,0xFF980009,0xF9980082,0xFF940225,0xFB9000C8,0xF59000C1,0xEF940222,0xB5FC0625,0xFFA003BC,0xFF9802D3,0xFF94024F,0xFF8400E9,0xF784013B,0xFF7C02BE,0xFF6400A5,0xF7640076,0xEF700222,0xDBF80625, -0xFF2C02D3,0xF7180139,0xEEFC0222,0xE4000627,0xFFC404D9,0xFDC8059D,0xFFCC05C5,0xFFB40375,0xFFA801FF,0xFFA400B5,0xFF9C0032,0xFF900004,0xFFBC04B6,0xFFB00336,0xFF9000B4,0xF7640076,0xD1FC0625,0x1DC0225,0xFFD40178,0xFFCC00E4,0xFFCC00A4,0xFFD00145,0xFFC8007E,0xFFC40020,0xFFC00081,0xFDB8000C,0xF7BC0071,0xCFFC0222,0xFFC00118,0xFFB800A2,0xFFAC00C6,0xFF9C0001, -0xF7A40071,0xE7FC0222,0xFF6C00A2,0xF7540071,0xEE000222,0xCFFC0222,0xFFC00118,0xFFB800A2,0xFFAC00C6,0xFF9C0001,0xF7A40071,0xE7FC0222,0xFF6C00A2,0xF7540071,0xEE000222,0xE7FC0222,0xFF6C00A2,0xF7540071,0xEE000222,0xEE000222,0xFFD801C3,0xF7DC0201,0xF7DC0204,0xFDD00171,0xFFC000F2,0xFFAC0059,0xFFA80005,0xFF900000,0xFDD801C5,0xFFC80146,0xFF9800AB,0xF7540071, -0xE1FC0222,0x1BC02D4,0x1BC02D4,0x1BC02D4,0x1BC02D4,0xFFB00161,0xFFB00161,0xFFB00161,0xFDA800C8,0xFDA800C8,0xEFA800C9,0xFFA4012A,0xFFA4012A,0xFFA4012A,0xFF980009,0xFF980009,0xF3980026,0xF59400A2,0xF59400A2,0xEF900015,0xE79400A2,0x99FC02D3,0x99FC02D3,0x99FC02D3,0xFF8400E9,0xFF8400E9,0xF18C00C9,0xFF6400A5,0xFF6400A5,0xEF700002,0xE77800A2,0xCDFC02D3, -0xCDFC02D3,0xEF3800C9,0xE71C00A2,0xDC0002D3,0xFBB40225,0xF5B8028C,0x1BC02D4,0xFFAC0175,0xFFA400D9,0xFF9C0059,0xFF9C0032,0xFD940002,0xFDB00201,0xFFA40163,0xFF9000AB,0xEF700002,0xBFFC02D3,0x1CC00A4,0x1CC00A4,0x1CC00A4,0x1CC00A4,0xFFC40020,0xFFC40020,0xFFC40020,0xF9BC0000,0xF9BC0000,0xEFBC0001,0xB7FC00A2,0xB7FC00A2,0xB7FC00A2,0xFF9C0001,0xFF9C0001, -0xEFAC0001,0xDDF400A2,0xDDF400A2,0xEF740001,0xE60000A2,0xB7FC00A2,0xB7FC00A2,0xB7FC00A2,0xFF9C0001,0xFF9C0001,0xEFAC0001,0xDDF400A2,0xDDF400A2,0xEF740001,0xE60000A2,0xDDF400A2,0xDDF400A2,0xEF740001,0xE60000A2,0xE60000A2,0xFDC80080,0xFFCC0080,0x1CC00A4,0xFDC00064,0xFDBC003D,0xFFAC0019,0xFFA80005,0xFF900000,0xFFC40080,0xFBC00062,0xD3FC00A2,0xEF740001, -0xD3FC00A2,0x1F00071,0xFFE80041,0xFFE40014,0xFFE00000,0xE9FC0071,0xFFD80028,0xFFD00000,0xF3FC0071,0xFFA00000,0xF6000071,0xE9FC0071,0xFFD80028,0xFFD00000,0xF3FC0071,0xFFA00000,0xF6000071,0xF3FC0071,0xFFA00000,0xF6000071,0xF6000071,0xE9FC0071,0xFFD80028,0xFFD00000,0xF3FC0071,0xFFA00000,0xF6000071,0xF3FC0071,0xFFA00000,0xF6000071,0xF6000071,0xF3FC0071, -0xFFA00000,0xF6000071,0xF6000071,0xF6000071,0xFFEC0062,0x87FC0071,0xFFEC0064,0xFDE80055,0xFDE40048,0xFFC8001D,0xFFB40000,0xFF8C0000,0xFDEC0062,0xFFE40055,0xFFBC0004,0xF6000071,0xF1FC0071,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0x1A800C8,0xFF980005,0xFF980005,0xFF980005,0xFF980005,0xFF980005, -0xFF980005,0xE7940001,0xE7940001,0xE7940001,0xDD940001,0x7DFC00C8,0x7DFC00C8,0x7DFC00C8,0x7DFC00C8,0x7DFC00C8,0x7DFC00C8,0xEF700001,0xEF700001,0xEF700001,0xDD800001,0xBFFC00C8,0xBFFC00C8,0xBFFC00C8,0xDD440001,0xD20000CA,0xFBA40091,0x1A800C8,0x1A800C8,0xFFA00055,0xFD9C0034,0xFF980014,0xFF980014,0xFB940000,0xFD9C0082,0xFD9C0055,0xEF900001,0xEF700001, -0xAFFC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table93[] = { -0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0x77FC0000,0xBDF80000, -0xBDF80000,0xBDF80000,0xBDF80000,0xD0000001,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1C00000,0x1C00000,0x1C00000,0x77FC0000,0xABFC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000, -0xB5FC0000,0xDBF80000,0xDBF80000,0xDBF80000,0xE4000001,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xDBF80000,0xDBF80000,0xDBF80000,0xE4000001,0xDBF80000,0xDBF80000,0xDBF80000,0xE4000001,0xE4000001,0xDE80000,0x1CC0000,0x1CC0000,0x51FC0000,0x87FC0000,0xA3FC0000,0xA3FC0000,0xC3FC0000,0x51FC0000,0x87FC0000,0xD1FC0000,0xDBF80000, -0xD1FC0000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0xEBFC0000,0xEBFC0000,0xEBFC0000,0xF5FC0000,0xF5FC0000,0xF6000001,0xEBFC0000,0xEBFC0000,0xEBFC0000,0xF5FC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF5FC0000,0xF6000001,0xF6000001,0xEBFC0000,0xEBFC0000,0xEBFC0000,0xF5FC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF5FC0000,0xF6000001,0xF6000001,0xF5FC0000, -0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xD1FC0000,0x97FC0000,0x1F00000,0xE7FC0000,0xEFFC0000,0xF3FC0000,0xF5F80000,0xF7F40000,0xDFFC0000,0xEBFC0000,0xF3FC0000,0xF6000001,0xF3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1D804C1,0xFFD003C9,0xFFCC0314,0xFFCC02D4,0xFFC80305,0xFFC40204,0xFFC401A0,0xFFB80151,0xFFB800D8,0xFBB800E1,0xFFC4031D,0xFFBC01F3,0xFFBC017A,0xFFAC00E6,0xFFAC003D,0xFBA80036,0xFFAC016D,0xFFA400A4,0xF9A40049,0xF5A4014E,0xC7FC04C1,0xFFB80354,0xFFB002D3,0xFFAC01F7,0xFFA0011A,0xFB9800E3,0xFF940216,0xFF8800CD,0xFB78001A,0xF580014F,0xE3FC04C1, -0xFF6002D3,0xFB3800E1,0xF514014E,0xEA0004C3,0xFFD003DA,0xF5D80485,0xF5D8049C,0xFFC402E5,0xFFBC01E3,0xFFB800E7,0xFFB40084,0xFFA80015,0xFFD003C4,0xFFC402B9,0xFFA800E4,0xFB78001A,0xDDF804C1,0x1E8014D,0xFDE40105,0xFFE000BD,0xFFDC00A4,0xFFDC00C9,0xFFD8006C,0xFFD40041,0xFFCC0041,0xFFCC0001,0xFBCC0019,0xDFFC014D,0xFFD800D8,0xFFCC00A4,0xFFC0007D,0xFFB80011, -0xFBB80019,0xEFFC014D,0xFF9C00A2,0xFB740019,0xF400014E,0xDFFC014D,0xFFD800D8,0xFFCC00A4,0xFFC0007D,0xFFB80011,0xFBB80019,0xEFFC014D,0xFF9C00A2,0xFB740019,0xF400014E,0xEFFC014D,0xFF9C00A2,0xFB740019,0xF400014E,0xF400014E,0xFFE4011D,0xFDE80131,0xFDE80138,0xFFDC00E1,0xFFD400AA,0xFFC80056,0xFFBC0025,0xFFB0000D,0xFFE00122,0xFFDC00DA,0xFFB800A6,0xFB740019, -0xEBFC014D,0x1CC02D4,0x1CC02D4,0x1CC02D4,0x1CC02D4,0xFFC401A0,0xFFC401A0,0xFFC401A0,0xFFB800D8,0xFFB800D8,0xF7B800C9,0xFFBC017A,0xFFBC017A,0xFFBC017A,0xFFAC003D,0xFFAC003D,0xFBA80026,0xFDA400A2,0xFDA400A2,0xF7A00015,0xEFA400A2,0xB1FC02D3,0xB1FC02D3,0xB1FC02D3,0xFFA0011A,0xFFA0011A,0xF99C00C9,0xFF8800CD,0xFF8800CD,0xF7800002,0xEF8800A2,0xD9FC02D3, -0xD9FC02D3,0xF74800C9,0xEF2C00A2,0xE40002D3,0xFDC80244,0xFDC8028C,0x1CC02D4,0xFFBC01BA,0xFFB40131,0xFFB400A8,0xFFB40084,0xFFA80015,0xFFBC0236,0xFFBC01A6,0xFFA800DB,0xF7800002,0xCFFC02D3,0x1DC00A4,0x1DC00A4,0x1DC00A4,0x1DC00A4,0xFFD40041,0xFFD40041,0xFFD40041,0xFFCC0001,0xFFCC0001,0xF7CC0001,0xCFFC00A2,0xCFFC00A2,0xCFFC00A2,0xFFB80011,0xFFB80011, -0xF7BC0001,0xE7FC00A2,0xE7FC00A2,0xF7840001,0xEE0000A2,0xCFFC00A2,0xCFFC00A2,0xCFFC00A2,0xFFB80011,0xFFB80011,0xF7BC0001,0xE7FC00A2,0xE7FC00A2,0xF7840001,0xEE0000A2,0xE7FC00A2,0xE7FC00A2,0xF7840001,0xEE0000A2,0xEE0000A2,0xFFD80082,0xF7DC0091,0x1DC00A4,0xFDD80071,0xFDD00055,0xFFC8003D,0xFFBC0025,0xFFB0000D,0xF1DC0091,0xFBD40071,0xE1FC00A2,0xF7840001, -0xE1FC00A2,0x1F80019,0xFFF4000D,0xFFF00004,0xFFF00000,0xF5FC0019,0xFFF00008,0xFFE80000,0xF9FC0019,0xFFD40000,0xFA000019,0xF5FC0019,0xFFF00008,0xFFE80000,0xF9FC0019,0xFFD40000,0xFA000019,0xF9FC0019,0xFFD40000,0xFA000019,0xFA000019,0xF5FC0019,0xFFF00008,0xFFE80000,0xF9FC0019,0xFFD40000,0xFA000019,0xF9FC0019,0xFFD40000,0xFA000019,0xFA000019,0xF9FC0019, -0xFFD40000,0xFA000019,0xFA000019,0xFA000019,0xFDF40014,0xC7FC0019,0xF5F80019,0xFFF40012,0xFFE80010,0xFFE80008,0xFFDC0000,0xFFC80000,0xF1FC0019,0xFFF00014,0xFFE00001,0xFA000019,0xF9FC0019,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0x1B800C8,0xFFA80014,0xFFA80014,0xFFA80014,0xFFA80014,0xFFA80014, -0xFFA80014,0xEFA40001,0xEFA40001,0xEFA40001,0xE5A40001,0x95FC00C8,0x95FC00C8,0x95FC00C8,0x95FC00C8,0x95FC00C8,0x95FC00C8,0xF7800001,0xF7800001,0xF7800001,0xE5900001,0xCBFC00C8,0xCBFC00C8,0xCBFC00C8,0xE5540001,0xDA0000CA,0xF3B400A4,0x1B800C8,0x1B800C8,0xFBB40071,0xFDB00048,0xFFAC0029,0xFFAC0029,0xFFA40004,0xF9B00091,0xFBAC0064,0xF7A00001,0xF7800001, -0xBFF800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table94[] = { -0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0xC9F80000, -0xC9F80000,0xC9F80000,0xC9F80000,0xD8000001,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1D00000,0x1D00000,0x1D00000,0x8FFC0000,0xBBFC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000, -0xCDFC0000,0xE7F80000,0xE7F80000,0xE7F80000,0xEC000001,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xE7F80000,0xE7F80000,0xE7F80000,0xEC000001,0xE7F80000,0xE7F80000,0xE7F80000,0xEC000001,0xEC000001,0x1FC0000,0x1DC0000,0x1DC0000,0x89FC0000,0xADFC0000,0xC1FC0000,0xC1FC0000,0xD7FC0000,0x89FC0000,0xADFC0000,0xDFFC0000,0xE7F80000, -0xDFFC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1E403A2,0xFFDC032A,0xFFD802D5,0xFFD802B1,0xFFDC0282,0xFFD40205,0xFFD001C9,0xFFCC0150,0xFFCC0110,0xFFC800C8,0xFFD0028A,0xFFD001E9,0xFFC801A9,0xFFC000FC,0xFFC00098,0xFFBC0029,0xFFB8011A,0xFFB800A1,0xFFB00014,0xF9B400C1,0xD5FC03A2,0xFFCC02F1,0xFFC802AE,0xFFB801CC,0xFFB80153,0xFFAC00CA,0xFFAC01B3,0xFFA000FE,0xFF900001,0xF99400C2,0xEBF803A2, -0xFF9002AE,0xFF5800C8,0xF93400C1,0xF00003A2,0xFDE00326,0xF9E00370,0xFBE40389,0xFFD40272,0xFFD001DA,0xFFC8012F,0xFFC400E0,0xFFB80065,0xFFD8030F,0xFFD00253,0xFFBC010D,0xFF900001,0xE5FC03A2,0x1F400C2,0xFDF000AE,0xFFF00099,0xFFEC0091,0xFFE80086,0xFFE80065,0xFFE80055,0xFFE40030,0xFFE0001D,0xFFDC0000,0xEFFC00C1,0xFFE400A1,0xFFE40091,0xFFD80058,0xFFD80034, -0xFFCC0000,0xF7F800C1,0xFFC80091,0xFF940000,0xF80000C1,0xEFFC00C1,0xFFE400A1,0xFFE40091,0xFFD80058,0xFFD80034,0xFFCC0000,0xF7F800C1,0xFFC80091,0xFF940000,0xF80000C1,0xF7F800C1,0xFFC80091,0xFF940000,0xF80000C1,0xF80000C1,0xFDF000AC,0xFFEC00C0,0xF3F400C2,0xFFEC009B,0xFFE80081,0xFFE0005E,0xFFE00048,0xFFD0002D,0xFBF000AC,0xFFE80095,0xFFD80092,0xFF940000, -0xF5FC00C1,0x1D802B1,0x1D802B1,0x1D802B1,0x1D802B1,0xFFD001C9,0xFFD001C9,0xFFD001C9,0xFFCC0110,0xFFCC0110,0xFFC800C8,0xFFC801A9,0xFFC801A9,0xFFC801A9,0xFFC00098,0xFFC00098,0xFFBC0029,0xFFB800A1,0xFFB800A1,0xFDB00011,0xF7B40091,0xC9FC02AE,0xC9FC02AE,0xC9FC02AE,0xFFB80153,0xFFB80153,0xFFAC00CA,0xFFA000FE,0xFFA000FE,0xFF900001,0xF7980091,0xE5F802AE, -0xE5F802AE,0xFF5800C8,0xF73C0091,0xEC0002AE,0xFFD80245,0xF5D8028C,0x1D802B1,0xFFD401E2,0xFFCC017A,0xFFC40114,0xFFC400E0,0xFFB80065,0xFFD0022E,0xFFD001C3,0xFFBC0109,0xFF900001,0xDFF802AE,0x1EC0091,0x1EC0091,0x1EC0091,0x1EC0091,0xFFE80055,0xFFE80055,0xFFE80055,0xFFE0001D,0xFFE0001D,0xFFDC0000,0xE5FC0091,0xE5FC0091,0xE5FC0091,0xFFD80034,0xFFD80034, -0xFFCC0000,0xF3F80091,0xF3F80091,0xFF940000,0xF6000091,0xE5FC0091,0xE5FC0091,0xE5FC0091,0xFFD80034,0xFFD80034,0xFFCC0000,0xF3F80091,0xF3F80091,0xFF940000,0xF6000091,0xF3F80091,0xF3F80091,0xFF940000,0xF6000091,0xF6000091,0xFBEC0080,0xFFEC0080,0x1EC0091,0xF9EC0080,0xFFE0006A,0xFFE00055,0xFFE00048,0xFFD0002D,0xF9EC0080,0xFFE80071,0xEFFC0091,0xFF940000, -0xEFFC0091,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0x1C800C8,0xFFBC0029,0xFFBC0029,0xFFBC0029,0xFFBC0029,0xFFBC0029, -0xFFBC0029,0xF7B40001,0xF7B40001,0xF7B40001,0xEDB40001,0xAFFC00C8,0xAFFC00C8,0xAFFC00C8,0xAFFC00C8,0xAFFC00C8,0xAFFC00C8,0xFF900001,0xFF900001,0xFF900001,0xEDA00001,0xD7FC00C8,0xD7FC00C8,0xD7FC00C8,0xED640001,0xE20000CA,0xFBC400A4,0x1C800C8,0x1C800C8,0xFBC40080,0xFFBC0061,0xFFBC0041,0xFFBC0041,0xFFB80014,0xFFBC009D,0xFFBC0075,0xFFB00001,0xFF900001, -0xCDFC00C8,}; -static const uint32_t g_etc1_to_bc7_m6_table95[] = { -0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xD5F80000, -0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x9E00000,0x9E00000,0x9E00000,0xA9FC0000,0xC9FC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000, -0xE5FC0000,0xF3F80000,0xF3F80000,0xF3F80000,0xF4000001,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xF3F80000,0xF3F80000,0xF3F80000,0xF4000001,0xF3F80000,0xF3F80000,0xF3F80000,0xF4000001,0xF4000001,0x77FC0000,0x1EC0000,0x1EC0000,0xC3FC0000,0xD5FC0000,0xDFFC0000,0xDFFC0000,0xEBFC0000,0xC3FC0000,0xD5FC0000,0xEFFC0000,0xF3F80000, -0xEFFC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1EC0232,0xFFE801F2,0xFFE401C9,0xFFE401B9,0xFFE801AA,0xFFE00165,0xFFDC0149,0xFFD80110,0xFFD800EC,0xFFD800C8,0xFFDC0186,0xFFDC0131,0xFFDC010D,0xFFD400BB,0xFFD00088,0xFFD00048,0xFFCC0091,0xFFCC0051,0xFFC40001,0xFBC40039,0xE3FC0232,0xFFD801DD,0xFFD801B9,0xFFCC0150,0xFFCC0110,0xFFC400C8,0xFFC00109,0xFFB80096,0xFFAC0011,0xFBAC0039,0xF1F80232, -0xFFAC01B9,0xFF8800C8,0xFB5C0039,0xF4000232,0xFFE401E6,0xFDE80210,0xFFEC0221,0xFFE0019E,0xFFDC013E,0xFFD400D8,0xFFD000B5,0xFFD00061,0xFFE401ED,0xFFDC018C,0xFFD000AF,0xFFAC0011,0xEDFC0232,0x1F80036,0xFFF80031,0xFFF4002D,0xFFF40029,0xFFF40022,0xFFF4001D,0xFFF40019,0xFFF0000C,0xFFF00008,0xFFEC0000,0xF7FC0036,0xFFF0002D,0xFFF00029,0xFFF00018,0xFFE40010, -0xFFE40000,0xFBFC0036,0xFFE00029,0xFFC80000,0xFA000039,0xF7FC0036,0xFFF0002D,0xFFF00029,0xFFF00018,0xFFE40010,0xFFE40000,0xFBFC0036,0xFFE00029,0xFFC80000,0xFA000039,0xFBFC0036,0xFFE00029,0xFFC80000,0xFA000039,0xFA000039,0xFFF40030,0xF5F80036,0xF5F80036,0xFFF8002C,0xFFF00022,0xFFEC0018,0xFFE80011,0xFFE4000A,0xFFF8002C,0xFFF4002B,0xFFE8002A,0xFFC80000, -0xFBFC0036,0x1E401B9,0x1E401B9,0x1E401B9,0x1E401B9,0xFFDC0149,0xFFDC0149,0xFFDC0149,0xFFD800EC,0xFFD800EC,0xFFD800C8,0xFFDC010D,0xFFDC010D,0xFFDC010D,0xFFD00088,0xFFD00088,0xFFD00048,0xFFCC0051,0xFFCC0051,0xFFC40001,0xFBC40029,0xD9FC01B9,0xD9FC01B9,0xD9FC01B9,0xFFCC0110,0xFFCC0110,0xFFC400C8,0xFFB80096,0xFFB80096,0xFFAC0011,0xFBAC0029,0xEDF801B9, -0xEDF801B9,0xFF8800C8,0xFB5C0029,0xF20001BA,0xFDE40182,0xFBE401A0,0x1E401B9,0xFFDC0144,0xFFD80101,0xFFD400C8,0xFFD000B5,0xFFD00061,0xFDE00181,0xFFDC013B,0xFFD000AE,0xFFAC0011,0xE7FC01B9,0x1F40029,0x1F40029,0x1F40029,0x1F40029,0xFFF40019,0xFFF40019,0xFFF40019,0xFFF00008,0xFFF00008,0xFFEC0000,0xF1FC0029,0xF1FC0029,0xF1FC0029,0xFFE40010,0xFFE40010, -0xFFE40000,0xF9F80029,0xF9F80029,0xFFC80000,0xFA000029,0xF1FC0029,0xF1FC0029,0xF1FC0029,0xFFE40010,0xFFE40010,0xFFE40000,0xF9F80029,0xF9F80029,0xFFC80000,0xFA000029,0xF9F80029,0xF9F80029,0xFFC80000,0xFA000029,0xFA000029,0xFFF40020,0xF3F40029,0x1F40029,0xFDF40020,0xFFF00019,0xFFEC0014,0xFFE80011,0xFFE4000A,0xFDF40020,0xFDF00022,0xF7FC0029,0xFFC80000, -0xF7FC0029,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0x1D800C8,0xFFD00048,0xFFD00048,0xFFD00048,0xFFD00048,0xFFD00048, -0xFFD00048,0xFFC40001,0xFFC40001,0xFFC40001,0xF5C40001,0xC7FC00C8,0xC7FC00C8,0xC7FC00C8,0xC7FC00C8,0xC7FC00C8,0xC7FC00C8,0xFFAC0011,0xFFAC0011,0xFFAC0011,0xF5B00001,0xE3FC00C8,0xE3FC00C8,0xE3FC00C8,0xF5740001,0xEA0000CA,0xF5D800B5,0x1D800C8,0x1D800C8,0xFDD40091,0xFFD00075,0xFFCC0061,0xFFCC0061,0xFFCC0034,0xFDD400A2,0xFFD00082,0xFFC4001D,0xFFAC0011, -0xDDF800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table96[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x180001,0x180001,0x180001,0x180001,0x2240000,0x2240000,0x2240000,0x4C0000,0x4C0000,0xC000000,0x2240000,0x2240000,0x2240000,0x4C0000,0x4C0000,0xC000000,0x4C0000,0x4C0000,0xC000000,0xC000000,0x2240000,0x2240000,0x2240000,0x4C0000,0x4C0000,0xC000000,0x4C0000,0x4C0000,0xC000000,0xC000000,0x4C0000, -0x4C0000,0xC000000,0xC000000,0xC000000,0x41C0000,0x1C0000,0x180001,0x240000,0x2C0000,0x340000,0x3C0000,0x5C0000,0x200000,0x2240000,0x340000,0xC000000,0x340000,0x540000,0x7C0000,0xFC0000,0x28000001,0x7C0000,0xFC0000,0x28000001,0xFC0000,0x28000001,0x28000001,0x7C0000,0xFC0000,0x28000001,0xFC0000,0x28000001, -0x28000001,0xFC0000,0x28000001,0x28000001,0x28000001,0x7C0000,0xFC0000,0x28000001,0xFC0000,0x28000001,0x28000001,0xFC0000,0x28000001,0x28000001,0x28000001,0xFC0000,0x28000001,0x28000001,0x28000001,0x28000001,0x680000,0x4580000,0x4580000,0x8C0000,0xCC0000,0x1940000,0x28000001,0x28000001,0x2700000,0x9C0000,0x1DCC0000,0x28000001, -0xB00000,0x1C0499,0x76080071,0x3C080071,0x28080072,0x500001A5,0x3A000028,0x28000001,0x280001A5,0x200000A2,0x1A0001A5,0x3600039D,0x2E000149,0x240000AA,0x22000236,0x2000011B,0x180001F1,0x1A00039D,0x1C000236,0x160002AE,0x1200039E,0x280499,0x28000216,0x22000127,0x220002AF,0x1E000181,0x18000231,0x180003E2,0x1600028E,0x140002EE,0x120003C2,0x500499, -0x1600032F,0x10000373,0x1000041B,0xC00049B,0xA4000108,0xFE0C0229,0xF21401F2,0x48000118,0x3600011B,0x2800011B,0x220000C2,0x20000173,0x64000209,0x3C00017A,0x1E0001B6,0x140002EE,0x380499,0x24039D,0x720C0055,0x3A0C0055,0x280C0056,0x500001A5,0x3A000028,0x28000001,0x280001A5,0x200000A2,0x1A0001A5,0x34039D,0x2E000149,0x240000AA,0x22000236,0x2000011B, -0x180001F1,0x68039D,0x1C000236,0x160002AE,0x1200039E,0x34039D,0x2E000149,0x240000AA,0x22000236,0x2000011B,0x180001F1,0x68039D,0x1C000236,0x160002AE,0x1200039E,0x68039D,0x1C000236,0x160002AE,0x1200039E,0x1200039E,0xA4000108,0xFE0C0205,0xF61C016E,0x48000118,0x3600011B,0x2800011B,0x220000C2,0x20000173,0x6C0001DB,0x3C00016A,0x1E0001B2,0x160002AE, -0x4C039D,0x80071,0x80071,0x80071,0x80071,0x26000000,0x26000000,0x26000000,0x12000000,0x12000000,0xC000000,0x10000055,0x10000055,0x10000055,0xC000020,0xC000020,0xC000010,0x8000055,0x8000055,0x8000034,0x6000055,0xC0071,0xC0071,0xC0071,0xC000030,0xC000030,0xC000020,0x600005D,0x600005D,0x800003D,0x6000059,0x140071, -0x140071,0x4000052,0x4000062,0x4000072,0x44000019,0xC8000000,0x80071,0x24000022,0x1A00001D,0x12000022,0x1200001D,0xC000022,0x24000036,0x1A00002D,0xA000056,0x800003D,0x100071,0xC0055,0xC0055,0xC0055,0xC0055,0x26000000,0x26000000,0x26000000,0x12000000,0x12000000,0xC000000,0x100055,0x100055,0x100055,0xC000020,0xC000020, -0xC000010,0x200055,0x200055,0x8000034,0x6000055,0x100055,0x100055,0x100055,0xC000020,0xC000020,0xC000010,0x200055,0x200055,0x8000034,0x6000055,0x200055,0x200055,0x8000034,0x6000055,0x6000055,0x44000019,0xC8000000,0xC0055,0x24000022,0x1A00001D,0x12000022,0x1200001D,0xC000022,0x24000032,0x1A000029,0x180055,0x8000034, -0x180055,0x3801A5,0x621C0001,0x361C0001,0x28180002,0x5001A5,0x3A000028,0x28000001,0xA001A5,0x200000A2,0x1A0001A5,0x5001A5,0x3A000028,0x28000001,0xA001A5,0x200000A2,0x1A0001A5,0xA001A5,0x200000A2,0x1A0001A5,0x1A0001A5,0x5001A5,0x3A000028,0x28000001,0xA001A5,0x200000A2,0x1A0001A5,0xA001A5,0x200000A2,0x1A0001A5,0x1A0001A5,0xA001A5, -0x200000A2,0x1A0001A5,0x1A0001A5,0x1A0001A5,0xA40000A4,0x3C01A5,0xFC280062,0x52000091,0x360000A2,0x2C00009D,0x2400006A,0x200000CA,0x720000DD,0x4C0000B4,0x26000059,0x1A0001A5,0x7001A5,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table97[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x280001,0x280001,0x280001,0x280001,0x23C0000,0x23C0000,0x23C0000,0x7C0000,0x7C0000,0x14000000,0x23C0000,0x23C0000,0x23C0000,0x7C0000,0x7C0000,0x14000000,0x7C0000,0x7C0000,0x14000000,0x14000000,0x23C0000,0x23C0000,0x23C0000,0x7C0000,0x7C0000,0x14000000,0x7C0000,0x7C0000,0x14000000,0x14000000,0x7C0000, -0x7C0000,0x14000000,0x14000000,0x14000000,0x300000,0x2C0000,0x280001,0x380000,0x440000,0x580000,0x640000,0x980000,0x340000,0x23C0000,0x580000,0x14000000,0x580000,0x640000,0x940000,0x12C0000,0x30000001,0x940000,0x12C0000,0x30000001,0x12C0000,0x30000001,0x30000001,0x940000,0x12C0000,0x30000001,0x12C0000,0x30000001, -0x30000001,0x12C0000,0x30000001,0x30000001,0x30000001,0x940000,0x12C0000,0x30000001,0x12C0000,0x30000001,0x30000001,0x12C0000,0x30000001,0x30000001,0x30000001,0x12C0000,0x30000001,0x30000001,0x30000001,0x30000001,0x7C0000,0xC680000,0xC680000,0xA80000,0xF40000,0x1E00000,0x30000001,0x30000001,0x880000,0xBC0000,0x25DC0000,0x30000001, -0xD40000,0x240691,0x86100129,0x460C0129,0x300C012A,0x6A0001A5,0x46000008,0x32000011,0x340001A5,0x2C00006A,0x220001A5,0x480004ED,0x3A0001D1,0x2E00010E,0x2E00029E,0x2800012B,0x2200021E,0x220004ED,0x20000306,0x1C000362,0x160004EE,0x340691,0x34000316,0x280001E3,0x28000367,0x280001D4,0x1E000295,0x1E000566,0x2000037F,0x1C0003C6,0x1600052E,0x680691, -0x1C00048F,0x1600049F,0x140005C6,0x10000693,0xC2000118,0xF2140349,0xF61C0372,0x66000128,0x4000013B,0x34000135,0x2A0000C3,0x240001AE,0x82000289,0x500001CD,0x26000266,0x1C0003C6,0x4C0691,0x3004ED,0x821400DD,0x441400DD,0x301400DE,0x6A0001A5,0x46000008,0x3204000E,0x340001A5,0x2C00006A,0x220001A5,0x24404ED,0x3A0001D1,0x2E00010E,0x2E00029E,0x2800012B, -0x2200021E,0x8C04ED,0x20000306,0x1C000362,0x160004EE,0x24404ED,0x3A0001D1,0x2E00010E,0x2E00029E,0x2800012B,0x2200021E,0x8C04ED,0x20000306,0x1C000362,0x160004EE,0x8C04ED,0x20000306,0x1C000362,0x160004EE,0x160004EE,0xC2000118,0xF61C02BD,0xFA24026E,0x66000128,0x4000013B,0x34000135,0x2A0000C3,0x240001AE,0x82000249,0x500001B4,0x26000262,0x1C000362, -0x6404ED,0xC0129,0xC0129,0xC0129,0xC0129,0x3E000000,0x3E000000,0x3E000000,0x1E000000,0x1E000000,0x14000000,0x1C0000DD,0x1C0000DD,0x1C0000DD,0x18000050,0x18000050,0x12000028,0xE0000DD,0xE0000DD,0xE000088,0xA0000DD,0x140126,0x140126,0x140126,0x12000088,0x12000088,0x1200004C,0xC0000F1,0xC0000F1,0xC0000A1,0x80000EA,0x240126, -0x240126,0xA0000C6,0x8000105,0x6000126,0x76000041,0xFA040011,0xC0129,0x3A000059,0x28000050,0x22000055,0x1E000049,0x16000061,0x42000092,0x3200007D,0x120000DE,0xC0000A1,0x1C0126,0x1400DD,0x1400DD,0x1400DD,0x1400DD,0x3E000000,0x3E000000,0x3E000000,0x1E000000,0x1E000000,0x14000000,0x1C00DD,0x1C00DD,0x1C00DD,0x18000050,0x18000050, -0x12000028,0x3800DD,0x3800DD,0xE000088,0xA0000DD,0x1C00DD,0x1C00DD,0x1C00DD,0x18000050,0x18000050,0x12000028,0x3800DD,0x3800DD,0xE000088,0xA0000DD,0x3800DD,0x3800DD,0xE000088,0xA0000DD,0xA0000DD,0x76000041,0xFA04000D,0x1400DD,0x3A000059,0x28000050,0x22000055,0x1E000049,0x16000061,0x42000082,0x32000074,0x2800DD,0xE000088, -0x2800DD,0x4801A5,0x6A2C0001,0x3E2C0001,0x30280002,0x6801A5,0x46000008,0x30100001,0xD001A5,0x2C00006A,0x220001A5,0x6801A5,0x46000008,0x30100001,0xD001A5,0x2C00006A,0x220001A5,0xD001A5,0x2C00006A,0x220001A5,0x220001A5,0x6801A5,0x46000008,0x30100001,0xD001A5,0x2C00006A,0x220001A5,0xD001A5,0x2C00006A,0x220001A5,0x220001A5,0xD001A5, -0x2C00006A,0x220001A5,0x220001A5,0x220001A5,0xD6000075,0x4C01A5,0xF4380071,0x6E000059,0x4A00006A,0x36000064,0x2E00003A,0x280000A2,0x920400B5,0x5E00007D,0x32000028,0x220001A5,0x9401A5,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table98[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x40001,0x80000,0x80000,0x80000,0x80000,0x80000, -0x80000,0xC0000,0xC0000,0xC0000,0x2000000,0x80000,0x80000,0x80000,0x80000,0x80000,0x80000,0xC0000,0xC0000,0xC0000,0x2000000,0xC0000,0xC0000,0xC0000,0x2000000,0x2000000,0xA040000,0x40001,0x40001,0x6040000,0x80000,0x80000,0x80000,0x80000,0x6040000,0x80000,0xC0000,0xC0000, -0xC0000,0x380001,0x380001,0x380001,0x380001,0x540000,0x540000,0x540000,0xAC0000,0xAC0000,0x1C000000,0x540000,0x540000,0x540000,0xAC0000,0xAC0000,0x1C000000,0xAC0000,0xAC0000,0x1C000000,0x1C000000,0x540000,0x540000,0x540000,0xAC0000,0xAC0000,0x1C000000,0xAC0000,0xAC0000,0x1C000000,0x1C000000,0xAC0000, -0xAC0000,0x1C000000,0x1C000000,0x1C000000,0x440000,0x63C0000,0x380001,0x24C0000,0x600000,0x780000,0x8C0000,0xD40000,0x480000,0x540000,0x780000,0x1C000000,0x780000,0x740000,0xAC0000,0x15C0000,0x38000001,0xAC0000,0x15C0000,0x38000001,0x15C0000,0x38000001,0x38000001,0xAC0000,0x15C0000,0x38000001,0x15C0000,0x38000001, -0x38000001,0x15C0000,0x38000001,0x38000001,0x38000001,0xAC0000,0x15C0000,0x38000001,0x15C0000,0x38000001,0x38000001,0x15C0000,0x38000001,0x38000001,0x38000001,0x15C0000,0x38000001,0x38000001,0x38000001,0x38000001,0x900000,0x7C0000,0x7C0000,0x2C00000,0x11C0000,0x9F00000,0x38000001,0x38000001,0x9C0000,0xD80000,0x2DEC0000,0x38000001, -0xF40000,0x2C088E,0x9A1401FE,0x501401FE,0x381401FF,0x7E0401AA,0x54040005,0x3C08003E,0x3E0401AA,0x3404004F,0x2A0401AA,0x5A0005EA,0x46000218,0x36000163,0x3A0002BD,0x3000010A,0x28000215,0x2C0005EA,0x26000383,0x240003DD,0x1C0005ED,0x40088E,0x40000401,0x3400028E,0x340003FA,0x2E0001F3,0x280002BE,0x2800069F,0x2600042C,0x20000463,0x1C000651,0x80088E, -0x200005EF,0x1C0005B2,0x1A000749,0x1600088E,0xF40000E9,0xF61C0486,0xFA24050F,0x74000111,0x5000011D,0x3C000103,0x34000096,0x2C00019A,0xA40002D1,0x5E0001D1,0x320002B1,0x20000463,0x5C088E,0x3C05EA,0x8E200152,0x4E200152,0x38200153,0x7A0801A6,0x54040001,0x3A10002A,0x3E0401A6,0x3404004B,0x2A0401A6,0x5805EA,0x46000218,0x36000163,0x3A0002BD,0x3000010A, -0x28000215,0xB005EA,0x26000383,0x240003DD,0x1C0005ED,0x5805EA,0x46000218,0x36000163,0x3A0002BD,0x3000010A,0x28000215,0xB005EA,0x26000383,0x240003DD,0x1C0005ED,0xB005EA,0x26000383,0x240003DD,0x1C0005ED,0x1C0005ED,0xF40000E9,0xFC280356,0xFE2C0353,0x74000111,0x5000011D,0x3C000103,0x34000096,0x2C00019A,0xA400026D,0x5E0001AD,0x320002A8,0x240003DD, -0x7C05EA,0x1401FE,0x1401FE,0x1401FE,0x1401FE,0x52040005,0x52040005,0x52040005,0x28040006,0x28040006,0x1C040005,0x30000152,0x30000152,0x30000152,0x22000059,0x22000059,0x1A000028,0x16000154,0x16000154,0x160000B4,0xE000154,0x2001FD,0x2001FD,0x2001FD,0x220000D2,0x220000D2,0x18000069,0x12000188,0x12000188,0x140000EA,0xE00016D,0x3C01FD, -0x3C01FD,0x10000149,0xA0001B5,0xA0001FD,0xB6000049,0xFE0C005E,0x1401FE,0x50000071,0x3C000061,0x2E000061,0x2A000049,0x2000007D,0x640000DD,0x440000AD,0x1E000156,0x140000EA,0x2C01FD,0x200152,0x200152,0x200152,0x200152,0x4E080001,0x4E080001,0x4E080001,0x28080001,0x28080001,0x1C040001,0x300152,0x300152,0x300152,0x22000059,0x22000059, -0x1A000028,0x5C0152,0x5C0152,0x160000B4,0xE000154,0x300152,0x300152,0x300152,0x22000059,0x22000059,0x1A000028,0x5C0152,0x5C0152,0x160000B4,0xE000154,0x5C0152,0x5C0152,0x160000B4,0xE000154,0xE000154,0xB6000049,0xFE0C003A,0x200152,0x50000071,0x3C000061,0x2E000061,0x2A000049,0x2000007D,0x640000B9,0x4400009D,0x400152,0x160000B4, -0x400152,0x5801A5,0x723C0001,0x463C0001,0x38380002,0x8001A5,0x54040000,0x38200001,0x10001A5,0x36000049,0x2A0001A5,0x8001A5,0x54040000,0x38200001,0x10001A5,0x36000049,0x2A0001A5,0x10001A5,0x36000049,0x2A0001A5,0x2A0001A5,0x8001A5,0x54040000,0x38200001,0x10001A5,0x36000049,0x2A0001A5,0x10001A5,0x36000049,0x2A0001A5,0x2A0001A5,0x10001A5, -0x36000049,0x2A0001A5,0x2A0001A5,0x2A0001A5,0xF6040055,0x5C01A5,0xFC480071,0x84000034,0x56000048,0x40000048,0x36000019,0x32000071,0xB4040091,0x6E000055,0x3C00000A,0x2A0001A5,0xB401A5,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x8000000,0x8000000,0x8000000,0x8000000,0x8000000, -0x8000000,0x4000000,0x4000000,0x4000000,0x2000000,0x40005,0x40005,0x40005,0x40005,0x40005,0x40005,0x2000002,0x2000002,0x2000002,0x2000001,0x5,0x5,0x5,0x2000004,0x5,0x28000000,0x40005,0x40005,0x12000000,0xC000000,0xA000000,0xA000000,0x6000000,0x12000001,0xC000001,0x4000000,0x2000002, -0x5,}; -static const uint32_t g_etc1_to_bc7_m6_table99[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x140001,0x200000,0x200000,0x200000,0x200000,0x200000, -0x200000,0x3C0000,0x3C0000,0x3C0000,0xA000000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x3C0000,0x3C0000,0x3C0000,0xA000000,0x3C0000,0x3C0000,0x3C0000,0xA000000,0xA000000,0x180000,0x140001,0x140001,0x2180000,0x1C0000,0x1C0000,0x1C0000,0x240000,0x2180000,0x1C0000,0x2C0000,0x3C0000, -0x2C0000,0x480001,0x480001,0x480001,0x480001,0x6C0000,0x6C0000,0x6C0000,0xDC0000,0xDC0000,0x24000000,0x6C0000,0x6C0000,0x6C0000,0xDC0000,0xDC0000,0x24000000,0xDC0000,0xDC0000,0x24000000,0x24000000,0x6C0000,0x6C0000,0x6C0000,0xDC0000,0xDC0000,0x24000000,0xDC0000,0xDC0000,0x24000000,0x24000000,0xDC0000, -0xDC0000,0x24000000,0x24000000,0x24000000,0x2540000,0xE4C0000,0x480001,0x640000,0x7C0000,0x9C0000,0xB40000,0x1100000,0x5C0000,0x6C0000,0x9C0000,0x24000000,0x9C0000,0x840000,0xC40000,0x18C0000,0x40000001,0xC40000,0x18C0000,0x40000001,0x18C0000,0x40000001,0x40000001,0xC40000,0x18C0000,0x40000001,0x18C0000,0x40000001, -0x40000001,0x18C0000,0x40000001,0x40000001,0x40000001,0xC40000,0x18C0000,0x40000001,0x18C0000,0x40000001,0x40000001,0x18C0000,0x40000001,0x40000001,0x40000001,0x18C0000,0x40000001,0x40000001,0x40000001,0x40000001,0xA40000,0x8C0000,0x8C0000,0xDC0000,0x1400000,0x13F00000,0x40000001,0x40000001,0xB40000,0xF80000,0x35FC0000,0x40000001, -0x1180000,0x380A26,0xA62002D2,0x5A2002D2,0x402002D3,0x8E0C01E2,0x600C003F,0x461000A2,0x480C01E2,0x3C080073,0x320C01E2,0x720005EA,0x580001A8,0x40040159,0x4600024D,0x3A00007A,0x320001C9,0x380005EA,0x320002FB,0x2C000362,0x240005ED,0x540A26,0x4C000489,0x3C00031B,0x40000432,0x3A0001E3,0x2E0002BE,0x34000717,0x30000415,0x2A00043D,0x2400067D,0xA40A26, -0x260006D7,0x26000662,0x20000819,0x1C000A26,0xFA0400DE,0xFC2805A6,0xFE2C0687,0x90000086,0x6200008B,0x4C00007A,0x40000029,0x380000FA,0xC200025D,0x76000142,0x3C000233,0x2A00043D,0x740A26,0x4C05EA,0x96300152,0x56300152,0x40300153,0x821801A6,0x5C140001,0x4220002A,0x461401A6,0x3C14004B,0x321401A6,0x7005EA,0x580001A8,0x40080153,0x4600024D,0x3A00007A, -0x320001C9,0xE405EA,0x320002FB,0x2C000362,0x240005ED,0x7005EA,0x580001A8,0x40080153,0x4600024D,0x3A00007A,0x320001C9,0xE405EA,0x320002FB,0x2C000362,0x240005ED,0xE405EA,0x320002FB,0x2C000362,0x240005ED,0x240005ED,0xFC0800CE,0xF438037A,0xFA44035E,0x90000086,0x6200008B,0x4C00007A,0x40000029,0x380000FA,0xD00001C1,0x7C000105,0x3C000223,0x2C000362, -0xA005EA,0x2002D2,0x2002D2,0x2002D2,0x2002D2,0x620C003D,0x620C003D,0x620C003D,0x320C003E,0x320C003E,0x240C003D,0x48000152,0x48000152,0x48000152,0x34000025,0x34000025,0x24000001,0x22000152,0x22000152,0x1C000080,0x16000154,0x3002D2,0x3002D2,0x3002D2,0x280000FE,0x280000FE,0x2200007D,0x1E0001C8,0x1E0001C8,0x1C0000E4,0x16000194,0x5C02D2, -0x5C02D2,0x160001B5,0x14000228,0xE0002D5,0xF6000014,0xF21400F5,0x2002D2,0x7A000034,0x54000028,0x40000028,0x38000019,0x2A00003D,0x820000D5,0x5A000086,0x2C00015B,0x1C0000E4,0x4002D2,0x300152,0x300152,0x300152,0x300152,0x56180001,0x56180001,0x56180001,0x30180001,0x30180001,0x24140001,0x2440152,0x2440152,0x2440152,0x34000025,0x34000025, -0x24000001,0x8C0152,0x8C0152,0x1C000080,0x16000154,0x2440152,0x2440152,0x2440152,0x34000025,0x34000025,0x24000001,0x8C0152,0x8C0152,0x1C000080,0x16000154,0x8C0152,0x8C0152,0x1C000080,0x16000154,0x16000154,0xF6000014,0xF820003D,0x300152,0x7A000034,0x54000028,0x40000028,0x38000019,0x2A00003D,0x96000088,0x5A000062,0x640152,0x1C000080, -0x640152,0x6801A5,0x7A4C0001,0x4E4C0001,0x40480002,0x9801A5,0x5C140000,0x40300001,0x13001A5,0x3E000022,0x320001A5,0x9801A5,0x5C140000,0x40300001,0x13001A5,0x3E000022,0x320001A5,0x13001A5,0x3E000022,0x320001A5,0x320001A5,0x9801A5,0x5C140000,0x40300001,0x13001A5,0x3E000022,0x320001A5,0x13001A5,0x3E000022,0x320001A5,0x320001A5,0x13001A5, -0x3E000022,0x320001A5,0x320001A5,0x320001A5,0xFE140055,0x6C01A5,0xF4580082,0x9A000019,0x6A000028,0x4E000022,0x40000005,0x3A000055,0xD2040071,0x8600003A,0x44000001,0x320001A5,0xD801A5,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0xC003D,0x20000000,0x20000000,0x20000000,0x20000000,0x20000000, -0x20000000,0x10000000,0x10000000,0x10000000,0xA000000,0x10003D,0x10003D,0x10003D,0x10003D,0x10003D,0x10003D,0xC000014,0xC000014,0xC000014,0x800000D,0x18003D,0x18003D,0x18003D,0x8000028,0x400003D,0xA8000000,0xC003D,0xC003D,0x4A000000,0x34000000,0x28000000,0x28000000,0x1A000000,0x44000011,0x34000009,0x14000001,0xC000014, -0x14003D,}; -static const uint32_t g_etc1_to_bc7_m6_table100[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000, -0x3C0000,0x740000,0x740000,0x740000,0x12000001,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x740000,0x740000,0x740000,0x12000001,0x740000,0x740000,0x740000,0x12000001,0x12000001,0xC280000,0x280000,0x280000,0x42C0000,0x2300000,0x2340000,0x2340000,0x2400000,0x42C0000,0x2300000,0x540000,0x740000, -0x540000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x880000,0x880000,0x880000,0x1140000,0x1140000,0x2C000001,0x880000,0x880000,0x880000,0x1140000,0x1140000,0x2C000001,0x1140000,0x1140000,0x2C000001,0x2C000001,0x880000,0x880000,0x880000,0x1140000,0x1140000,0x2C000001,0x1140000,0x1140000,0x2C000001,0x2C000001,0x1140000, -0x1140000,0x2C000001,0x2C000001,0x2C000001,0x6680000,0x8600000,0x5C0000,0x7C0000,0x980000,0xC00000,0xE00000,0x1540000,0x740000,0x880000,0xC00000,0x2C000001,0xC00000,0x940001,0x2DC0000,0x1C40000,0x4A000000,0x2DC0000,0x1C40000,0x4A000000,0x1C40000,0x4A000000,0x4A000000,0x2DC0000,0x1C40000,0x4A000000,0x1C40000,0x4A000000, -0x4A000000,0x1C40000,0x4A000000,0x4A000000,0x4A000000,0x2DC0000,0x1C40000,0x4A000000,0x1C40000,0x4A000000,0x4A000000,0x1C40000,0x4A000000,0x4A000000,0x4A000000,0x1C40000,0x4A000000,0x4A000000,0x4A000000,0x4A000000,0xBC0000,0xA00000,0xA00000,0xFC0000,0x16C0000,0x1DFC0000,0x4A000000,0x4A000000,0xCC0000,0x1180000,0x3FF00000,0x4A000000, -0x13C0000,0x480C65,0xB62C0428,0x642C0428,0x4A2C0428,0xA014026D,0x6A1400D8,0x4C1C0165,0x5414026D,0x461000F0,0x3C14026D,0x8E0005EA,0x6A000163,0x4E080192,0x580001F6,0x4600001D,0x3C0001A5,0x440005ED,0x3E000284,0x38000301,0x2E0005EA,0x680C63,0x5E000594,0x48000438,0x4C0004BF,0x40000222,0x3A000313,0x400007BE,0x3A00041D,0x34000436,0x2E0006CB,0xD00C63, -0x32000818,0x2C00076D,0x2600094E,0x22000C63,0xFE140162,0xF43807A1,0xF840089D,0xB400001E,0x76000024,0x5A000018,0x4A000001,0x4200007A,0xF2000212,0x900000D7,0x460001D4,0x34000436,0x940C63,0x5C05ED,0x9E440154,0x5E440154,0x4A400154,0x8C2801A5,0x64280002,0x4A300029,0x4E2801A5,0x4424004C,0x3C2801A5,0x8C05EA,0x6A000163,0x4A180152,0x580001F6,0x4600001D, -0x3C0001A5,0x11805EA,0x3E000284,0x38000301,0x2E0005EA,0x8C05EA,0x6A000163,0x4A180152,0x580001F6,0x4600001D,0x3C0001A5,0x11805EA,0x3E000284,0x38000301,0x2E0005EA,0x11805EA,0x3E000284,0x38000301,0x2E0005EA,0x2E0005EA,0xFC1C00E5,0xFE4C0379,0xF2540384,0xB400001E,0x76000024,0x5A000018,0x4A000001,0x4200007A,0xF2000131,0x90000086,0x460001C4,0x38000301, -0xC805EA,0x2C0428,0x2C0428,0x2C0428,0x2C0428,0x761400C8,0x761400C8,0x761400C8,0x3E1400C8,0x3E1400C8,0x2C1400C9,0x64000152,0x64000152,0x64000152,0x40000005,0x40000005,0x2E04000E,0x30000152,0x30000152,0x26000055,0x20000152,0x400428,0x400428,0x400428,0x3400018E,0x3400018E,0x2A0000F1,0x2800022D,0x2800022D,0x240000FA,0x1E0001BE,0x800428, -0x800428,0x1C00028B,0x1C0002DB,0x1400042B,0xFC0C003E,0xF82001F1,0x2C0428,0xA400000D,0x6C000008,0x52000008,0x4C000000,0x38000019,0xB40000E3,0x76000072,0x3E000162,0x240000FA,0x5C0428,0x400154,0x400154,0x400154,0x400154,0x62280000,0x62280000,0x62280000,0x3A280000,0x3A280000,0x2C280001,0x600152,0x600152,0x600152,0x40000005,0x40000005, -0x2C100001,0xC40152,0xC40152,0x26000055,0x20000152,0x600152,0x600152,0x600152,0x40000005,0x40000005,0x2C100001,0xC40152,0xC40152,0x26000055,0x20000152,0xC40152,0xC40152,0x26000055,0x20000152,0x20000152,0xFE100012,0xF2340048,0x400154,0xA400000D,0x6C000008,0x52000008,0x4C000000,0x38000019,0xC2000055,0x7C000032,0x8C0152,0x26000055, -0x8C0152,0x7801A5,0x845C0000,0x585C0000,0x4A5C0000,0xB401A5,0x66240000,0x4A400000,0x16801A5,0x4800000D,0x3C0001A5,0xB401A5,0x66240000,0x4A400000,0x16801A5,0x4800000D,0x3C0001A5,0x16801A5,0x4800000D,0x3C0001A5,0x3C0001A5,0xB401A5,0x66240000,0x4A400000,0x16801A5,0x4800000D,0x3C0001A5,0x16801A5,0x4800000D,0x3C0001A5,0x3C0001A5,0x16801A5, -0x4800000D,0x3C0001A5,0x3C0001A5,0x3C0001A5,0xFE280062,0x8001A5,0xFE6C0080,0xB4000005,0x78000012,0x5A000008,0x4A080000,0x46000034,0xF6080055,0x98000019,0x4E100000,0x3C0001A5,0xFC01A5,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x1400C8,0x3C000000,0x3C000000,0x3C000000,0x3C000000,0x3C000000, -0x3C000000,0x1C000001,0x1C000001,0x1C000001,0x12000001,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x1C00C8,0x18000049,0x18000049,0x18000049,0x12000025,0x3800C8,0x3800C8,0x3800C8,0xE00007D,0x80000CA,0xFA040008,0x1400C8,0x1400C8,0x8A000000,0x60000000,0x4A000000,0x4A000000,0x30000000,0x7600003A,0x5600001D,0x24000004,0x18000049, -0x2800C8,}; -static const uint32_t g_etc1_to_bc7_m6_table101[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000, -0x2500000,0xA40000,0xA40000,0xA40000,0x1A000001,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0x2500000,0xA40000,0xA40000,0xA40000,0x1A000001,0xA40000,0xA40000,0xA40000,0x1A000001,0x1A000001,0x3C0000,0x380000,0x380000,0x400000,0x2440000,0x4C0000,0x4C0000,0x5C0000,0x400000,0x2440000,0x740000,0xA40000, -0x740000,0x6C0000,0x6C0000,0x6C0000,0x6C0000,0xA00000,0xA00000,0xA00000,0x1440000,0x1440000,0x34000001,0xA00000,0xA00000,0xA00000,0x1440000,0x1440000,0x34000001,0x1440000,0x1440000,0x34000001,0x34000001,0xA00000,0xA00000,0xA00000,0x1440000,0x1440000,0x34000001,0x1440000,0x1440000,0x34000001,0x34000001,0x1440000, -0x1440000,0x34000001,0x34000001,0x34000001,0x27C0000,0x740000,0x6C0000,0x2900000,0xB40000,0xE40000,0x1080000,0x1900000,0x880000,0xA00000,0xE40000,0x34000001,0xE40000,0xA40001,0x2F40000,0x1F40000,0x52000000,0x2F40000,0x1F40000,0x52000000,0x1F40000,0x52000000,0x52000000,0x2F40000,0x1F40000,0x52000000,0x1F40000,0x52000000, -0x52000000,0x1F40000,0x52000000,0x52000000,0x52000000,0x2F40000,0x1F40000,0x52000000,0x1F40000,0x52000000,0x52000000,0x1F40000,0x52000000,0x52000000,0x52000000,0x1F40000,0x52000000,0x52000000,0x52000000,0x52000000,0xD00000,0xB00000,0xB00000,0x3140000,0x1940000,0x27FC0000,0x52000000,0x52000000,0xE00000,0x1380000,0x49C40000,0x52000000, -0x1600000,0x540EC9,0xC23805B4,0x6C3805B5,0x523805B4,0xB21C032D,0x762001A4,0x56240261,0x5E1C032D,0x501801A4,0x441C032D,0xA60005EA,0x7C000153,0x581001FE,0x620001B9,0x50000005,0x440401B9,0x500005ED,0x4A000234,0x3E0002A5,0x360005EA,0x780EC7,0x680006C8,0x520005B3,0x5800057F,0x4C0002A2,0x400003A7,0x4C000876,0x46000455,0x3C00044E,0x34000717,0xF40EC7, -0x380009AC,0x320008B1,0x30000A97,0x28000EC7,0xFE20025D,0xFA440995,0xFE4C0AC9,0xCC000001,0x8A000004,0x66000003,0x5408001C,0x4E000030,0xFE000225,0xA60000A2,0x54000194,0x3C00044E,0xAC0EC7,0x6C05ED,0xA6540154,0x66540154,0x52500154,0x943801A5,0x6C380002,0x52400029,0x563801A5,0x4C34004C,0x443801A5,0xA405EA,0x78040153,0x52280152,0x620001B9,0x50000005, -0x441001A5,0x14C05EA,0x4A000234,0x3E0002A5,0x360005EA,0xA405EA,0x78040153,0x52280152,0x620001B9,0x50000005,0x441001A5,0x14C05EA,0x4A000234,0x3E0002A5,0x360005EA,0x14C05EA,0x4A000234,0x3E0002A5,0x360005EA,0x360005EA,0xFC3000F8,0xF65C039D,0xFA640384,0xCC000001,0x8A000004,0x66000003,0x52100001,0x4E000030,0xFE08010A,0xA8000035,0x5400017B,0x3E0002A5, -0xE805EA,0x3805B4,0x3805B4,0x3805B4,0x3805B4,0x861C0188,0x861C0188,0x861C0188,0x481C0188,0x481C0188,0x341C0189,0x7C000152,0x7C000152,0x7C000152,0x50000001,0x50000001,0x36080042,0x3C000152,0x3C000152,0x3200002D,0x28000152,0x5005B3,0x5005B3,0x5005B3,0x4000024E,0x4000024E,0x3400019B,0x340002A5,0x340002A5,0x2E000122,0x280001FB,0xA005B3, -0xA005B3,0x26000389,0x200003B6,0x1A0005B3,0xF81400D1,0xFE2C0329,0x3805B4,0xC6000001,0x84000001,0x64000001,0x5A040009,0x48000004,0xE4000105,0x9600006A,0x4C00016B,0x2E000122,0x7005B3,0x500154,0x500154,0x500154,0x500154,0x6A380000,0x6A380000,0x6A380000,0x42380000,0x42380000,0x34380001,0x780152,0x780152,0x780152,0x4C080001,0x4C080001, -0x34200001,0xF40152,0xF40152,0x3200002D,0x28000152,0x780152,0x780152,0x780152,0x4C080001,0x4C080001,0x34200001,0xF40152,0xF40152,0x3200002D,0x28000152,0xF40152,0xF40152,0x3200002D,0x28000152,0x28000152,0xFE200019,0xFA440048,0x500154,0xC4040001,0x80040000,0x62040000,0x54100000,0x48000004,0xF4000034,0x9A000012,0xAC0152,0x3200002D, -0xAC0152,0x8801A5,0x8C6C0000,0x606C0000,0x526C0000,0xC801A5,0x6E340000,0x52500000,0x19801A5,0x50000004,0x440001A5,0xC801A5,0x6E340000,0x52500000,0x19801A5,0x50000004,0x440001A5,0x19801A5,0x50000004,0x440001A5,0x440001A5,0xC801A5,0x6E340000,0x52500000,0x19801A5,0x50000004,0x440001A5,0x19801A5,0x50000004,0x440001A5,0x440001A5,0x19801A5, -0x50000004,0x440001A5,0x440001A5,0x440001A5,0xF6400071,0x9001A5,0xF67C0091,0xCC000000,0x8A000004,0x66000002,0x52180000,0x5000001D,0xFE180055,0xAE00000A,0x56200000,0x440001A5,0x12001A5,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x1C0188,0x54000000,0x54000000,0x54000000,0x54000000,0x54000000, -0x54000000,0x28000000,0x28000000,0x28000000,0x1A000001,0x280188,0x280188,0x280188,0x280188,0x280188,0x280188,0x22000089,0x22000089,0x22000089,0x18000049,0x500188,0x500188,0x500188,0x100000F2,0xC00018A,0xFE0C0048,0x1C0188,0x1C0188,0xC4000000,0x88000000,0x68000000,0x68000000,0x44000000,0xA000007D,0x74000041,0x34000009,0x22000089, -0x380188,}; -static const uint32_t g_etc1_to_bc7_m6_table102[] = { -0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x240000, -0x240000,0x240000,0x240000,0x6000000,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xC0001,0xE0C0000,0xE0C0000,0xE0C0000,0x140000,0x1C0000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x2680000,0x2680000,0x2680000,0x2680000,0x2680000, -0x2680000,0xD80000,0xD80000,0xD80000,0x22000001,0x2680000,0x2680000,0x2680000,0x2680000,0x2680000,0x2680000,0xD80000,0xD80000,0xD80000,0x22000001,0xD80000,0xD80000,0xD80000,0x22000001,0x22000001,0x4C0000,0x480000,0x480000,0x540000,0x2580000,0x600000,0x600000,0x780000,0x540000,0x2580000,0x980000,0xD80000, -0x980000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0xB80000,0xB80000,0xB80000,0x1740000,0x1740000,0x3C000001,0xB80000,0xB80000,0xB80000,0x1740000,0x1740000,0x3C000001,0x1740000,0x1740000,0x3C000001,0x3C000001,0xB80000,0xB80000,0xB80000,0x1740000,0x1740000,0x3C000001,0x1740000,0x1740000,0x3C000001,0x3C000001,0x1740000, -0x1740000,0x3C000001,0x3C000001,0x3C000001,0x900000,0x840000,0x7C0000,0xA80000,0xD00000,0x1080000,0x12C0000,0x1CC0000,0x9C0000,0xB80000,0x1080000,0x3C000001,0x1080000,0xB40001,0x30C0000,0xBFC0000,0x5A000000,0x30C0000,0xBFC0000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x30C0000,0xBFC0000,0x5A000000,0xBFC0000,0x5A000000, -0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0x30C0000,0xBFC0000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0xBFC0000,0x5A000000,0x5A000000,0x5A000000,0x5A000000,0xE40000,0x8C00000,0x8C00000,0x1300000,0x1BC0000,0x31FC0000,0x5A000000,0x5A000000,0xF80000,0x1540000,0x51D40000,0x5A000000, -0x1800000,0x600F1E,0xCA4805ED,0x764405ED,0x5A4405ED,0xBA2C034A,0x7E3001C5,0x5E340286,0x662C034A,0x582801BD,0x4C2C034A,0xAE1005EB,0x84100154,0x6020020F,0x6C0801B6,0x58100006,0x4C1401BE,0x5A0C05EB,0x5204020D,0x4604028A,0x3E0C05EB,0x900F1A,0x7A000675,0x5A1005EA,0x680004BE,0x5800021D,0x4C000362,0x580007E9,0x4E00036B,0x46000371,0x3C00069F,0x1240F1A, -0x44000939,0x3E0007FE,0x38000A17,0x30000F1A,0xFE3402CD,0xFE4C0A1A,0xF65C0B55,0xD4100002,0x920C0004,0x6E100004,0x5C180025,0x560C0023,0xFE100262,0xBC000021,0x5E000159,0x46000371,0xD00F1A,0x7C05ED,0xAE640154,0x6E640154,0x5A600154,0x9C4801A5,0x74480002,0x5A500029,0x5E4801A5,0x5444004C,0x4C4801A5,0xBC05EA,0x80140153,0x5A380152,0x700001A6,0x58100005, -0x4C2001A5,0x17C05EA,0x500001E8,0x48000266,0x3E0005EA,0xBC05EA,0x80140153,0x5A380152,0x700001A6,0x58100005,0x4C2001A5,0x17C05EA,0x500001E8,0x48000266,0x3E0005EA,0x17C05EA,0x500001E8,0x48000266,0x3E0005EA,0x3E0005EA,0xFE3C0121,0xFE6C039D,0xF27403AD,0xD4100001,0x920C0003,0x6E100003,0x5A200001,0x5800000D,0xFE180129,0xBC000008,0x5E000158,0x48000266, -0x10C05EA,0x4405ED,0x4405ED,0x4405ED,0x4405ED,0x8E2C01A5,0x8E2C01A5,0x8E2C01A5,0x502C01A5,0x502C01A5,0x3C2C01A6,0x84100153,0x84100153,0x84100153,0x58100002,0x58100002,0x3E18004B,0x44100153,0x44100153,0x380C002A,0x300C0153,0x6805EA,0x6805EA,0x6805EA,0x52000205,0x52000205,0x3C0401A6,0x40000248,0x40000248,0x3A0000A9,0x2E00019A,0xD005EA, -0xD005EA,0x2C000356,0x2A00037E,0x220005EA,0xFE2000FA,0xF63C0379,0x4405ED,0xD80C0001,0x8C100002,0x6C100002,0x6410000B,0x500C0002,0xFE0000B5,0xB6000015,0x5A040153,0x3A0000A9,0x9405EA,0x600154,0x600154,0x600154,0x600154,0x72480000,0x72480000,0x72480000,0x4A480000,0x4A480000,0x3C480001,0x900152,0x900152,0x900152,0x54180001,0x54180001, -0x3C300001,0x1240152,0x1240152,0x3A000019,0x30000152,0x900152,0x900152,0x900152,0x54180001,0x54180001,0x3C300001,0x1240152,0x1240152,0x3A000019,0x30000152,0x1240152,0x1240152,0x3A000019,0x30000152,0x30000152,0xFA340020,0xF2540055,0x600154,0xD80C0000,0x88140000,0x6A140000,0x5C200000,0x52080000,0xFE0C0032,0xBC000004,0xD00152,0x3A000019, -0xD00152,0x9801A5,0x947C0000,0x687C0000,0x5A7C0000,0xE001A5,0x76440000,0x5A600000,0x1CC01A5,0x5A040000,0x4C0001A5,0xE001A5,0x76440000,0x5A600000,0x1CC01A5,0x5A040000,0x4C0001A5,0x1CC01A5,0x5A040000,0x4C0001A5,0x4C0001A5,0xE001A5,0x76440000,0x5A600000,0x1CC01A5,0x5A040000,0x4C0001A5,0x1CC01A5,0x5A040000,0x4C0001A5,0x4C0001A5,0x1CC01A5, -0x5A040000,0x4C0001A5,0x4C0001A5,0x4C0001A5,0xFE500071,0xA401A5,0xFE8C0091,0xD4100000,0x98000000,0x70080000,0x5A280000,0x5800000D,0xFE2C0062,0xBC040002,0x5E300000,0x4C0001A5,0x14001A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x2C01A5,0x5C100001,0x5C100001,0x5C100001,0x5C100001,0x5C100001, -0x5C100001,0x30100001,0x30100001,0x30100001,0x220C0002,0x4001A5,0x4001A5,0x4001A5,0x4001A5,0x4001A5,0x4001A5,0x2E000050,0x2E000050,0x2E000050,0x22000011,0x7C01A5,0x7C01A5,0x7C01A5,0x1C0000C1,0x140001A5,0xF61C0062,0x2C01A5,0x2C01A5,0xCC100001,0x90100001,0x70100001,0x70100001,0x4C100001,0xE0000041,0xA400000D,0x40080001,0x2E000050, -0x5801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table103[] = { -0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x580000, -0x580000,0x580000,0x580000,0xE000000,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x200000,0x200000,0x200000,0x2C0000,0x3C0000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x2800000,0x2800000,0x2800000,0x2800000,0x2800000, -0x2800000,0x1080000,0x1080000,0x1080000,0x2A000001,0x2800000,0x2800000,0x2800000,0x2800000,0x2800000,0x2800000,0x1080000,0x1080000,0x1080000,0x2A000001,0x1080000,0x1080000,0x1080000,0x2A000001,0x2A000001,0x65C0000,0x580000,0x580000,0x4640000,0x26C0000,0x780000,0x780000,0x940000,0x4640000,0x26C0000,0xB80000,0x1080000, -0xB80000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0xD00000,0xD00000,0xD00000,0x1A40000,0x1A40000,0x44000001,0xD00000,0xD00000,0xD00000,0x1A40000,0x1A40000,0x44000001,0x1A40000,0x1A40000,0x44000001,0x44000001,0xD00000,0xD00000,0xD00000,0x1A40000,0x1A40000,0x44000001,0x1A40000,0x1A40000,0x44000001,0x44000001,0x1A40000, -0x1A40000,0x44000001,0x44000001,0x44000001,0x6A00000,0x2940000,0x8C0000,0x2BC0000,0xEC0000,0x1280000,0x1540000,0x3FC0000,0xB00000,0xD00000,0x1280000,0x44000001,0x1280000,0xC40001,0x3240000,0x17FC0000,0x62000000,0x3240000,0x17FC0000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x3240000,0x17FC0000,0x62000000,0x17FC0000,0x62000000, -0x62000000,0x17FC0000,0x62000000,0x62000000,0x62000000,0x3240000,0x17FC0000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x62000000,0x17FC0000,0x62000000,0x62000000,0x62000000,0x62000000,0xF80000,0xD40000,0xD40000,0x14C0000,0x1E40000,0x3BFC0000,0x62000000,0x62000000,0x10C0000,0x1740000,0x59E40000,0x62000000, -0x1A40000,0x700F1E,0xD25805ED,0x7E5405ED,0x625405ED,0xC23C034A,0x864001C5,0x66440286,0x6E3C034A,0x603801BD,0x543C034A,0xB62005EB,0x8C200154,0x6830020F,0x741801B6,0x60200006,0x542401BE,0x621C05EB,0x5A14020D,0x4E14028A,0x461C05EB,0xA80F1A,0x8C000615,0x622005EA,0x74000416,0x620001BA,0x5404034A,0x6200073B,0x5800028C,0x4E0002AB,0x4600062A,0x1580F1A, -0x50000899,0x4A000746,0x3E000983,0x38000F1A,0xFE440322,0xFA640A2E,0xFE6C0B55,0xDC200002,0x9A1C0004,0x76200004,0x64280025,0x5E1C0023,0xFE1C02BE,0xD0040000,0x66100159,0x4E0002AB,0xF00F1A,0x8C05ED,0xB6740154,0x76740154,0x62700154,0xA45801A5,0x7C580002,0x62600029,0x665801A5,0x5C54004C,0x545801A5,0x2D005EA,0x88240153,0x62480152,0x7A0C01A5,0x60200005, -0x543001A5,0x1AC05EA,0x5C0001A8,0x50000221,0x460005EA,0x2D005EA,0x88240153,0x62480152,0x7A0C01A5,0x60200005,0x543001A5,0x1AC05EA,0x5C0001A8,0x50000221,0x460005EA,0x1AC05EA,0x5C0001A8,0x50000221,0x460005EA,0x460005EA,0xFE500132,0xF67C03C5,0xFA8403AD,0xDC200001,0x9A1C0003,0x76200003,0x62300001,0x6008000C,0xFC300155,0xD0040000,0x68040152,0x50000221, -0x12C05EA,0x5405ED,0x5405ED,0x5405ED,0x5405ED,0x963C01A5,0x963C01A5,0x963C01A5,0x583C01A5,0x583C01A5,0x443C01A6,0x8C200153,0x8C200153,0x8C200153,0x60200002,0x60200002,0x4628004B,0x4C200153,0x4C200153,0x401C002A,0x381C0153,0x8005EA,0x8005EA,0x8005EA,0x620001BA,0x620001BA,0x441401A6,0x520001E4,0x520001E4,0x4200003B,0x38000162,0x10005EA, -0x10005EA,0x380002DE,0x32000303,0x2A0005EA,0xFC380111,0xFE4C0379,0x5405ED,0xE01C0001,0x94200002,0x74200002,0x6C20000B,0x581C0002,0xFE1400C8,0xD0040000,0x62140153,0x4200003B,0xB405EA,0x700154,0x700154,0x700154,0x700154,0x7A580000,0x7A580000,0x7A580000,0x52580000,0x52580000,0x44580001,0xA80152,0xA80152,0xA80152,0x5C280001,0x5C280001, -0x44400001,0x1580152,0x1580152,0x44000005,0x38000152,0xA80152,0xA80152,0xA80152,0x5C280001,0x5C280001,0x44400001,0x1580152,0x1580152,0x44000005,0x38000152,0x1580152,0x1580152,0x44000005,0x38000152,0x38000152,0xF6480029,0xFA640055,0x700154,0xE01C0000,0x90240000,0x72240000,0x64300000,0x5A180000,0xF624003D,0xD0040000,0xF00152,0x44000005, -0xF00152,0xA801A5,0x9C8C0000,0x708C0000,0x628C0000,0xF801A5,0x7E540000,0x62700000,0x1FC01A5,0x62140000,0x540001A5,0xF801A5,0x7E540000,0x62700000,0x1FC01A5,0x62140000,0x540001A5,0x1FC01A5,0x62140000,0x540001A5,0x540001A5,0xF801A5,0x7E540000,0x62700000,0x1FC01A5,0x62140000,0x540001A5,0x1FC01A5,0x62140000,0x540001A5,0x540001A5,0x1FC01A5, -0x62140000,0x540001A5,0x540001A5,0x540001A5,0xFA640080,0xB401A5,0xF69C00A4,0xDC200000,0xA0100000,0x78180000,0x62380000,0x60000005,0xF4480071,0xD0040000,0x66400000,0x540001A5,0x16401A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x3C01A5,0x64200001,0x64200001,0x64200001,0x64200001,0x64200001, -0x64200001,0x38200001,0x38200001,0x38200001,0x2A1C0002,0x5401A5,0x5401A5,0x5401A5,0x5401A5,0x5401A5,0x5401A5,0x3A000020,0x3A000020,0x3A000020,0x2A040001,0xAC01A5,0xAC01A5,0xAC01A5,0x26000092,0x1C0001A5,0xFE2C0062,0x3C01A5,0x3C01A5,0xD4200001,0x98200001,0x78200001,0x78200001,0x54200001,0xFA080029,0xCE040000,0x48180001,0x3A000020, -0x7801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table104[] = { -0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x8C0000, -0x8C0000,0x8C0000,0x8C0000,0x16000001,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x340000,0x340000,0x340000,0x2440000,0x640000,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x680001,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000, -0x9C0000,0x13C0000,0x13C0000,0x13C0000,0x34000000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x9C0000,0x13C0000,0x13C0000,0x13C0000,0x34000000,0x13C0000,0x13C0000,0x13C0000,0x34000000,0x34000000,0x700000,0x680001,0x680001,0x7C0000,0x840000,0x900000,0x900000,0xB00000,0x7C0000,0x840000,0xE00000,0x13C0000, -0xE00000,0x9C0001,0x9C0001,0x9C0001,0x9C0001,0x2E80000,0x2E80000,0x2E80000,0x1DC0000,0x1DC0000,0x4E000000,0x2E80000,0x2E80000,0x2E80000,0x1DC0000,0x1DC0000,0x4E000000,0x1DC0000,0x1DC0000,0x4E000000,0x4E000000,0x2E80000,0x2E80000,0x2E80000,0x1DC0000,0x1DC0000,0x4E000000,0x1DC0000,0x1DC0000,0x4E000000,0x4E000000,0x1DC0000, -0x1DC0000,0x4E000000,0x4E000000,0x4E000000,0xB80000,0xA80000,0x9C0001,0x2D40000,0x1080000,0x1500000,0x1800000,0x11F40000,0x2C40000,0x2E80000,0x1500000,0x4E000000,0x1500000,0xD80000,0x1400000,0x25F80000,0x6A000001,0x1400000,0x25F80000,0x6A000001,0x25F80000,0x6A000001,0x6A000001,0x1400000,0x25F80000,0x6A000001,0x25F80000,0x6A000001, -0x6A000001,0x25F80000,0x6A000001,0x6A000001,0x6A000001,0x1400000,0x25F80000,0x6A000001,0x25F80000,0x6A000001,0x6A000001,0x25F80000,0x6A000001,0x6A000001,0x6A000001,0x25F80000,0x6A000001,0x6A000001,0x6A000001,0x6A000001,0x50C0000,0xAE40000,0xAE40000,0x16C0000,0x9F80000,0x47F80000,0x6A000001,0x6A000001,0x3240000,0x1940000,0x63D80000,0x6A000001, -0x1CC0000,0x840F1A,0xDE6805EA,0x866805EA,0x6A6805EB,0xCC4C034A,0x8E5001C3,0x6E54028A,0x784C034A,0x684C01BE,0x5C4C034A,0xC23005EA,0x96300153,0x7040020D,0x7E2C01B2,0x6A300006,0x5E3401BD,0x6A3005EA,0x6224020F,0x58280286,0x4E3005ED,0xC40F1A,0x9E0005EB,0x6C3005EB,0x86000392,0x6C0801B6,0x5C18034A,0x740006B1,0x640001DC,0x5A000215,0x4E0005F1,0x18C0F1A, -0x5C0007F7,0x50000662,0x4A0008D9,0x40000F1E,0xFE580372,0xF4780A82,0xF67C0B9A,0xE6300000,0xA2300003,0x80300002,0x6C380023,0x662C0025,0xFE3402F6,0xD6180002,0x7024015B,0x5A000215,0x1180F1A,0xA005EA,0xC0840152,0x80840152,0x6A840153,0xAC6C01A6,0x86680001,0x6C74002A,0x706801A6,0x6668004B,0x5C6801A6,0xEC05EA,0x90380153,0x6A5C0153,0x841C01A5,0x6A340002, -0x5C4401A5,0x1E405EA,0x6600017D,0x5A0001F1,0x4E0005ED,0xEC05EA,0x90380153,0x6A5C0153,0x841C01A5,0x6A340002,0x5C4401A5,0x1E405EA,0x6600017D,0x5A0001F1,0x4E0005ED,0x1E405EA,0x6600017D,0x5A0001F1,0x4E0005ED,0x4E0005ED,0xFE640164,0xFE8C03CE,0xF49803D3,0xE6300000,0xA2300003,0x80300002,0x6C440002,0x6A1C000B,0xFE440171,0xD6180001,0x72140152,0x5A0001F1, -0x15405EA,0x6805EA,0x6805EA,0x6805EA,0x6805EA,0xA24C01A5,0xA24C01A5,0xA24C01A5,0x624C01A5,0x624C01A5,0x4E4C01A5,0x96300152,0x96300152,0x96300152,0x68300005,0x68300005,0x503C004C,0x56300152,0x56300152,0x4A300029,0x40300154,0x29805EA,0x29805EA,0x29805EA,0x740001A5,0x740001A5,0x4E2401A5,0x5E00018B,0x5E00018B,0x4E000004,0x40080154,0x13805EA, -0x13805EA,0x44000279,0x3C0002A5,0x320005ED,0xFC480129,0xF860039D,0x6805EA,0xE6300000,0xA0300002,0x80300002,0x7634000C,0x62300001,0xFE2800F2,0xD6180001,0x6C240152,0x4E000004,0xDC05EA,0x840152,0x840152,0x840152,0x840152,0x806C0001,0x806C0001,0x806C0001,0x5A6C0001,0x5A6C0001,0x4E680001,0xC40152,0xC40152,0xC40152,0x643C0001,0x643C0001, -0x4E500000,0x18C0152,0x18C0152,0x4E000000,0x40000154,0xC40152,0xC40152,0xC40152,0x643C0001,0x643C0001,0x4E500000,0x18C0152,0x18C0152,0x4E000000,0x40000154,0x18C0152,0x18C0152,0x4E000000,0x40000154,0x40000154,0xFE580029,0xF4780062,0x840152,0xE6300000,0x98380000,0x7A380001,0x6E400000,0x622C0000,0xFE34003D,0xD6180000,0x1180152,0x4E000000, -0x1180152,0xBC01A5,0xA4A00001,0x78A00001,0x6A9C0002,0x11401A5,0x86680000,0x6A840001,0xFF801A5,0x6A2C0001,0x5C0001A5,0x11401A5,0x86680000,0x6A840001,0xFF801A5,0x6A2C0001,0x5C0001A5,0xFF801A5,0x6A2C0001,0x5C0001A5,0x5C0001A5,0x11401A5,0x86680000,0x6A840001,0xFF801A5,0x6A2C0001,0x5C0001A5,0xFF801A5,0x6A2C0001,0x5C0001A5,0x5C0001A5,0xFF801A5, -0x6A2C0001,0x5C0001A5,0x5C0001A5,0x5C0001A5,0xFE780080,0xC801A5,0xFEAC00AA,0xE2340000,0xA8240000,0x82280000,0x6A4C0001,0x6A000001,0xFC580071,0xD8180000,0x70500000,0x5C0001A5,0x18C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x4C01A5,0x6E300000,0x6E300000,0x6E300000,0x6E300000,0x6E300000, -0x6E300000,0x42300000,0x42300000,0x42300000,0x34300000,0x7001A5,0x7001A5,0x7001A5,0x7001A5,0x7001A5,0x7001A5,0x4C000002,0x4C000002,0x4C000002,0x34140000,0xE401A5,0xE401A5,0xE401A5,0x30000061,0x260001A5,0xF8400071,0x4C01A5,0x4C01A5,0xE2300000,0xA4300000,0x84300000,0x84300000,0x5E300000,0xFC1C0032,0xCE180001,0x52280000,0x4C000002, -0xA001A5,}; -static const uint32_t g_etc1_to_bc7_m6_table105[] = { -0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0x25C0000,0xBC0000, -0xBC0000,0xBC0000,0xBC0000,0x1E000001,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x440000,0x440000,0x440000,0x25C0000,0x880000,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0x780001,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000, -0xB40000,0x1700000,0x1700000,0x1700000,0x3C000000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x1700000,0x1700000,0x1700000,0x3C000000,0x1700000,0x1700000,0x1700000,0x3C000000,0x3C000000,0x8800000,0x780001,0x780001,0x28C0000,0x980000,0x2A40000,0x2A40000,0xCC0000,0x28C0000,0x980000,0x1000000,0x1700000, -0x1000000,0xAC0001,0xAC0001,0xAC0001,0xAC0001,0x3000000,0x3000000,0x3000000,0x5FC0000,0x5FC0000,0x56000000,0x3000000,0x3000000,0x3000000,0x5FC0000,0x5FC0000,0x56000000,0x5FC0000,0x5FC0000,0x56000000,0x56000000,0x3000000,0x3000000,0x3000000,0x5FC0000,0x5FC0000,0x56000000,0x5FC0000,0x5FC0000,0x56000000,0x56000000,0x5FC0000, -0x5FC0000,0x56000000,0x56000000,0x56000000,0x4C80000,0x4B80000,0xAC0001,0xEC0000,0x1240000,0x1700000,0x1A80000,0x1BF80000,0x2D80000,0x3000000,0x1700000,0x56000000,0x1700000,0xE80000,0x1580000,0x31F80000,0x72000001,0x1580000,0x31F80000,0x72000001,0x31F80000,0x72000001,0x72000001,0x1580000,0x31F80000,0x72000001,0x31F80000,0x72000001, -0x72000001,0x31F80000,0x72000001,0x72000001,0x72000001,0x1580000,0x31F80000,0x72000001,0x31F80000,0x72000001,0x72000001,0x31F80000,0x72000001,0x72000001,0x72000001,0x31F80000,0x72000001,0x72000001,0x72000001,0x72000001,0x5200000,0xF80000,0xF80000,0x1840000,0x15FC0000,0x51F80000,0x72000001,0x72000001,0x13C0000,0x1B40000,0x6BE80000,0x72000001, -0x1EC0000,0x940F1A,0xE67805EA,0x8E7805EA,0x727805EB,0xD45C034A,0x966001C3,0x7664028A,0x805C034A,0x705C01BE,0x645C034A,0xCA4005EA,0x9E400153,0x7850020D,0x863C01B2,0x72400006,0x664401BD,0x724005EA,0x6A34020F,0x60380286,0x564005ED,0xDC0F1A,0xA61005EB,0x744005EB,0x9200035A,0x741801B6,0x6428034A,0x80000651,0x7000017C,0x620001D5,0x580805ED,0x1BC0F1A, -0x66000786,0x5C0005BA,0x50000861,0x48000F1E,0xFE6403C8,0xFC880A82,0xFE8C0B9A,0xEE400000,0xAA400003,0x88400002,0x74480023,0x6E3C0025,0xFE440361,0xDE280002,0x7834015B,0x620001D5,0x1380F1A,0xB005EA,0xC8940152,0x88940152,0x72940153,0xB47C01A6,0x8E780001,0x7484002A,0x787801A6,0x6E78004B,0x647801A6,0x10405EA,0x98480153,0x726C0153,0x8C2C01A5,0x72440002, -0x645401A5,0x7FC05EA,0x70000163,0x620001D5,0x560005ED,0x10405EA,0x98480153,0x726C0153,0x8C2C01A5,0x72440002,0x645401A5,0x7FC05EA,0x70000163,0x620001D5,0x560005ED,0x7FC05EA,0x70000163,0x620001D5,0x560005ED,0x560005ED,0xFE780179,0xF8A003EA,0xFCA803D3,0xEE400000,0xAA400003,0x88400002,0x74540002,0x722C000B,0xFE5C0189,0xDE280001,0x7A240152,0x620001D5, -0x17405EA,0x7805EA,0x7805EA,0x7805EA,0x7805EA,0xAA5C01A5,0xAA5C01A5,0xAA5C01A5,0x6A5C01A5,0x6A5C01A5,0x565C01A5,0x9E400152,0x9E400152,0x9E400152,0x70400005,0x70400005,0x584C004C,0x5E400152,0x5E400152,0x52400029,0x48400154,0x2B005EA,0x2B005EA,0x2B005EA,0x7C1001A5,0x7C1001A5,0x563401A5,0x6A000163,0x6A000163,0x56080002,0x48180154,0x16805EA, -0x16805EA,0x4E000239,0x44000248,0x3A0005ED,0xFE580149,0xFE6C03A5,0x7805EA,0xEE400000,0xA8400002,0x88400002,0x7E44000C,0x6A400001,0xFE3C010A,0xDE280001,0x74340152,0x56080002,0xFC05EA,0x940152,0x940152,0x940152,0x940152,0x887C0001,0x887C0001,0x887C0001,0x627C0001,0x627C0001,0x56780001,0xDC0152,0xDC0152,0xDC0152,0x6C4C0001,0x6C4C0001, -0x56600000,0x1BC0152,0x1BC0152,0x56100000,0x48000154,0xDC0152,0xDC0152,0xDC0152,0x6C4C0001,0x6C4C0001,0x56600000,0x1BC0152,0x1BC0152,0x56100000,0x48000154,0x1BC0152,0x1BC0152,0x56100000,0x48000154,0x48000154,0xFA6C0032,0xFC880062,0x940152,0xEE400000,0xA0480000,0x82480001,0x76500000,0x6A3C0000,0xFA48004A,0xDE280000,0x1380152,0x56100000, -0x1380152,0xCC01A5,0xACB00001,0x80B00001,0x72AC0002,0x12C01A5,0x8E780000,0x72940001,0x1BF801A5,0x723C0001,0x640001A5,0x12C01A5,0x8E780000,0x72940001,0x1BF801A5,0x723C0001,0x640001A5,0x1BF801A5,0x723C0001,0x640001A5,0x640001A5,0x12C01A5,0x8E780000,0x72940001,0x1BF801A5,0x723C0001,0x640001A5,0x1BF801A5,0x723C0001,0x640001A5,0x640001A5,0x1BF801A5, -0x723C0001,0x640001A5,0x640001A5,0x640001A5,0xFA8C0091,0xD801A5,0xF8C000B5,0xEA440000,0xB0340000,0x8A380000,0x725C0001,0x72100001,0xFC6C0082,0xE0280000,0x78600000,0x640001A5,0x1AC01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x5C01A5,0x76400000,0x76400000,0x76400000,0x76400000,0x76400000, -0x76400000,0x4A400000,0x4A400000,0x4A400000,0x3C400000,0x8801A5,0x8801A5,0x8801A5,0x8801A5,0x8801A5,0x8801A5,0x58080000,0x58080000,0x58080000,0x3C240000,0x11401A5,0x11401A5,0x11401A5,0x38000034,0x2E0001A5,0xFE4C0075,0x5C01A5,0x5C01A5,0xEA400000,0xAC400000,0x8C400000,0x8C400000,0x66400000,0xF830003D,0xD6280001,0x5A380000,0x58080000, -0xC001A5,}; -static const uint32_t g_etc1_to_bc7_m6_table106[] = { -0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0x2740000,0xF00000, -0xF00000,0xF00000,0xF00000,0x26000001,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x2540000,0x2540000,0x2540000,0x2740000,0xA80000,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0x880001,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000, -0xCC0000,0x1A00000,0x1A00000,0x1A00000,0x44000000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0x1A00000,0x1A00000,0x1A00000,0x44000000,0x1A00000,0x1A00000,0x1A00000,0x44000000,0x44000000,0x940000,0x880001,0x880001,0xA00000,0xAC0000,0xBC0000,0xBC0000,0xE80000,0xA00000,0xAC0000,0x1240000,0x1A00000, -0x1240000,0xBC0001,0xBC0001,0xBC0001,0xBC0001,0x3180000,0x3180000,0x3180000,0x11FC0000,0x11FC0000,0x5E000000,0x3180000,0x3180000,0x3180000,0x11FC0000,0x11FC0000,0x5E000000,0x11FC0000,0x11FC0000,0x5E000000,0x5E000000,0x3180000,0x3180000,0x3180000,0x11FC0000,0x11FC0000,0x5E000000,0x11FC0000,0x11FC0000,0x5E000000,0x5E000000,0x11FC0000, -0x11FC0000,0x5E000000,0x5E000000,0x5E000000,0xDC0000,0xCC80000,0xBC0001,0x3000000,0x1400000,0x1940000,0x1D00000,0x25FC0000,0x2EC0000,0x3180000,0x1940000,0x5E000000,0x1940000,0xF80000,0x1700000,0x3DF80000,0x7A000001,0x1700000,0x3DF80000,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x1700000,0x3DF80000,0x7A000001,0x3DF80000,0x7A000001, -0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x1700000,0x3DF80000,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x3DF80000,0x7A000001,0x7A000001,0x7A000001,0x7A000001,0x5340000,0x1080000,0x1080000,0x1A00000,0x23FC0000,0x5BF80000,0x7A000001,0x7A000001,0x1500000,0x1D00000,0x73F80000,0x7A000001, -0x9FC0000,0xA40F1A,0xEE8805EA,0x968805EA,0x7A8805EB,0xDC6C034A,0x9E7001C3,0x7E74028A,0x886C034A,0x786C01BE,0x6C6C034A,0xD25005EA,0xA6500153,0x8060020D,0x8E4C01B2,0x7A500006,0x6E5401BD,0x7A5005EA,0x7244020F,0x68480286,0x5E5005ED,0xF40F1A,0xAE2005EB,0x7C5005EB,0xA004034A,0x7C2801B6,0x6C38034A,0x8C000611,0x7A000155,0x6A0801C5,0x601805ED,0x1F00F1A, -0x6C000716,0x66000542,0x5C0007E9,0x50000F1E,0xFE780419,0xF4980ADA,0xF8A00BDB,0xF6500000,0xB2500003,0x90500002,0x7C580023,0x764C0025,0xFE5C03A2,0xE6380002,0x8044015B,0x6A0801C5,0x15C0F1A,0xC005EA,0xD0A40152,0x90A40152,0x7AA40153,0xBC8C01A6,0x96880001,0x7C94002A,0x808801A6,0x7688004B,0x6C8801A6,0x11C05EA,0xA0580153,0x7A7C0153,0x943C01A5,0x7A540002, -0x6C6401A5,0x13FC05EA,0x7A000155,0x6C0001B5,0x5E0005ED,0x11C05EA,0xA0580153,0x7A7C0153,0x943C01A5,0x7A540002,0x6C6401A5,0x13FC05EA,0x7A000155,0x6C0001B5,0x5E0005ED,0x13FC05EA,0x7A000155,0x6C0001B5,0x5E0005ED,0x5E0005ED,0xFC9001A9,0xFEAC03FE,0xF4B803FE,0xF6500000,0xB2500003,0x90500002,0x7C640002,0x7A3C000B,0xFE7001C3,0xE6380001,0x82340152,0x6C0001B5, -0x19805EA,0x8805EA,0x8805EA,0x8805EA,0x8805EA,0xB26C01A5,0xB26C01A5,0xB26C01A5,0x726C01A5,0x726C01A5,0x5E6C01A5,0xA6500152,0xA6500152,0xA6500152,0x78500005,0x78500005,0x605C004C,0x66500152,0x66500152,0x5A500029,0x50500154,0xC805EA,0xC805EA,0xC805EA,0x842001A5,0x842001A5,0x5E4401A5,0x78040153,0x78040153,0x5E180002,0x50280154,0x19805EA, -0x19805EA,0x560001FD,0x4E00020D,0x420005ED,0xFE680164,0xF88003C2,0x8805EA,0xF6500000,0xB0500002,0x90500002,0x8654000C,0x72500001,0xFC4C0123,0xE6380001,0x7C440152,0x5E180002,0x12005EA,0xA40152,0xA40152,0xA40152,0xA40152,0x908C0001,0x908C0001,0x908C0001,0x6A8C0001,0x6A8C0001,0x5E880001,0xF40152,0xF40152,0xF40152,0x745C0001,0x745C0001, -0x5E700000,0x1F00152,0x1F00152,0x5E200000,0x50000154,0xF40152,0xF40152,0xF40152,0x745C0001,0x745C0001,0x5E700000,0x1F00152,0x1F00152,0x5E200000,0x50000154,0x1F00152,0x1F00152,0x5E200000,0x50000154,0x50000154,0xF680003D,0xF4980071,0xA40152,0xF6500000,0xA8580000,0x8A580001,0x7E600000,0x724C0000,0xF2600055,0xE6380000,0x15C0152,0x5E200000, -0x15C0152,0xDC01A5,0xB4C00001,0x88C00001,0x7ABC0002,0x14401A5,0x96880000,0x7AA40001,0x27F801A5,0x7A4C0001,0x6C0001A5,0x14401A5,0x96880000,0x7AA40001,0x27F801A5,0x7A4C0001,0x6C0001A5,0x27F801A5,0x7A4C0001,0x6C0001A5,0x6C0001A5,0x14401A5,0x96880000,0x7AA40001,0x27F801A5,0x7A4C0001,0x6C0001A5,0x27F801A5,0x7A4C0001,0x6C0001A5,0x6C0001A5,0x27F801A5, -0x7A4C0001,0x6C0001A5,0x6C0001A5,0x6C0001A5,0xFAA000A2,0xE801A5,0xFECC00C1,0xF2540000,0xB8440000,0x92480000,0x7A6C0001,0x7A200001,0xF6880091,0xE8380000,0x80700000,0x6C0001A5,0x1D001A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x6C01A5,0x7E500000,0x7E500000,0x7E500000,0x7E500000,0x7E500000, -0x7E500000,0x52500000,0x52500000,0x52500000,0x44500000,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0x60180000,0x60180000,0x60180000,0x44340000,0x14401A5,0x14401A5,0x14401A5,0x4200001D,0x360001A5,0xF8600080,0x6C01A5,0x6C01A5,0xF2500000,0xB4500000,0x94500000,0x94500000,0x6E500000,0xFE3C0041,0xDE380001,0x62480000,0x60180000, -0xE401A5,}; -static const uint32_t g_etc1_to_bc7_m6_table107[] = { -0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x1200000, -0x1200000,0x1200000,0x1200000,0x2E000001,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0xA640000,0xA640000,0xA640000,0x28C0000,0xCC0000,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0x980001,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000, -0xE40000,0x1D00000,0x1D00000,0x1D00000,0x4C000000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0x1D00000,0x1D00000,0x1D00000,0x4C000000,0x1D00000,0x1D00000,0x1D00000,0x4C000000,0x4C000000,0xA40000,0x980001,0x980001,0xB40000,0xC00000,0xD00000,0xD00000,0x3000000,0xB40000,0xC00000,0x1480000,0x1D00000, -0x1480000,0xCC0001,0xCC0001,0xCC0001,0xCC0001,0x3300000,0x3300000,0x3300000,0x1DFC0000,0x1DFC0000,0x66000000,0x3300000,0x3300000,0x3300000,0x1DFC0000,0x1DFC0000,0x66000000,0x1DFC0000,0x1DFC0000,0x66000000,0x66000000,0x3300000,0x3300000,0x3300000,0x1DFC0000,0x1DFC0000,0x66000000,0x1DFC0000,0x1DFC0000,0x66000000,0x66000000,0x1DFC0000, -0x1DFC0000,0x66000000,0x66000000,0x66000000,0xF00000,0xDC0000,0xCC0001,0x1180000,0x1580000,0x1B40000,0x1F80000,0x31F80000,0x3000000,0x3300000,0x1B40000,0x66000000,0x1B40000,0x1080000,0x1880000,0x49F80000,0x82000001,0x1880000,0x49F80000,0x82000001,0x49F80000,0x82000001,0x82000001,0x1880000,0x49F80000,0x82000001,0x49F80000,0x82000001, -0x82000001,0x49F80000,0x82000001,0x82000001,0x82000001,0x1880000,0x49F80000,0x82000001,0x49F80000,0x82000001,0x82000001,0x49F80000,0x82000001,0x82000001,0x82000001,0x49F80000,0x82000001,0x82000001,0x82000001,0x82000001,0x14C0000,0x5180000,0x5180000,0x1BC0000,0x31FC0000,0x65F80000,0x82000001,0x82000001,0x1680000,0x1F00000,0x7DCC0000,0x82000001, -0x19FC0000,0xB40F1A,0xF69805EA,0x9E9805EA,0x829805EB,0xE47C034A,0xA68001C3,0x8684028A,0x907C034A,0x807C01BE,0x747C034A,0xDA6005EA,0xAE600153,0x8870020D,0x965C01B2,0x82600006,0x766401BD,0x826005EA,0x7A54020F,0x70580286,0x666005ED,0x10C0F1A,0xB63005EB,0x846005EB,0xA814034A,0x843801B6,0x7448034A,0x980005F1,0x820C0154,0x721801C5,0x682805ED,0xBF80F1A, -0x780006AE,0x6C0004B6,0x64000795,0x58000F1E,0xFE8C046E,0xFCA80ADA,0xFEAC0BDF,0xFE600000,0xBA600003,0x98600002,0x84680023,0x7E5C0025,0xFE700403,0xEE480002,0x8854015B,0x721801C5,0x17C0F1A,0xD005EA,0xD8B40152,0x98B40152,0x82B40153,0xC49C01A6,0x9E980001,0x84A4002A,0x889801A6,0x7E98004B,0x749801A6,0x13405EA,0xA8680153,0x828C0153,0x9C4C01A5,0x82640002, -0x747401A5,0x1FF805EA,0x820C0153,0x740001A9,0x660005ED,0x13405EA,0xA8680153,0x828C0153,0x9C4C01A5,0x82640002,0x747401A5,0x1FF805EA,0x820C0153,0x740001A9,0x660005ED,0x1FF805EA,0x820C0153,0x740001A9,0x660005ED,0x660005ED,0xFEA001C8,0xFAC40412,0xFCC803FE,0xFE600000,0xBA600003,0x98600002,0x84740002,0x824C000B,0xFC8401E2,0xEE480001,0x8A440152,0x740001A9, -0x1B805EA,0x9805EA,0x9805EA,0x9805EA,0x9805EA,0xBA7C01A5,0xBA7C01A5,0xBA7C01A5,0x7A7C01A5,0x7A7C01A5,0x667C01A5,0xAE600152,0xAE600152,0xAE600152,0x80600005,0x80600005,0x686C004C,0x6E600152,0x6E600152,0x62600029,0x58600154,0xE005EA,0xE005EA,0xE005EA,0x8C3001A5,0x8C3001A5,0x665401A5,0x80140153,0x80140153,0x66280002,0x58380154,0x1CC05EA, -0x1CC05EA,0x600001D5,0x560001C8,0x4A0005ED,0xFA7C0191,0xFE8C03CE,0x9805EA,0xFE600000,0xB8600002,0x98600002,0x8E64000C,0x7A600001,0xFC600152,0xEE480001,0x84540152,0x66280002,0x14005EA,0xB40152,0xB40152,0xB40152,0xB40152,0x989C0001,0x989C0001,0x989C0001,0x729C0001,0x729C0001,0x66980001,0x10C0152,0x10C0152,0x10C0152,0x7C6C0001,0x7C6C0001, -0x66800000,0xBF80152,0xBF80152,0x66300000,0x58000154,0x10C0152,0x10C0152,0x10C0152,0x7C6C0001,0x7C6C0001,0x66800000,0xBF80152,0xBF80152,0x66300000,0x58000154,0xBF80152,0xBF80152,0x66300000,0x58000154,0x58000154,0xFE90003D,0xFCA80071,0xB40152,0xFE600000,0xB0680000,0x92680001,0x86700000,0x7A5C0000,0xFA700055,0xEE480000,0x17C0152,0x66300000, -0x17C0152,0xEC01A5,0xBCD00001,0x90D00001,0x82CC0002,0x15C01A5,0x9E980000,0x82B40001,0x33F801A5,0x825C0001,0x740001A5,0x15C01A5,0x9E980000,0x82B40001,0x33F801A5,0x825C0001,0x740001A5,0x33F801A5,0x825C0001,0x740001A5,0x740001A5,0x15C01A5,0x9E980000,0x82B40001,0x33F801A5,0x825C0001,0x740001A5,0x33F801A5,0x825C0001,0x740001A5,0x740001A5,0x33F801A5, -0x825C0001,0x740001A5,0x740001A5,0x740001A5,0xFCB000A4,0xFC01A5,0xF8E000CA,0xFA640000,0xC0540000,0x9A580000,0x827C0001,0x82300001,0xFE980091,0xF0480000,0x88800000,0x740001A5,0x1F001A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x7C01A5,0x86600000,0x86600000,0x86600000,0x86600000,0x86600000, -0x86600000,0x5A600000,0x5A600000,0x5A600000,0x4C600000,0xB801A5,0xB801A5,0xB801A5,0xB801A5,0xB801A5,0xB801A5,0x68280000,0x68280000,0x68280000,0x4C440000,0x17401A5,0x17401A5,0x17401A5,0x4A000008,0x3E0001A5,0xFE6C0088,0x7C01A5,0x7C01A5,0xFA600000,0xBC600000,0x9C600000,0x9C600000,0x76600000,0xFA50004A,0xE6480001,0x6A580000,0x68280000, -0x10801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table108[] = { -0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0x1580000, -0x1580000,0x1580000,0x1580000,0x38000000,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x4780000,0x4780000,0x4780000,0xA80000,0xF00000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000, -0x1000000,0x5F80000,0x5F80000,0x5F80000,0x54000001,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x1000000,0x5F80000,0x5F80000,0x5F80000,0x54000001,0x5F80000,0x5F80000,0x5F80000,0x54000001,0x54000001,0xB80000,0xAC0000,0xAC0000,0xC80000,0xD80000,0x2E80000,0x2E80000,0x1200000,0xC80000,0xD80000,0x16C0000,0x5F80000, -0x16C0000,0xE00000,0xE00000,0xE00000,0xE00000,0x14C0000,0x14C0000,0x14C0000,0x2BF80000,0x2BF80000,0x6E000001,0x14C0000,0x14C0000,0x14C0000,0x2BF80000,0x2BF80000,0x6E000001,0x2BF80000,0x2BF80000,0x6E000001,0x6E000001,0x14C0000,0x14C0000,0x14C0000,0x2BF80000,0x2BF80000,0x6E000001,0x2BF80000,0x2BF80000,0x6E000001,0x6E000001,0x2BF80000, -0x2BF80000,0x6E000001,0x6E000001,0x6E000001,0x1040000,0xEEC0000,0xE00000,0x1300000,0x1780000,0x1DC0000,0xFFC0000,0x3DF80000,0x1180000,0x14C0000,0x1DC0000,0x6E000001,0x1DC0000,0x1180001,0x1A40000,0x57F80000,0x8C000000,0x1A40000,0x57F80000,0x8C000000,0x57F80000,0x8C000000,0x8C000000,0x1A40000,0x57F80000,0x8C000000,0x57F80000,0x8C000000, -0x8C000000,0x57F80000,0x8C000000,0x8C000000,0x8C000000,0x1A40000,0x57F80000,0x8C000000,0x57F80000,0x8C000000,0x8C000000,0x57F80000,0x8C000000,0x8C000000,0x8C000000,0x57F80000,0x8C000000,0x8C000000,0x8C000000,0x8C000000,0x3600000,0x12C0000,0x12C0000,0x1D80000,0x3FFC0000,0x71F40000,0x8C000000,0x8C000000,0x1800000,0xDFC0000,0x85FC0000,0x8C000000, -0x29FC0000,0xC40F1E,0xFCAC05ED,0xA8A805ED,0x8CA805ED,0xEC90034A,0xB09401C5,0x90980286,0x9890034A,0x8A8C01BD,0x7E90034A,0xE07405EB,0xB6740154,0x9284020F,0x9E6C01B6,0x8A740006,0x7E7801BE,0x8C7005EB,0x8468020D,0x7868028A,0x707005EB,0x3240F1A,0xC04005EB,0x8C7405EA,0xB224034A,0x8E4C01B2,0x7E58034A,0xA40C05EB,0x8C1C0153,0x7C2C01C3,0x703C05EA,0x17FC0F1A, -0x8400065A,0x7800043A,0x6C00071A,0x62000F1A,0xFEA004CC,0xF6BC0B32,0xF8C00C1E,0xFE74000E,0xC4700004,0xA0740004,0x8E7C0025,0x88700023,0xFE800477,0xFA580000,0x90640159,0x7C2C01C3,0x1A40F1A,0xE005ED,0xE0C80154,0xA0C80154,0x8CC40154,0xCEAC01A5,0xA6AC0002,0x8CB40029,0x90AC01A5,0x86A8004C,0x7EAC01A5,0x15005EA,0xB2780153,0x8C9C0152,0xA46001A5,0x8A740005, -0x7E8401A5,0x2DF805EA,0x8C180152,0x7E0401A5,0x700005EA,0x15005EA,0xB2780153,0x8C9C0152,0xA46001A5,0x8A740005,0x7E8401A5,0x2DF805EA,0x8C180152,0x7E0401A5,0x700005EA,0x2DF805EA,0x8C180152,0x7E0401A5,0x700005EA,0x700005EA,0xFEB401E6,0xF2D4043D,0xF6DC0428,0xFE780005,0xC4700003,0xA0740003,0x8C840001,0x8A5C000C,0xFE980202,0xFA580000,0x92580152,0x7E0401A5, -0x1E005EA,0xA805ED,0xA805ED,0xA805ED,0xA805ED,0xC09001A5,0xC09001A5,0xC09001A5,0x829001A5,0x829001A5,0x6E9001A6,0xB6740153,0xB6740153,0xB6740153,0x8A740002,0x8A740002,0x707C004B,0x76740153,0x76740153,0x6A70002A,0x62700153,0xFC05EA,0xFC05EA,0xFC05EA,0x964001A5,0x964001A5,0x6E6801A6,0x88280153,0x88280153,0x703C0001,0x62480152,0x3F805EA, -0x3F805EA,0x6C0001B2,0x6000019A,0x540005EA,0xFE9001AE,0xFAA403EA,0xA805ED,0xFE740005,0xBE740002,0x9E740002,0x9674000B,0x82700002,0xFE74016D,0xFA580000,0x8C680153,0x703C0001,0x16805EA,0xC40154,0xC40154,0xC40154,0xC40154,0xA4AC0000,0xA4AC0000,0xA4AC0000,0x7CAC0000,0x7CAC0000,0x6EAC0001,0x3240152,0x3240152,0x3240152,0x867C0001,0x867C0001, -0x6E940001,0x17FC0152,0x17FC0152,0x6E440001,0x62000152,0x3240152,0x3240152,0x3240152,0x867C0001,0x867C0001,0x6E940001,0x17FC0152,0x17FC0152,0x6E440001,0x62000152,0x17FC0152,0x17FC0152,0x6E440001,0x62000152,0x62000152,0xFEA0004A,0xF6BC0080,0xC40154,0xFE780001,0xBA780000,0x9C780000,0x8E840000,0x846C0000,0xF8880062,0xFA580000,0x1A40152,0x6E440001, -0x1A40152,0xFC01A5,0xC6E00000,0x9AE00000,0x8CE00000,0x17801A5,0xA8A80000,0x8CC40000,0x3FFC01A5,0x8C680000,0x7E0001A5,0x17801A5,0xA8A80000,0x8CC40000,0x3FFC01A5,0x8C680000,0x7E0001A5,0x3FFC01A5,0x8C680000,0x7E0001A5,0x7E0001A5,0x17801A5,0xA8A80000,0x8CC40000,0x3FFC01A5,0x8C680000,0x7E0001A5,0x3FFC01A5,0x8C680000,0x7E0001A5,0x7E0001A5,0x3FFC01A5, -0x8C680000,0x7E0001A5,0x7E0001A5,0x7E0001A5,0xFAC800B5,0x10C01A5,0xF2F400DD,0xFC7C0002,0xCA640000,0xA26C0000,0x8C8C0000,0x8C3C0000,0xFEAC00A4,0xFA580000,0x90940000,0x7E0001A5,0xDFC01A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x9001A5,0x8E740001,0x8E740001,0x8E740001,0x8E740001,0x8E740001, -0x8E740001,0x62740001,0x62740001,0x62740001,0x54700002,0xD401A5,0xD401A5,0xD401A5,0xD401A5,0xD401A5,0xD401A5,0x703C0000,0x703C0000,0x703C0000,0x54580001,0x1AC01A5,0x1AC01A5,0x1AC01A5,0x54000001,0x460001A5,0xFA840091,0x9001A5,0x9001A5,0xFE740001,0xC2740001,0xA2740001,0xA2740001,0x7E740001,0xF8680055,0xF8580000,0x726C0001,0x703C0000, -0x12C01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table109[] = { -0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0x1880000, -0x1880000,0x1880000,0x1880000,0x40000000,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0x800001,0xC880000,0xC880000,0xC880000,0xC00000,0x1140000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000, -0x1180000,0x11F80000,0x11F80000,0x11F80000,0x5C000001,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x11F80000,0x11F80000,0x11F80000,0x5C000001,0x11F80000,0x11F80000,0x11F80000,0x5C000001,0x5C000001,0xC80000,0xBC0000,0xBC0000,0x4D80000,0xEC0000,0x1000000,0x1000000,0x13C0000,0x4D80000,0xEC0000,0x1900000,0x11F80000, -0x1900000,0xF00000,0xF00000,0xF00000,0xF00000,0x1640000,0x1640000,0x1640000,0x37F80000,0x37F80000,0x76000001,0x1640000,0x1640000,0x1640000,0x37F80000,0x37F80000,0x76000001,0x37F80000,0x37F80000,0x76000001,0x76000001,0x1640000,0x1640000,0x1640000,0x37F80000,0x37F80000,0x76000001,0x37F80000,0x37F80000,0x76000001,0x76000001,0x37F80000, -0x37F80000,0x76000001,0x76000001,0x76000001,0x1180000,0x1000000,0xF00000,0x3440000,0x1940000,0x1FC0000,0x1DF80000,0x47FC0000,0x12C0000,0x1640000,0x1FC0000,0x76000001,0x1FC0000,0x1280001,0x1BC0000,0x61FC0000,0x94000000,0x1BC0000,0x61FC0000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x1BC0000,0x61FC0000,0x94000000,0x61FC0000,0x94000000, -0x94000000,0x61FC0000,0x94000000,0x94000000,0x94000000,0x1BC0000,0x61FC0000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x94000000,0x61FC0000,0x94000000,0x94000000,0x94000000,0x94000000,0x3740000,0x73C0000,0x73C0000,0x1F40000,0x4DFC0000,0x7BF40000,0x94000000,0x94000000,0x1940000,0x1DFC0000,0x8FD00000,0x94000000, -0x39FC0000,0xD40F1E,0xFEBC05F1,0xB0B805ED,0x94B805ED,0xF4A0034A,0xB8A401C5,0x98A80286,0xA0A0034A,0x929C01BD,0x86A0034A,0xE88405EB,0xBE840154,0x9A94020F,0xA67C01B6,0x92840006,0x868801BE,0x948005EB,0x8C78020D,0x8078028A,0x788005EB,0x33C0F1A,0xC85005EB,0x948405EA,0xBA34034A,0x965C01B2,0x8668034A,0xAC1C05EB,0x942C0153,0x843C01C3,0x784C05EA,0x23FC0F1A, -0x8E000629,0x820003F2,0x760006D7,0x6A000F1A,0xFEB40537,0xFECC0B32,0xFECC0C36,0xFE880026,0xCC800004,0xA8840004,0x968C0025,0x90800023,0xFE9804C6,0xFC6C0006,0x98740159,0x843C01C3,0x1C80F1A,0xF005ED,0xE8D80154,0xA8D80154,0x94D40154,0xD6BC01A5,0xAEBC0002,0x94C40029,0x98BC01A5,0x8EB8004C,0x86BC01A5,0x16805EA,0xBA880153,0x94AC0152,0xAC7001A5,0x92840005, -0x869401A5,0x39F805EA,0x94280152,0x861401A5,0x780005EA,0x16805EA,0xBA880153,0x94AC0152,0xAC7001A5,0x92840005,0x869401A5,0x39F805EA,0x94280152,0x861401A5,0x780005EA,0x39F805EA,0x94280152,0x861401A5,0x780005EA,0x780005EA,0xFAC80226,0xFAE4043D,0xFEEC0428,0xFC900012,0xCC800003,0xA8840003,0x94940001,0x926C000C,0xFEAC0244,0xFE6C0002,0x9A680152,0x861401A5, -0x3FC05EA,0xB805ED,0xB805ED,0xB805ED,0xB805ED,0xC8A001A5,0xC8A001A5,0xC8A001A5,0x8AA001A5,0x8AA001A5,0x76A001A6,0xBE840153,0xBE840153,0xBE840153,0x92840002,0x92840002,0x788C004B,0x7E840153,0x7E840153,0x7280002A,0x6A800153,0x11405EA,0x11405EA,0x11405EA,0x9E5001A5,0x9E5001A5,0x767801A6,0x90380153,0x90380153,0x784C0001,0x6A580152,0xFF805EA, -0xFF805EA,0x760001A6,0x6800017E,0x5C0005EA,0xFEA001CB,0xFEAC040A,0xB805ED,0xFE88000D,0xC6840002,0xA6840002,0x9E84000B,0x8A800002,0xFE8001A3,0xFA6C0002,0x94780153,0x784C0001,0x18C05EA,0xD40154,0xD40154,0xD40154,0xD40154,0xACBC0000,0xACBC0000,0xACBC0000,0x84BC0000,0x84BC0000,0x76BC0001,0x33C0152,0x33C0152,0x33C0152,0x8E8C0001,0x8E8C0001, -0x76A40001,0x23FC0152,0x23FC0152,0x76540001,0x6A000152,0x33C0152,0x33C0152,0x33C0152,0x8E8C0001,0x8E8C0001,0x76A40001,0x23FC0152,0x23FC0152,0x76540001,0x6A000152,0x23FC0152,0x23FC0152,0x76540001,0x6A000152,0x6A000152,0xFAB40055,0xFECC0080,0xD40154,0xFE880004,0xC2880000,0xA4880000,0x96940000,0x8C7C0000,0xFE94006A,0xFA6C0001,0x1C80152,0x76540001, -0x1C80152,0x10C01A5,0xCEF00000,0xA2F00000,0x94F00000,0x19001A5,0xB0B80000,0x94D40000,0x4BFC01A5,0x94780000,0x860001A5,0x19001A5,0xB0B80000,0x94D40000,0x4BFC01A5,0x94780000,0x860001A5,0x4BFC01A5,0x94780000,0x860001A5,0x860001A5,0x19001A5,0xB0B80000,0x94D40000,0x4BFC01A5,0x94780000,0x860001A5,0x4BFC01A5,0x94780000,0x860001A5,0x860001A5,0x4BFC01A5, -0x94780000,0x860001A5,0x860001A5,0x860001A5,0xF2E000C8,0x12001A5,0xFB0400DD,0xFE940005,0xD2740000,0xAA7C0000,0x949C0000,0x944C0000,0xF4C800B5,0xFE6C0001,0x98A40000,0x860001A5,0x1DF801A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0xA001A5,0x96840001,0x96840001,0x96840001,0x96840001,0x96840001, -0x96840001,0x6A840001,0x6A840001,0x6A840001,0x5C800002,0xEC01A5,0xEC01A5,0xEC01A5,0xEC01A5,0xEC01A5,0xEC01A5,0x784C0000,0x784C0000,0x784C0000,0x5C680001,0x1DC01A5,0x1DC01A5,0x1DC01A5,0x5C100001,0x4E0001A5,0xF29400A2,0xA001A5,0xA001A5,0xFE840002,0xCA840001,0xAA840001,0xAA840001,0x86840001,0xFE740059,0xF86C0001,0x7A7C0001,0x784C0000, -0x15001A5,}; -static const uint32_t g_etc1_to_bc7_m6_table110[] = { -0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0x1B80000, -0x1B80000,0x1B80000,0x1B80000,0x48000000,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x900001,0x9C0000,0x9C0000,0x9C0000,0xD80000,0x1340000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0xCC0000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000, -0x1300000,0x1DF40000,0x1DF40000,0x1DF40000,0x64000001,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1DF40000,0x1DF40000,0x1DF40000,0x64000001,0x1DF40000,0x1DF40000,0x1DF40000,0x64000001,0x64000001,0x4D80000,0xCC0000,0xCC0000,0xEC0000,0x1000000,0x1140000,0x1140000,0x3540000,0xEC0000,0x1000000,0x1B00000,0x1DF40000, -0x1B00000,0x1000000,0x1000000,0x1000000,0x1000000,0x17C0000,0x17C0000,0x17C0000,0x43F80000,0x43F80000,0x7E000001,0x17C0000,0x17C0000,0x17C0000,0x43F80000,0x43F80000,0x7E000001,0x43F80000,0x43F80000,0x7E000001,0x7E000001,0x17C0000,0x17C0000,0x17C0000,0x43F80000,0x43F80000,0x7E000001,0x43F80000,0x43F80000,0x7E000001,0x7E000001,0x43F80000, -0x43F80000,0x7E000001,0x7E000001,0x7E000001,0x3280000,0x1100000,0x1000000,0x15C0000,0x3AC0000,0x11FC0000,0x29FC0000,0x53F80000,0x1400000,0x17C0000,0x11FC0000,0x7E000001,0x11FC0000,0x1380001,0x3D00000,0x6DFC0000,0x9C000000,0x3D00000,0x6DFC0000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x3D00000,0x6DFC0000,0x9C000000,0x6DFC0000,0x9C000000, -0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x3D00000,0x6DFC0000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x6DFC0000,0x9C000000,0x9C000000,0x9C000000,0x9C000000,0x3880000,0xF4C0000,0xF4C0000,0xFFC0000,0x5BFC0000,0x85F40000,0x9C000000,0x9C000000,0x1AC0000,0x2FFC0000,0x97E00000,0x9C000000, -0x47FC0000,0xE40F1E,0xFCCC0606,0xB8C805ED,0x9CC805ED,0xFCB0034A,0xC0B401C5,0xA0B80286,0xA8B0034A,0x9AAC01BD,0x8EB0034A,0xF09405EB,0xC6940154,0xA2A4020F,0xAE8C01B6,0x9A940006,0x8E9801BE,0x9C9005EB,0x9488020D,0x8888028A,0x809005EB,0x1540F1A,0xD06005EB,0x9C9405EA,0xC244034A,0x9E6C01B2,0x8E78034A,0xB42C05EB,0x9C3C0153,0x8C4C01C3,0x805C05EA,0x2FFC0F1A, -0x98000606,0x8A00039E,0x7E000686,0x72000F1A,0xFEC405AE,0xF6DC0B8E,0xF8E00C65,0xFE9C0054,0xD4900004,0xB0940004,0x9E9C0025,0x98900023,0xFCB00557,0xFE800014,0xA0840159,0x8C4C01C3,0x1E80F1A,0x10005ED,0xF0E80154,0xB0E80154,0x9CE40154,0xDECC01A5,0xB6CC0002,0x9CD40029,0xA0CC01A5,0x96C8004C,0x8ECC01A5,0x18005EA,0xC2980153,0x9CBC0152,0xB48001A5,0x9A940005, -0x8EA401A5,0x45F805EA,0x9C380152,0x8E2401A5,0x800005EA,0x18005EA,0xC2980153,0x9CBC0152,0xB48001A5,0x9A940005,0x8EA401A5,0x45F805EA,0x9C380152,0x8E2401A5,0x800005EA,0x45F805EA,0x9C380152,0x8E2401A5,0x800005EA,0x800005EA,0xFED8024B,0xF4F80465,0xF6FC0455,0xFEA40029,0xD4900003,0xB0940003,0x9CA40001,0x9A7C000C,0xFCC00269,0xFE840008,0xA2780152,0x8E2401A5, -0x13FC05EA,0xC805ED,0xC805ED,0xC805ED,0xC805ED,0xD0B001A5,0xD0B001A5,0xD0B001A5,0x92B001A5,0x92B001A5,0x7EB001A6,0xC6940153,0xC6940153,0xC6940153,0x9A940002,0x9A940002,0x809C004B,0x86940153,0x86940153,0x7A90002A,0x72900153,0x12C05EA,0x12C05EA,0x12C05EA,0xA66001A5,0xA66001A5,0x7E8801A6,0x98480153,0x98480153,0x805C0001,0x72680152,0x1BF805EA, -0x1BF805EA,0x7E0801A6,0x72000162,0x640005EA,0xFAB40206,0xFAC40411,0xC805ED,0xFE98001A,0xCE940002,0xAE940002,0xA694000B,0x92900002,0xFE9401BA,0xFC7C000B,0x9C880153,0x805C0001,0x1AC05EA,0xE40154,0xE40154,0xE40154,0xE40154,0xB4CC0000,0xB4CC0000,0xB4CC0000,0x8CCC0000,0x8CCC0000,0x7ECC0001,0x1540152,0x1540152,0x1540152,0x969C0001,0x969C0001, -0x7EB40001,0x2FFC0152,0x2FFC0152,0x7E640001,0x72000152,0x1540152,0x1540152,0x1540152,0x969C0001,0x969C0001,0x7EB40001,0x2FFC0152,0x2FFC0152,0x7E640001,0x72000152,0x2FFC0152,0x2FFC0152,0x7E640001,0x72000152,0x72000152,0xF6C80062,0xF6DC0091,0xE40154,0xFCA00008,0xCA980000,0xAC980000,0x9EA40000,0x948C0000,0xFAAC0071,0xFE800002,0x1E80152,0x7E640001, -0x1E80152,0x11C01A5,0xD7000000,0xAB000000,0x9D000000,0x1A801A5,0xB8C80000,0x9CE40000,0x57FC01A5,0x9C880000,0x8E0001A5,0x1A801A5,0xB8C80000,0x9CE40000,0x57FC01A5,0x9C880000,0x8E0001A5,0x57FC01A5,0x9C880000,0x8E0001A5,0x8E0001A5,0x1A801A5,0xB8C80000,0x9CE40000,0x57FC01A5,0x9C880000,0x8E0001A5,0x57FC01A5,0x9C880000,0x8E0001A5,0x8E0001A5,0x57FC01A5, -0x9C880000,0x8E0001A5,0x8E0001A5,0x8E0001A5,0xFAF000C8,0x13001A5,0xF31400F4,0xFCAC000D,0xDA840000,0xB28C0000,0x9CAC0000,0x9C5C0000,0xFCD800B5,0xFE880002,0xA0B40000,0x8E0001A5,0x2BFC01A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0xB001A5,0x9E940001,0x9E940001,0x9E940001,0x9E940001,0x9E940001, -0x9E940001,0x72940001,0x72940001,0x72940001,0x64900002,0x10401A5,0x10401A5,0x10401A5,0x10401A5,0x10401A5,0x10401A5,0x805C0000,0x805C0000,0x805C0000,0x64780001,0x5FC01A5,0x5FC01A5,0x5FC01A5,0x64200001,0x560001A5,0xFAA400A2,0xB001A5,0xB001A5,0xF894000A,0xD2940001,0xB2940001,0xB2940001,0x8E940001,0xFA880064,0xFC7C0002,0x828C0001,0x805C0000, -0x17001A5,}; -static const uint32_t g_etc1_to_bc7_m6_table111[] = { -0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0x1E80000, -0x1E80000,0x1E80000,0x1E80000,0x50000000,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xAC0000,0xAC0000,0xAC0000,0xF00000,0x1580000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0x3440000,0x3440000,0x3440000,0x3440000,0x3440000, -0x3440000,0x27FC0000,0x27FC0000,0x27FC0000,0x6C000001,0x3440000,0x3440000,0x3440000,0x3440000,0x3440000,0x3440000,0x27FC0000,0x27FC0000,0x27FC0000,0x6C000001,0x27FC0000,0x27FC0000,0x27FC0000,0x6C000001,0x6C000001,0xCE80000,0xDC0000,0xDC0000,0x1000000,0x1140000,0x12C0000,0x12C0000,0x1700000,0x1000000,0x1140000,0x1D40000,0x27FC0000, -0x1D40000,0x1100000,0x1100000,0x1100000,0x1100000,0x1940000,0x1940000,0x1940000,0x4FF80000,0x4FF80000,0x86000001,0x1940000,0x1940000,0x1940000,0x4FF80000,0x4FF80000,0x86000001,0x4FF80000,0x4FF80000,0x86000001,0x86000001,0x1940000,0x1940000,0x1940000,0x4FF80000,0x4FF80000,0x86000001,0x4FF80000,0x4FF80000,0x86000001,0x86000001,0x4FF80000, -0x4FF80000,0x86000001,0x86000001,0x86000001,0x13C0000,0x9200000,0x1100000,0x3700000,0x1C80000,0x1FFC0000,0x37FC0000,0x5DFC0000,0x1540000,0x1940000,0x1FFC0000,0x86000001,0x1FFC0000,0x1480001,0x3E80000,0x79FC0000,0xA4000000,0x3E80000,0x79FC0000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0x3E80000,0x79FC0000,0xA4000000,0x79FC0000,0xA4000000, -0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0x3E80000,0x79FC0000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0xA4000000,0x39C0000,0x1600000,0x1600000,0x23FC0000,0x69F80000,0x8FF40000,0xA4000000,0xA4000000,0x1C00000,0x41FC0000,0x9FF00000,0xA4000000, -0x57FC0000,0xF40F1E,0xFEDC061E,0xC0D805ED,0xA4D805ED,0xFCC00356,0xC8C401C5,0xA8C80286,0xB0C0034A,0xA2BC01BD,0x96C0034A,0xF8A405EB,0xCEA40154,0xAAB4020F,0xB69C01B6,0xA2A40006,0x96A801BE,0xA4A005EB,0x9C98020D,0x9098028A,0x88A005EB,0x16C0F1A,0xD87005EB,0xA4A405EA,0xCA54034A,0xA67C01B2,0x9688034A,0xBC3C05EB,0xA44C0153,0x945C01C3,0x886C05EA,0x3BFC0F1A, -0xA20005F1,0x94000376,0x8400065A,0x7A000F1A,0xFED80614,0xFEEC0B8E,0xFEEC0C81,0xFEB00093,0xDCA00004,0xB8A40004,0xA6AC0025,0xA0A00023,0xFEBC05A3,0xFE94003E,0xA8940159,0x945C01C3,0x7FC0F1A,0x11005ED,0xF8F80154,0xB8F80154,0xA4F40154,0xE6DC01A5,0xBEDC0002,0xA4E40029,0xA8DC01A5,0x9ED8004C,0x96DC01A5,0x19805EA,0xCAA80153,0xA4CC0152,0xBC9001A5,0xA2A40005, -0x96B401A5,0x51F805EA,0xA4480152,0x963401A5,0x880005EA,0x19805EA,0xCAA80153,0xA4CC0152,0xBC9001A5,0xA2A40005,0x96B401A5,0x51F805EA,0xA4480152,0x963401A5,0x880005EA,0x51F805EA,0xA4480152,0x963401A5,0x880005EA,0x880005EA,0xFEEC028B,0xFD080465,0xFF0C0455,0xFCBC0048,0xDCA00003,0xB8A40003,0xA4B40001,0xA28C000C,0xFCD80289,0xFE98001E,0xAA880152,0x963401A5, -0x21FC05EA,0xD805ED,0xD805ED,0xD805ED,0xD805ED,0xD8C001A5,0xD8C001A5,0xD8C001A5,0x9AC001A5,0x9AC001A5,0x86C001A6,0xCEA40153,0xCEA40153,0xCEA40153,0xA2A40002,0xA2A40002,0x88AC004B,0x8EA40153,0x8EA40153,0x82A0002A,0x7AA00153,0x14405EA,0x14405EA,0x14405EA,0xAE7001A5,0xAE7001A5,0x869801A6,0xA0580153,0xA0580153,0x886C0001,0x7A780152,0x27F805EA, -0x27F805EA,0x861801A6,0x7A000156,0x6C0005EA,0xFAC40225,0xFECC0439,0xD805ED,0xFEAC0032,0xD6A40002,0xB6A40002,0xAEA4000B,0x9AA00002,0xFAAC01E2,0xFE900015,0xA4980153,0x886C0001,0x1D005EA,0xF40154,0xF40154,0xF40154,0xF40154,0xBCDC0000,0xBCDC0000,0xBCDC0000,0x94DC0000,0x94DC0000,0x86DC0001,0x16C0152,0x16C0152,0x16C0152,0x9EAC0001,0x9EAC0001, -0x86C40001,0x3BFC0152,0x3BFC0152,0x86740001,0x7A000152,0x16C0152,0x16C0152,0x16C0152,0x9EAC0001,0x9EAC0001,0x86C40001,0x3BFC0152,0x3BFC0152,0x86740001,0x7A000152,0x3BFC0152,0x3BFC0152,0x86740001,0x7A000152,0x7A000152,0xFED80062,0xFEEC0091,0xF40154,0xFEB4000D,0xD2A80000,0xB4A80000,0xA6B40000,0x9C9C0000,0xFCC00080,0xFE980005,0x7FC0152,0x86740001, -0x7FC0152,0x12C01A5,0xDF100000,0xB3100000,0xA5100000,0x1C001A5,0xC0D80000,0xA4F40000,0x63FC01A5,0xA4980000,0x960001A5,0x1C001A5,0xC0D80000,0xA4F40000,0x63FC01A5,0xA4980000,0x960001A5,0x63FC01A5,0xA4980000,0x960001A5,0x960001A5,0x1C001A5,0xC0D80000,0xA4F40000,0x63FC01A5,0xA4980000,0x960001A5,0x63FC01A5,0xA4980000,0x960001A5,0x960001A5,0x63FC01A5, -0xA4980000,0x960001A5,0x960001A5,0x960001A5,0xF70400DD,0x14001A5,0xFB2400F4,0xFEC00014,0xE2940000,0xBA9C0000,0xA4BC0000,0xA46C0000,0xFCEC00CA,0xFEA0000A,0xA8C40000,0x960001A5,0x3BFC01A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xC001A5,0xA6A40001,0xA6A40001,0xA6A40001,0xA6A40001,0xA6A40001, -0xA6A40001,0x7AA40001,0x7AA40001,0x7AA40001,0x6CA00002,0x11C01A5,0x11C01A5,0x11C01A5,0x11C01A5,0x11C01A5,0x11C01A5,0x886C0000,0x886C0000,0x886C0000,0x6C880001,0x11FC01A5,0x11FC01A5,0x11FC01A5,0x6C300001,0x5E0001A5,0xF2B400B5,0xC001A5,0xC001A5,0xFCA8000D,0xDAA40001,0xBAA40001,0xBAA40001,0x96A40001,0xFC9C0071,0xFC900005,0x8A9C0001,0x886C0000, -0x19401A5,}; -static const uint32_t g_etc1_to_bc7_m6_table112[] = { -0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0xBF80000, -0xBF80000,0xBF80000,0xBF80000,0x58000001,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xC00000,0xC00000,0xC00000,0x10C0000,0x17C0000,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0xEC0001,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000, -0x1600000,0x35FC0000,0x35FC0000,0x35FC0000,0x76000000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x35FC0000,0x35FC0000,0x35FC0000,0x76000000,0x35FC0000,0x35FC0000,0x35FC0000,0x76000000,0x76000000,0x6FC0000,0xEC0001,0xEC0001,0x1140000,0x3280000,0x1440000,0x1440000,0x1900000,0x1140000,0x3280000,0x1F80000,0x35FC0000, -0x1F80000,0x1200001,0x1200001,0x1200001,0x1200001,0x1B00000,0x1B00000,0x1B00000,0x5DF40000,0x5DF40000,0x90000000,0x1B00000,0x1B00000,0x1B00000,0x5DF40000,0x5DF40000,0x90000000,0x5DF40000,0x5DF40000,0x90000000,0x90000000,0x1B00000,0x1B00000,0x1B00000,0x5DF40000,0x5DF40000,0x90000000,0x5DF40000,0x5DF40000,0x90000000,0x90000000,0x5DF40000, -0x5DF40000,0x90000000,0x90000000,0x90000000,0x1500000,0x3340000,0x1200001,0x3880000,0x1E80000,0x31FC0000,0x47F80000,0x69FC0000,0x16C0000,0x1B00000,0x31FC0000,0x90000000,0x31FC0000,0x15C0000,0x9FC0000,0x87FC0000,0xAC000001,0x9FC0000,0x87FC0000,0xAC000001,0x87FC0000,0xAC000001,0xAC000001,0x9FC0000,0x87FC0000,0xAC000001,0x87FC0000,0xAC000001, -0xAC000001,0x87FC0000,0xAC000001,0xAC000001,0xAC000001,0x9FC0000,0x87FC0000,0xAC000001,0x87FC0000,0xAC000001,0xAC000001,0x87FC0000,0xAC000001,0xAC000001,0xAC000001,0x87FC0000,0xAC000001,0xAC000001,0xAC000001,0xAC000001,0x1B40000,0x1740000,0x1740000,0x39FC0000,0x77FC0000,0x99FC0000,0xAC000001,0xAC000001,0x1D80000,0x53FC0000,0xA9E40000,0xAC000001, -0x67FC0000,0x1080F1A,0xFCF0065A,0xC8EC05EA,0xACEC05EB,0xFED40376,0xD0D401C3,0xB0D8028A,0xBAD0034A,0xAAD001BE,0x9ED0034A,0xFCB805F1,0xD8B40153,0xB2C4020D,0xC0B001B2,0xACB40006,0xA0B801BD,0xACB405EA,0xA4A8020F,0x9AAC0286,0x90B405ED,0x1880F1A,0xE08405EB,0xAEB405EB,0xD268034A,0xAE8C01B6,0x9E9C034A,0xC64805EB,0xAC600154,0x9C6C01C5,0x927C05ED,0x49F80F1A, -0xAC0805EB,0x9C000356,0x9000061E,0x82000F1E,0xFEE4068A,0xF9000BEA,0xFB040CAA,0xFEC400EA,0xE4B40003,0xC2B40002,0xAEBC0023,0xA8B00025,0xFED40612,0xFEB0008D,0xB2A8015B,0x9C6C01C5,0x19FC0F1A,0x12405EA,0xFD080156,0xC3080152,0xAD080153,0xEEF001A6,0xC8EC0001,0xAEF8002A,0xB2EC01A6,0xA8EC004B,0x9EEC01A6,0x3B005EA,0xD2BC0153,0xACE00153,0xC6A001A5,0xACB80002, -0x9EC801A5,0x5DFC05EA,0xAC600153,0x9E4801A5,0x900005ED,0x3B005EA,0xD2BC0153,0xACE00153,0xC6A001A5,0xACB80002,0x9EC801A5,0x5DFC05EA,0xAC600153,0x9E4801A5,0x900005ED,0x5DFC05EA,0xAC600153,0x9E4801A5,0x900005ED,0x900005ED,0xFF0002B2,0xF71C0492,0xF9200483,0xFED40072,0xE4B40003,0xC2B40002,0xAEC80002,0xACA0000B,0xFCEC02D4,0xFEB80048,0xB4980152,0x9E4801A5, -0x33FC05EA,0xEC05EA,0xEC05EA,0xEC05EA,0xEC05EA,0xE4D001A5,0xE4D001A5,0xE4D001A5,0xA4D001A5,0xA4D001A5,0x90D001A5,0xD8B40152,0xD8B40152,0xD8B40152,0xAAB40005,0xAAB40005,0x92C0004C,0x98B40152,0x98B40152,0x8CB40029,0x82B40154,0x35C05EA,0x35C05EA,0x35C05EA,0xB68401A5,0xB68401A5,0x90A801A5,0xAA680153,0xAA680153,0x907C0002,0x828C0154,0x33FC05EA, -0x33FC05EA,0x902801A5,0x820C0154,0x740005ED,0xFED80248,0xFAE4043D,0xEC05EA,0xFEBC0054,0xE2B40002,0xC2B40002,0xB8B8000C,0xA4B40001,0xFEBC020C,0xFEA40031,0xAEA80152,0x907C0002,0x1F405EA,0x1080152,0x1080152,0x1080152,0x1080152,0xC2F00001,0xC2F00001,0xC2F00001,0x9CF00001,0x9CF00001,0x90EC0001,0x1880152,0x1880152,0x1880152,0xA6C00001,0xA6C00001, -0x90D40000,0x49F80152,0x49F80152,0x90840000,0x82000154,0x1880152,0x1880152,0x1880152,0xA6C00001,0xA6C00001,0x90D40000,0x49F80152,0x49F80152,0x90840000,0x82000154,0x49F80152,0x49F80152,0x90840000,0x82000154,0x82000154,0xFAEC0071,0xF90000A2,0x1080152,0xF6CC0019,0xDABC0000,0xBCBC0001,0xB0C40000,0xA4B00000,0xFED00088,0xFEB0000D,0x19FC0152,0x90840000, -0x19FC0152,0x14001A5,0xE7240001,0xBB240001,0xAD200002,0x1D801A5,0xC8EC0000,0xAD080001,0x71F801A5,0xACB00001,0x9E0001A5,0x1D801A5,0xC8EC0000,0xAD080001,0x71F801A5,0xACB00001,0x9E0001A5,0x71F801A5,0xACB00001,0x9E0001A5,0x9E0001A5,0x1D801A5,0xC8EC0000,0xAD080001,0x71F801A5,0xACB00001,0x9E0001A5,0x71F801A5,0xACB00001,0x9E0001A5,0x9E0001A5,0x71F801A5, -0xACB00001,0x9E0001A5,0x9E0001A5,0x9E0001A5,0xFF1400E1,0x15401A5,0xF5380109,0xFED80028,0xEAA80000,0xC4AC0000,0xACD00001,0xAC840001,0xF70800DD,0xFEC00012,0xB2D40000,0x9E0001A5,0x4BFC01A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xD001A5,0xB0B40000,0xB0B40000,0xB0B40000,0xB0B40000,0xB0B40000, -0xB0B40000,0x84B40000,0x84B40000,0x84B40000,0x76B40000,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x927C0000,0x927C0000,0x927C0000,0x76980000,0x1FF801A5,0x1FF801A5,0x1FF801A5,0x763C0000,0x680001A5,0xFCC800B5,0xD001A5,0xD001A5,0xFCB80014,0xE6B40000,0xC6B40000,0xC6B40000,0xA0B40000,0xF8B00080,0xFEA0000A,0x94AC0000,0x927C0000, -0x1B801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table113[] = { -0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x17F80000, -0x17F80000,0x17F80000,0x17F80000,0x60000001,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xD00000,0xD00000,0xD00000,0x1240000,0x1A00000,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0xFC0001,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000, -0x1780000,0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0x41FC0000,0x41FC0000,0x41FC0000,0x7E000000,0x7E000000,0xF0C0000,0xFC0001,0xFC0001,0x1280000,0x33C0000,0x1580000,0x1580000,0x1AC0000,0x1280000,0x33C0000,0xFFC0000,0x41FC0000, -0xFFC0000,0x1300001,0x1300001,0x1300001,0x1300001,0x3C40000,0x3C40000,0x3C40000,0x67FC0000,0x67FC0000,0x98000000,0x3C40000,0x3C40000,0x3C40000,0x67FC0000,0x67FC0000,0x98000000,0x67FC0000,0x67FC0000,0x98000000,0x98000000,0x3C40000,0x3C40000,0x3C40000,0x67FC0000,0x67FC0000,0x98000000,0x67FC0000,0x67FC0000,0x98000000,0x98000000,0x67FC0000, -0x67FC0000,0x98000000,0x98000000,0x98000000,0x1640000,0xB440000,0x1300001,0x1A00000,0x5FC0000,0x3FFC0000,0x55F80000,0x75F80000,0x1800000,0x3C40000,0x3FFC0000,0x98000000,0x3FFC0000,0x16C0000,0x23FC0000,0x93FC0000,0xB4000001,0x23FC0000,0x93FC0000,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0x23FC0000,0x93FC0000,0xB4000001,0x93FC0000,0xB4000001, -0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0x23FC0000,0x93FC0000,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0x93FC0000,0xB4000001,0xB4000001,0xB4000001,0xB4000001,0x1C80000,0x1840000,0x1840000,0x4DFC0000,0x85FC0000,0xA5F00000,0xB4000001,0xB4000001,0x1F00000,0x65FC0000,0xB1F40000,0xB4000001, -0x77FC0000,0x1180F1A,0xFF000686,0xD0FC05EA,0xB4FC05EB,0xFEE8039E,0xD8E401C3,0xB8E8028A,0xC2E0034A,0xB2E001BE,0xA6E0034A,0xFCCC0606,0xE0C40153,0xBAD4020D,0xC8C001B2,0xB4C40006,0xA8C801BD,0xB4C405EA,0xACB8020F,0xA2BC0286,0x98C405ED,0x1A00F1A,0xE89405EB,0xB6C405EB,0xDA78034A,0xB69C01B6,0xA6AC034A,0xCE5805EB,0xB4700154,0xA47C01C5,0x9A8C05ED,0x55F80F1A, -0xB41805EB,0xA600034A,0x98000606,0x8A000F1E,0xFEF806F7,0xFF0C0BFA,0xFF0C0CEA,0xFED80150,0xECC40003,0xCAC40002,0xB6CC0023,0xB0C00025,0xFEE006BA,0xFEC000D3,0xBAB8015B,0xA47C01C5,0x27FC0F1A,0x13405EA,0xFF180162,0xCB180152,0xB5180153,0xF70001A6,0xD0FC0001,0xB708002A,0xBAFC01A6,0xB0FC004B,0xA6FC01A6,0x1C805EA,0xDACC0153,0xB4F00153,0xCEB001A5,0xB4C80002, -0xA6D801A5,0x69FC05EA,0xB4700153,0xA65801A5,0x980005ED,0x1C805EA,0xDACC0153,0xB4F00153,0xCEB001A5,0xB4C80002,0xA6D801A5,0x69FC05EA,0xB4700153,0xA65801A5,0x980005ED,0x69FC05EA,0xB4700153,0xA65801A5,0x980005ED,0x980005ED,0xFF1402D5,0xFF2C0492,0xFF2C048B,0xFCE800A5,0xECC40003,0xCAC40002,0xB6D80002,0xB4B0000B,0xFEF802F6,0xFEC80065,0xBCA80152,0xA65801A5, -0x41FC05EA,0xFC05EA,0xFC05EA,0xFC05EA,0xFC05EA,0xECE001A5,0xECE001A5,0xECE001A5,0xACE001A5,0xACE001A5,0x98E001A5,0xE0C40152,0xE0C40152,0xE0C40152,0xB2C40005,0xB2C40005,0x9AD0004C,0xA0C40152,0xA0C40152,0x94C40029,0x8AC40154,0x37405EA,0x37405EA,0x37405EA,0xBE9401A5,0xBE9401A5,0x98B801A5,0xB2780153,0xB2780153,0x988C0002,0x8A9C0154,0x3FFC05EA, -0x3FFC05EA,0x983801A5,0x8A1C0154,0x7C0005ED,0xFEE80269,0xF4F80466,0xFC05EA,0xFED40071,0xEAC40002,0xCAC40002,0xC0C8000C,0xACC40001,0xFED00229,0xFEBC0048,0xB6B80152,0x988C0002,0xDFC05EA,0x1180152,0x1180152,0x1180152,0x1180152,0xCB000001,0xCB000001,0xCB000001,0xA5000001,0xA5000001,0x98FC0001,0x1A00152,0x1A00152,0x1A00152,0xAED00001,0xAED00001, -0x98E40000,0x55F80152,0x55F80152,0x98940000,0x8A000154,0x1A00152,0x1A00152,0x1A00152,0xAED00001,0xAED00001,0x98E40000,0x55F80152,0x55F80152,0x98940000,0x8A000154,0x55F80152,0x55F80152,0x98940000,0x8A000154,0x8A000154,0xF7000080,0xFF0C00AA,0x1180152,0xFEDC0019,0xE2CC0000,0xC4CC0001,0xB8D40000,0xACC00000,0xF8EC0091,0xFEC40012,0x27FC0152,0x98940000, -0x27FC0152,0x15001A5,0xEF340001,0xC3340001,0xB5300002,0x1F001A5,0xD0FC0000,0xB5180001,0x7DF801A5,0xB4C00001,0xA60001A5,0x1F001A5,0xD0FC0000,0xB5180001,0x7DF801A5,0xB4C00001,0xA60001A5,0x7DF801A5,0xB4C00001,0xA60001A5,0xA60001A5,0x1F001A5,0xD0FC0000,0xB5180001,0x7DF801A5,0xB4C00001,0xA60001A5,0x7DF801A5,0xB4C00001,0xA60001A5,0xA60001A5,0x7DF801A5, -0xB4C00001,0xA60001A5,0xA60001A5,0xA60001A5,0xFB2C00F2,0x16401A5,0xFD480109,0xFEF40034,0xF2B80000,0xCCBC0000,0xB4E00001,0xB4940001,0xFF1800DD,0xFED80022,0xBAE40000,0xA60001A5,0x5BFC01A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xE001A5,0xB8C40000,0xB8C40000,0xB8C40000,0xB8C40000,0xB8C40000, -0xB8C40000,0x8CC40000,0x8CC40000,0x8CC40000,0x7EC40000,0x14C01A5,0x14C01A5,0x14C01A5,0x14C01A5,0x14C01A5,0x14C01A5,0x9A8C0000,0x9A8C0000,0x9A8C0000,0x7EA80000,0x2BF801A5,0x2BF801A5,0x2BF801A5,0x7E4C0000,0x700001A5,0xF4D800C8,0xE001A5,0xE001A5,0xFECC0019,0xEEC40000,0xCEC40000,0xCEC40000,0xA8C40000,0xFEBC0088,0xFEB4000D,0x9CBC0000,0x9A8C0000, -0x1DC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table114[] = { -0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x21FC0000, -0x21FC0000,0x21FC0000,0x21FC0000,0x68000001,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x8E00000,0x8E00000,0x8E00000,0x13C0000,0x1C00000,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x10C0001,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000, -0x1900000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x86000000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x86000000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x86000000,0x86000000,0x1200000,0x10C0001,0x10C0001,0x5380000,0x5500000,0x1700000,0x1700000,0x1C40000,0x5380000,0x5500000,0x1FF80000,0x4DFC0000, -0x1FF80000,0x1400001,0x1400001,0x1400001,0x1400001,0x3DC0000,0x3DC0000,0x3DC0000,0x73FC0000,0x73FC0000,0xA0000000,0x3DC0000,0x3DC0000,0x3DC0000,0x73FC0000,0x73FC0000,0xA0000000,0x73FC0000,0x73FC0000,0xA0000000,0xA0000000,0x3DC0000,0x3DC0000,0x3DC0000,0x73FC0000,0x73FC0000,0xA0000000,0x73FC0000,0x73FC0000,0xA0000000,0xA0000000,0x73FC0000, -0x73FC0000,0xA0000000,0xA0000000,0xA0000000,0x5740000,0x1580000,0x1400001,0x3B40000,0x19FC0000,0x4FFC0000,0x61FC0000,0x7FFC0000,0x1940000,0x3DC0000,0x4FFC0000,0xA0000000,0x4FFC0000,0x17C0000,0x3BFC0000,0x9FF80000,0xBC000001,0x3BFC0000,0x9FF80000,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0x3BFC0000,0x9FF80000,0xBC000001,0x9FF80000,0xBC000001, -0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0x3BFC0000,0x9FF80000,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0x9FF80000,0xBC000001,0xBC000001,0xBC000001,0xBC000001,0x1DC0000,0x3940000,0x3940000,0x61FC0000,0x93F80000,0xAFF00000,0xBC000001,0xBC000001,0xDFC0000,0x75FC0000,0xBBC80000,0xBC000001, -0x85FC0000,0x1280F1A,0xFF1006D7,0xD90C05EA,0xBD0C05EB,0xFEF803F2,0xE0F401C3,0xC0F8028A,0xCAF0034A,0xBAF001BE,0xAEF0034A,0xFEE00629,0xE8D40153,0xC2E4020D,0xD0D001B2,0xBCD40006,0xB0D801BD,0xBCD405EA,0xB4C8020F,0xAACC0286,0xA0D405ED,0x1B80F1A,0xF0A405EB,0xBED405EB,0xE288034A,0xBEAC01B6,0xAEBC034A,0xD66805EB,0xBC800154,0xAC8C01C5,0xA29C05ED,0x61F80F1A, -0xBC2805EB,0xAE0C034A,0xA00005F1,0x92000F1E,0xFF0C0766,0xF9200C4A,0xFB240CF3,0xFEEC01C5,0xF4D40003,0xD2D40002,0xBEDC0023,0xB8D00025,0xFEF806FA,0xFED4013A,0xC2C8015B,0xAC8C01C5,0x37FC0F1A,0x14405EA,0xFD2C017E,0xD3280152,0xBD280153,0xFF1001A6,0xD90C0001,0xBF18002A,0xC30C01A6,0xB90C004B,0xAF0C01A6,0x1E005EA,0xE2DC0153,0xBD000153,0xD6C001A5,0xBCD80002, -0xAEE801A5,0x75FC05EA,0xBC800153,0xAE6801A5,0xA00005ED,0x1E005EA,0xE2DC0153,0xBD000153,0xD6C001A5,0xBCD80002,0xAEE801A5,0x75FC05EA,0xBC800153,0xAE6801A5,0xA00005ED,0x75FC05EA,0xBC800153,0xAE6801A5,0xA00005ED,0xA00005ED,0xFF200312,0xF73C04BE,0xF94004B2,0xFEF800DE,0xF4D40003,0xD2D40002,0xBEE80002,0xBCC0000B,0xFD140322,0xFEE000A1,0xC4B80152,0xAE6801A5, -0x51FC05EA,0x10C05EA,0x10C05EA,0x10C05EA,0x10C05EA,0xF4F001A5,0xF4F001A5,0xF4F001A5,0xB4F001A5,0xB4F001A5,0xA0F001A5,0xE8D40152,0xE8D40152,0xE8D40152,0xBAD40005,0xBAD40005,0xA2E0004C,0xA8D40152,0xA8D40152,0x9CD40029,0x92D40154,0x38C05EA,0x38C05EA,0x38C05EA,0xC6A401A5,0xC6A401A5,0xA0C801A5,0xBA880153,0xBA880153,0xA09C0002,0x92AC0154,0x4BFC05EA, -0x4BFC05EA,0xA04801A5,0x922C0154,0x840005ED,0xFEF402A9,0xFD080466,0x10C05EA,0xFCE400A5,0xF2D40002,0xD2D40002,0xC8D8000C,0xB4D40001,0xFEE40266,0xFED00062,0xBEC80152,0xA09C0002,0x1DF805EA,0x1280152,0x1280152,0x1280152,0x1280152,0xD3100001,0xD3100001,0xD3100001,0xAD100001,0xAD100001,0xA10C0001,0x1B80152,0x1B80152,0x1B80152,0xB6E00001,0xB6E00001, -0xA0F40000,0x61F80152,0x61F80152,0xA0A40000,0x92000154,0x1B80152,0x1B80152,0x1B80152,0xB6E00001,0xB6E00001,0xA0F40000,0x61F80152,0x61F80152,0xA0A40000,0x92000154,0x61F80152,0x61F80152,0xA0A40000,0x92000154,0x92000154,0xFF100080,0xF92000B5,0x1280152,0xFCF40029,0xEADC0000,0xCCDC0001,0xC0E40000,0xB4D00000,0xFEF80095,0xFEDC0019,0x37FC0152,0xA0A40000, -0x37FC0152,0x16001A5,0xF7440001,0xCB440001,0xBD400002,0xDFC01A5,0xD90C0000,0xBD280001,0x89F801A5,0xBCD00001,0xAE0001A5,0xDFC01A5,0xD90C0000,0xBD280001,0x89F801A5,0xBCD00001,0xAE0001A5,0x89F801A5,0xBCD00001,0xAE0001A5,0xAE0001A5,0xDFC01A5,0xD90C0000,0xBD280001,0x89F801A5,0xBCD00001,0xAE0001A5,0x89F801A5,0xBCD00001,0xAE0001A5,0xAE0001A5,0x89F801A5, -0xBCD00001,0xAE0001A5,0xAE0001A5,0xAE0001A5,0xF7400109,0x17801A5,0xF5580122,0xFD100048,0xFAC80000,0xD4CC0000,0xBCF00001,0xBCA40001,0xFF2C00F4,0xFEF0003A,0xC2F40000,0xAE0001A5,0x69FC01A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xF001A5,0xC0D40000,0xC0D40000,0xC0D40000,0xC0D40000,0xC0D40000, -0xC0D40000,0x94D40000,0x94D40000,0x94D40000,0x86D40000,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0xA29C0000,0xA29C0000,0xA29C0000,0x86B80000,0x37F801A5,0x37F801A5,0x37F801A5,0x865C0000,0x780001A5,0xFCE800C8,0xF001A5,0xF001A5,0xFED80028,0xF6D40000,0xD6D40000,0xD6D40000,0xB0D40000,0xFCD40091,0xFAC80019,0xA4CC0000,0xA29C0000, -0x1FC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table115[] = { -0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x2DFC0000, -0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xF40000,0xF40000,0xF40000,0x3500000,0x1E40000,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x11C0001,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000, -0x1A80000,0x59FC0000,0x59FC0000,0x59FC0000,0x8E000000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x59FC0000,0x59FC0000,0x59FC0000,0x8E000000,0x59FC0000,0x59FC0000,0x59FC0000,0x8E000000,0x8E000000,0x1300000,0x11C0001,0x11C0001,0x14C0000,0x5640000,0x1840000,0x1840000,0x1E00000,0x14C0000,0x5640000,0x2DFC0000,0x59FC0000, -0x2DFC0000,0x1500001,0x1500001,0x1500001,0x1500001,0x3F40000,0x3F40000,0x3F40000,0x7FFC0000,0x7FFC0000,0xA8000000,0x3F40000,0x3F40000,0x3F40000,0x7FFC0000,0x7FFC0000,0xA8000000,0x7FFC0000,0x7FFC0000,0xA8000000,0xA8000000,0x3F40000,0x3F40000,0x3F40000,0x7FFC0000,0x7FFC0000,0xA8000000,0x7FFC0000,0x7FFC0000,0xA8000000,0xA8000000,0x7FFC0000, -0x7FFC0000,0xA8000000,0xA8000000,0xA8000000,0x1880000,0x1680000,0x1500001,0x1CC0000,0x2DFC0000,0x5FF80000,0x6FFC0000,0x8BF80000,0x1A80000,0x3F40000,0x5FF80000,0xA8000000,0x5FF80000,0x18C0000,0x53FC0000,0xABF80000,0xC4000001,0x53FC0000,0xABF80000,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0x53FC0000,0xABF80000,0xC4000001,0xABF80000,0xC4000001, -0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xC4000001,0x53FC0000,0xABF80000,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xC4000001,0xABF80000,0xC4000001,0xC4000001,0xC4000001,0xC4000001,0x1F00000,0xBA40000,0xBA40000,0x73FC0000,0x9FFC0000,0xB9F00000,0xC4000001,0xC4000001,0x2BFC0000,0x87FC0000,0xC3D80000,0xC4000001, -0x95FC0000,0x1380F1A,0xFF24071A,0xE11C05EA,0xC51C05EB,0xFF0C043A,0xE90401C3,0xC908028A,0xD300034A,0xC30001BE,0xB700034A,0xFEF4065A,0xF0E40153,0xCAF4020D,0xD8E001B2,0xC4E40006,0xB8E801BD,0xC4E405EA,0xBCD8020F,0xB2DC0286,0xA8E405ED,0x1D00F1A,0xF8B405EB,0xC6E405EB,0xEA98034A,0xC6BC01B6,0xB6CC034A,0xDE7805EB,0xC4900154,0xB49C01C5,0xAAAC05ED,0x6DF80F1A, -0xC43805EB,0xB61C034A,0xA80005ED,0x9A000F1E,0xFF2007D6,0xFF2C0C62,0xFF2C0D3B,0xFF000242,0xFCE40003,0xDAE40002,0xC6EC0023,0xC0E00025,0xFF100782,0xFEE801CC,0xCAD8015B,0xB49C01C5,0x45FC0F1A,0x15405EA,0xFF3C019A,0xDB380152,0xC5380153,0xFF2401B2,0xE11C0001,0xC728002A,0xCB1C01A6,0xC11C004B,0xB71C01A6,0x1F805EA,0xEAEC0153,0xC5100153,0xDED001A5,0xC4E80002, -0xB6F801A5,0x81FC05EA,0xC4900153,0xB67801A5,0xA80005ED,0x1F805EA,0xEAEC0153,0xC5100153,0xDED001A5,0xC4E80002,0xB6F801A5,0x81FC05EA,0xC4900153,0xB67801A5,0xA80005ED,0x81FC05EA,0xC4900153,0xB67801A5,0xA80005ED,0xA80005ED,0xFF340333,0xFF4C04BE,0xFF4C04BE,0xFF140109,0xFCE40003,0xDAE40002,0xC6F80002,0xC4D0000B,0xFF240356,0xFD0000DD,0xCCC80152,0xB67801A5, -0x5FFC05EA,0x11C05EA,0x11C05EA,0x11C05EA,0x11C05EA,0xFD0001A5,0xFD0001A5,0xFD0001A5,0xBD0001A5,0xBD0001A5,0xA90001A5,0xF0E40152,0xF0E40152,0xF0E40152,0xC2E40005,0xC2E40005,0xAAF0004C,0xB0E40152,0xB0E40152,0xA4E40029,0x9AE40154,0x3A405EA,0x3A405EA,0x3A405EA,0xCEB401A5,0xCEB401A5,0xA8D801A5,0xC2980153,0xC2980153,0xA8AC0002,0x9ABC0154,0x57FC05EA, -0x57FC05EA,0xA85801A5,0x9A3C0154,0x8C0005ED,0xFF0402D2,0xF5180491,0x11C05EA,0xFEF800C9,0xFAE40002,0xDAE40002,0xD0E8000C,0xBCE40001,0xFEF80289,0xFEE00099,0xC6D80152,0xA8AC0002,0x2BFC05EA,0x1380152,0x1380152,0x1380152,0x1380152,0xDB200001,0xDB200001,0xDB200001,0xB5200001,0xB5200001,0xA91C0001,0x1D00152,0x1D00152,0x1D00152,0xBEF00001,0xBEF00001, -0xA9040000,0x6DF80152,0x6DF80152,0xA8B40000,0x9A000154,0x1D00152,0x1D00152,0x1D00152,0xBEF00001,0xBEF00001,0xA9040000,0x6DF80152,0x6DF80152,0xA8B40000,0x9A000154,0x6DF80152,0x6DF80152,0xA8B40000,0x9A000154,0x9A000154,0xFF200091,0xFF2C00C1,0x1380152,0xFD040034,0xF2EC0000,0xD4EC0001,0xC8F40000,0xBCE00000,0xFD1000A2,0xFCF00029,0x45FC0152,0xA8B40000, -0x45FC0152,0x17001A5,0xFF540001,0xD3540001,0xC5500002,0x25FC01A5,0xE11C0000,0xC5380001,0x95F801A5,0xC4E00001,0xB60001A5,0x25FC01A5,0xE11C0000,0xC5380001,0x95F801A5,0xC4E00001,0xB60001A5,0x95F801A5,0xC4E00001,0xB60001A5,0xB60001A5,0x25FC01A5,0xE11C0000,0xC5380001,0x95F801A5,0xC4E00001,0xB60001A5,0x95F801A5,0xC4E00001,0xB60001A5,0xB60001A5,0x95F801A5, -0xC4E00001,0xB60001A5,0xB60001A5,0xB60001A5,0xFF500109,0x18801A5,0xFD680122,0xFF200061,0xFCE40002,0xDCDC0000,0xC5000001,0xC4B40001,0xF5480109,0xFF100048,0xCB040000,0xB60001A5,0x79FC01A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0x10001A5,0xC8E40000,0xC8E40000,0xC8E40000,0xC8E40000,0xC8E40000, -0xC8E40000,0x9CE40000,0x9CE40000,0x9CE40000,0x8EE40000,0x17C01A5,0x17C01A5,0x17C01A5,0x17C01A5,0x17C01A5,0x17C01A5,0xAAAC0000,0xAAAC0000,0xAAAC0000,0x8EC80000,0x43F801A5,0x43F801A5,0x43F801A5,0x8E6C0000,0x800001A5,0xF4F800DD,0x10001A5,0x10001A5,0xFAEC0034,0xFEE40000,0xDEE40000,0xDEE40000,0xB8E40000,0xFCE400A2,0xFED80022,0xACDC0000,0xAAAC0000, -0x11FC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table116[] = { -0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000, -0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xB040000,0xB040000,0xB040000,0x16C0000,0x7FC0000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1300000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000, -0x1C40000,0x67F80000,0x67F80000,0x67F80000,0x96000001,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x67F80000,0x67F80000,0x67F80000,0x96000001,0x67F80000,0x67F80000,0x67F80000,0x96000001,0x96000001,0x1440000,0x1300000,0x1300000,0x3600000,0x17C0000,0x19C0000,0x19C0000,0x3FC0000,0x3600000,0x17C0000,0x3FF80000,0x67F80000, -0x3FF80000,0x1640000,0x1640000,0x1640000,0x1640000,0x15FC0000,0x15FC0000,0x15FC0000,0x8DFC0000,0x8DFC0000,0xB0000001,0x15FC0000,0x15FC0000,0x15FC0000,0x8DFC0000,0x8DFC0000,0xB0000001,0x8DFC0000,0x8DFC0000,0xB0000001,0xB0000001,0x15FC0000,0x15FC0000,0x15FC0000,0x8DFC0000,0x8DFC0000,0xB0000001,0x8DFC0000,0x8DFC0000,0xB0000001,0xB0000001,0x8DFC0000, -0x8DFC0000,0xB0000001,0xB0000001,0xB0000001,0x59C0000,0x17C0000,0x1640000,0x1E40000,0x43FC0000,0x6FFC0000,0x7FF80000,0x97F80000,0x3BC0000,0x15FC0000,0x6FFC0000,0xB0000001,0x6FFC0000,0x19C0001,0x6FFC0000,0xB9F80000,0xCE000000,0x6FFC0000,0xB9F80000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0x6FFC0000,0xB9F80000,0xCE000000,0xB9F80000,0xCE000000, -0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0x6FFC0000,0xB9F80000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0xCE000000,0x15FC0000,0x5B80000,0x5B80000,0x8BFC0000,0xAFFC0000,0xC3F80000,0xCE000000,0xCE000000,0x4DFC0000,0x99FC0000,0xCDCC0000,0xCE000000, -0xA5FC0000,0x1480F1E,0xFF340795,0xEB2C05ED,0xCF2C05ED,0xFF2404B6,0xF31801C5,0xD31C0286,0xDB14034A,0xCD1001BD,0xC114034A,0xFF0C06AE,0xF8F80154,0xD508020F,0xE0F001B6,0xCCF80006,0xC0FC01BE,0xCEF405EB,0xC6EC020D,0xBAEC028A,0xB2F405EB,0x3E80F1A,0xFECC05F1,0xCEF805EA,0xF4A8034A,0xD0D001B2,0xC0DC034A,0xE69005EB,0xCEA00153,0xBEB001C3,0xB2C005EA,0x79FC0F1A, -0xCE4805EA,0xC02C034A,0xB21005EA,0xA4000F1A,0xFF340865,0xFB440CAA,0xFD480D3E,0xFF1402EF,0xFEFC0019,0xE2F80004,0xD1000025,0xCAF40023,0xFF240839,0xFF000256,0xD2E80159,0xBEB001C3,0x57FC0F1A,0x16405ED,0xFF5001C8,0xE34C0154,0xCF480154,0xFF3C01D5,0xE9300002,0xCF380029,0xD33001A5,0xC92C004C,0xC13001A5,0x19FC05EA,0xF4FC0153,0xCF200152,0xE6E401A5,0xCCF80005, -0xC10801A5,0x8FF805EA,0xCE9C0152,0xC08801A5,0xB20005EA,0x19FC05EA,0xF4FC0153,0xCF200152,0xE6E401A5,0xCCF80005,0xC10801A5,0x8FF805EA,0xCE9C0152,0xC08801A5,0xB20005EA,0x8FF805EA,0xCE9C0152,0xC08801A5,0xB20005EA,0xB20005EA,0xFF500376,0xF96004ED,0xFB6404E4,0xFF2C016D,0xFEFC0010,0xE2F80003,0xCF080001,0xCCE0000C,0xFD40039E,0xFF180122,0xD4DC0152,0xC08801A5, -0x71FC05EA,0x12C05ED,0x12C05ED,0x12C05ED,0x12C05ED,0xFD1401A9,0xFD1401A9,0xFD1401A9,0xC51401A5,0xC51401A5,0xB11401A6,0xF8F80153,0xF8F80153,0xF8F80153,0xCCF80002,0xCCF80002,0xB300004B,0xB8F80153,0xB8F80153,0xACF4002A,0xA4F40153,0x1C005EA,0x1C005EA,0x1C005EA,0xD8C401A5,0xD8C401A5,0xB0EC01A6,0xCAAC0153,0xCAAC0153,0xB2C00001,0xA4CC0152,0x65F805EA, -0x65F805EA,0xB06C01A6,0xA4480152,0x960005EA,0xFF2002FE,0xFD280492,0x12C05ED,0xFF080103,0xFEF80003,0xE0F80002,0xD8F8000B,0xC4F40002,0xFF0C02D3,0xFEF400CA,0xCEEC0153,0xB2C00001,0x3DF805EA,0x1480154,0x1480154,0x1480154,0x1480154,0xE7300000,0xE7300000,0xE7300000,0xBF300000,0xBF300000,0xB1300001,0x3E80152,0x3E80152,0x3E80152,0xC9000001,0xC9000001, -0xB1180001,0x79FC0152,0x79FC0152,0xB0C80001,0xA4000152,0x3E80152,0x3E80152,0x3E80152,0xC9000001,0xC9000001,0xB1180001,0x79FC0152,0x79FC0152,0xB0C80001,0xA4000152,0x79FC0152,0x79FC0152,0xB0C80001,0xA4000152,0xA4000152,0xFB3400A4,0xFB4400C8,0x1480154,0xFF180041,0xFCFC0000,0xDEFC0000,0xD1080000,0xC6F00000,0xF92800B5,0xFF04003A,0x57FC0152,0xB0C80001, -0x57FC0152,0x18001A5,0xFF680008,0xDD640000,0xCF640000,0x41FC01A5,0xEB2C0000,0xCF480000,0xA1FC01A5,0xCEEC0000,0xC00001A5,0x41FC01A5,0xEB2C0000,0xCF480000,0xA1FC01A5,0xCEEC0000,0xC00001A5,0xA1FC01A5,0xCEEC0000,0xC00001A5,0xC00001A5,0x41FC01A5,0xEB2C0000,0xCF480000,0xA1FC01A5,0xCEEC0000,0xC00001A5,0xA1FC01A5,0xCEEC0000,0xC00001A5,0xC00001A5,0xA1FC01A5, -0xCEEC0000,0xC00001A5,0xC00001A5,0xC00001A5,0xFD680120,0x19C01A5,0xF77C0139,0xFF400071,0xFEFC0010,0xE4F00000,0xCF100000,0xCEC00000,0xFF5C0109,0xFF2C0064,0xD3180000,0xC00001A5,0x89FC01A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0x11401A5,0xD0F80001,0xD0F80001,0xD0F80001,0xD0F80001,0xD0F80001, -0xD0F80001,0xA4F80001,0xA4F80001,0xA4F80001,0x96F40002,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0xB2C00000,0xB2C00000,0xB2C00000,0x96DC0001,0x51F801A5,0x51F801A5,0x51F801A5,0x96840001,0x880001A5,0xFF0C00DD,0x11401A5,0x11401A5,0xFD00003D,0xFEF80002,0xE4F80001,0xE4F80001,0xC0F80001,0xFEF400AA,0xFAF00032,0xB4F00001,0xB2C00000, -0x21FC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table117[] = { -0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x1840000,0x47FC0000, -0x47FC0000,0x47FC0000,0x47FC0000,0x82000000,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1040001,0x1180000,0x1180000,0x1180000,0x1840000,0x17FC0000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000, -0x1DC0000,0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x73F80000,0x73F80000,0x73F80000,0x9E000001,0x9E000001,0x3540000,0x1400000,0x1400000,0x1740000,0x1900000,0x1B40000,0x1B40000,0x17FC0000,0x1740000,0x1900000,0x4DFC0000,0x73F80000, -0x4DFC0000,0x1740000,0x1740000,0x1740000,0x1740000,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x99FC0000,0x99FC0000,0xB8000001,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x99FC0000,0x99FC0000,0xB8000001,0x99FC0000,0x99FC0000,0xB8000001,0xB8000001,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x99FC0000,0x99FC0000,0xB8000001,0x99FC0000,0x99FC0000,0xB8000001,0xB8000001,0x99FC0000, -0x99FC0000,0xB8000001,0xB8000001,0xB8000001,0x1B00000,0x18C0000,0x1740000,0x3F80000,0x57FC0000,0x7FF80000,0x8BFC0000,0xA1FC0000,0x5D00000,0x2FFC0000,0x7FF80000,0xB8000001,0x7FF80000,0x1AC0001,0x87FC0000,0xC5F80000,0xD6000000,0x87FC0000,0xC5F80000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0x87FC0000,0xC5F80000,0xD6000000,0xC5F80000,0xD6000000, -0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0x87FC0000,0xC5F80000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0xC5F80000,0xD6000000,0xD6000000,0xD6000000,0xD6000000,0x3DFC0000,0xDC80000,0xDC80000,0x9DFC0000,0xBDF80000,0xCDF80000,0xD6000000,0xD6000000,0x6BFC0000,0xABFC0000,0xD5DC0000,0xD6000000, -0xB5FC0000,0x1580F1E,0xFF4407E9,0xF33C05ED,0xD73C05ED,0xFF300542,0xFB2801C5,0xDB2C0286,0xE324034A,0xD52001BD,0xC924034A,0xFF240716,0xFF080155,0xDD18020F,0xE90001B6,0xD5080006,0xC90C01BE,0xD70405EB,0xCEFC020D,0xC2FC028A,0xBB0405EB,0x7FC0F1A,0xFEE40611,0xD70805EA,0xFCB8034A,0xD8E001B2,0xC8EC034A,0xEEA005EB,0xD6B00153,0xC6C001C3,0xBAD005EA,0x85FC0F1A, -0xD65805EA,0xC83C034A,0xBA2005EA,0xAC000F1A,0xFF4408FE,0xFF4C0CEA,0xF5580D89,0xFF2C03A6,0xFF10005D,0xEB080004,0xD9100025,0xD3040023,0xFF340886,0xFF180302,0xDAF80159,0xC6C001C3,0x65FC0F1A,0x17405ED,0xFF60020D,0xEB5C0154,0xD7580154,0xFF5001FD,0xF1400002,0xD7480029,0xDB4001A5,0xD13C004C,0xC94001A5,0x31FC05EA,0xFD0C0153,0xD7300152,0xEEF401A5,0xD5080005, -0xC91801A5,0x9BF805EA,0xD6AC0152,0xC89801A5,0xBA0005EA,0x31FC05EA,0xFD0C0153,0xD7300152,0xEEF401A5,0xD5080005,0xC91801A5,0x9BF805EA,0xD6AC0152,0xC89801A5,0xBA0005EA,0x9BF805EA,0xD6AC0152,0xC89801A5,0xBA0005EA,0xBA0005EA,0xFF5803C9,0xFF6C04F5,0xFF6C0504,0xFF4001AA,0xFF18003B,0xEB080003,0xD7180001,0xD4F0000C,0xFF5403C9,0xFF2C017D,0xDCEC0152,0xC89801A5, -0x7FFC05EA,0x13C05ED,0x13C05ED,0x13C05ED,0x13C05ED,0xFF2401B5,0xFF2401B5,0xFF2401B5,0xCD2401A5,0xCD2401A5,0xB92401A6,0xFD080155,0xFD080155,0xFD080155,0xD5080002,0xD5080002,0xBB10004B,0xC1080153,0xC1080153,0xB504002A,0xAD040153,0x1D805EA,0x1D805EA,0x1D805EA,0xE0D401A5,0xE0D401A5,0xB8FC01A6,0xD2BC0153,0xD2BC0153,0xBAD00001,0xACDC0152,0x71F805EA, -0x71F805EA,0xB87C01A6,0xAC580152,0x9E0005EA,0xFF2C032B,0xF53804C1,0x13C05ED,0xFF1C0141,0xFF0C0013,0xE9080002,0xE108000B,0xCD040002,0xFF2002F9,0xFF100109,0xD6FC0153,0xBAD00001,0x4BFC05EA,0x1580154,0x1580154,0x1580154,0x1580154,0xEF400000,0xEF400000,0xEF400000,0xC7400000,0xC7400000,0xB9400001,0x7FC0152,0x7FC0152,0x7FC0152,0xD1100001,0xD1100001, -0xB9280001,0x85FC0152,0x85FC0152,0xB8D80001,0xAC000152,0x7FC0152,0x7FC0152,0x7FC0152,0xD1100001,0xD1100001,0xB9280001,0x85FC0152,0x85FC0152,0xB8D80001,0xAC000152,0x85FC0152,0x85FC0152,0xB8D80001,0xAC000152,0xAC000152,0xF74800B5,0xF35400DD,0x1580154,0xFD300055,0xFD140002,0xE70C0000,0xD9180000,0xCF000000,0xFF3400B9,0xFD200048,0x65FC0152,0xB8D80001, -0x65FC0152,0x19001A5,0xFF78001D,0xE5740000,0xD7740000,0x59FC01A5,0xF33C0000,0xD7580000,0xADFC01A5,0xD6FC0000,0xC80001A5,0x59FC01A5,0xF33C0000,0xD7580000,0xADFC01A5,0xD6FC0000,0xC80001A5,0xADFC01A5,0xD6FC0000,0xC80001A5,0xC80001A5,0x59FC01A5,0xF33C0000,0xD7580000,0xADFC01A5,0xD6FC0000,0xC80001A5,0xADFC01A5,0xD6FC0000,0xC80001A5,0xC80001A5,0xADFC01A5, -0xD6FC0000,0xC80001A5,0xC80001A5,0xC80001A5,0xFF780122,0x1AC01A5,0xFF8C0139,0xFF580091,0xFF240020,0xED000000,0xD7200000,0xD6D00000,0xFF700120,0xFF40007D,0xDB280000,0xC80001A5,0x99FC01A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0x12401A5,0xD9080001,0xD9080001,0xD9080001,0xD9080001,0xD9080001, -0xD9080001,0xAD080001,0xAD080001,0xAD080001,0x9F040002,0x1B001A5,0x1B001A5,0x1B001A5,0x1B001A5,0x1B001A5,0x1B001A5,0xBAD00000,0xBAD00000,0xBAD00000,0x9EEC0001,0x5DF401A5,0x5DF401A5,0x5DF401A5,0x9E940001,0x900001A5,0xF71C00F2,0x12401A5,0x12401A5,0xFF10004A,0xFD08000A,0xED080001,0xED080001,0xC9080001,0xFD0C00B5,0xFF00003D,0xBD000001,0xBAD00000, -0x31FC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table118[] = { -0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x53FC0000, -0x53FC0000,0x53FC0000,0x53FC0000,0x8A000000,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1140001,0x1280000,0x1280000,0x1280000,0x19C0000,0x25FC0000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000, -0x1F40000,0x7FF80000,0x7FF80000,0x7FF80000,0xA6000001,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x1F40000,0x7FF80000,0x7FF80000,0x7FF80000,0xA6000001,0x7FF80000,0x7FF80000,0x7FF80000,0xA6000001,0xA6000001,0xB640000,0x1500000,0x1500000,0x7840000,0x1A40000,0x1C80000,0x1C80000,0x2BFC0000,0x7840000,0x1A40000,0x5DF80000,0x7FF80000, -0x5DF80000,0x1840000,0x1840000,0x1840000,0x1840000,0x47FC0000,0x47FC0000,0x47FC0000,0xA5F80000,0xA5F80000,0xC0000001,0x47FC0000,0x47FC0000,0x47FC0000,0xA5F80000,0xA5F80000,0xC0000001,0xA5F80000,0xA5F80000,0xC0000001,0xC0000001,0x47FC0000,0x47FC0000,0x47FC0000,0xA5F80000,0xA5F80000,0xC0000001,0xA5F80000,0xA5F80000,0xC0000001,0xC0000001,0xA5F80000, -0xA5F80000,0xC0000001,0xC0000001,0xC0000001,0x1C40000,0x79C0000,0x1840000,0x1BFC0000,0x6BFC0000,0x8DFC0000,0x99FC0000,0xADF80000,0x5E40000,0x47FC0000,0x8DFC0000,0xC0000001,0x8DFC0000,0x1BC0001,0x9FFC0000,0xD1F80000,0xDE000000,0x9FFC0000,0xD1F80000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0x9FFC0000,0xD1F80000,0xDE000000,0xD1F80000,0xDE000000, -0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0x9FFC0000,0xD1F80000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0xD1F80000,0xDE000000,0xDE000000,0xDE000000,0xDE000000,0x63FC0000,0x1DC0000,0x1DC0000,0xB1FC0000,0xC9FC0000,0xD7F80000,0xDE000000,0xDE000000,0x89FC0000,0xBBFC0000,0xDDEC0000,0xDE000000, -0xC3FC0000,0x1680F1E,0xFF5C0861,0xFB4C05ED,0xDF4C05ED,0xFF4405BA,0xFF3801D5,0xE33C0286,0xEB34034A,0xDD3001BD,0xD134034A,0xFF300786,0xFF1C017C,0xE528020F,0xF11001B6,0xDD180006,0xD11C01BE,0xDF1405EB,0xD70C020D,0xCB0C028A,0xC31405EB,0x1FFC0F1A,0xFEFC0651,0xDF1805EA,0xFED4035A,0xE0F001B2,0xD0FC034A,0xF6B005EB,0xDEC00153,0xCED001C3,0xC2E005EA,0x91FC0F1A, -0xDE6805EA,0xD04C034A,0xC23005EA,0xB4000F1A,0xFF58097A,0xFB640D0E,0xFD680D89,0xFF400463,0xFF2400D1,0xF3180004,0xE1200025,0xDB140023,0xFF440933,0xFF3003D3,0xE3080159,0xCED001C3,0x75FC0F1A,0x18405ED,0xFF740248,0xF36C0154,0xDF680154,0xFF5C0239,0xF9500002,0xDF580029,0xE35001A5,0xD94C004C,0xD15001A5,0x49FC05EA,0xFF280163,0xDF400152,0xF70401A5,0xDD180005, -0xD12801A5,0xA7F805EA,0xDEBC0152,0xD0A801A5,0xC20005EA,0x49FC05EA,0xFF280163,0xDF400152,0xF70401A5,0xDD180005,0xD12801A5,0xA7F805EA,0xDEBC0152,0xD0A801A5,0xC20005EA,0xA7F805EA,0xDEBC0152,0xD0A801A5,0xC20005EA,0xC20005EA,0xFD7403F6,0xF980051D,0xFB840515,0xFF540209,0xFF30007D,0xF3180003,0xDF280001,0xDD00000C,0xFF5C040A,0xFF4801C4,0xE4FC0152,0xD0A801A5, -0x8FFC05EA,0x14C05ED,0x14C05ED,0x14C05ED,0x14C05ED,0xFD3801D5,0xFD3801D5,0xFD3801D5,0xD53401A5,0xD53401A5,0xC13401A6,0xFF1C0163,0xFF1C0163,0xFF1C0163,0xDD180002,0xDD180002,0xC320004B,0xC9180153,0xC9180153,0xBD14002A,0xB5140153,0x1F005EA,0x1F005EA,0x1F005EA,0xE8E401A5,0xE8E401A5,0xC10C01A6,0xDACC0153,0xDACC0153,0xC2E00001,0xB4EC0152,0x7DF805EA, -0x7DF805EA,0xC08C01A6,0xB4680152,0xA60005EA,0xFF3C035D,0xFD4804C1,0x14C05ED,0xFF2C0182,0xFF200033,0xF1180002,0xE918000B,0xD5140002,0xFF340322,0xFF1C014E,0xDF0C0153,0xC2E00001,0x5BFC05EA,0x1680154,0x1680154,0x1680154,0x1680154,0xF7500000,0xF7500000,0xF7500000,0xCF500000,0xCF500000,0xC1500001,0x1FFC0152,0x1FFC0152,0x1FFC0152,0xD9200001,0xD9200001, -0xC1380001,0x91FC0152,0x91FC0152,0xC0E80001,0xB4000152,0x1FFC0152,0x1FFC0152,0x1FFC0152,0xD9200001,0xD9200001,0xC1380001,0x91FC0152,0x91FC0152,0xC0E80001,0xB4000152,0x91FC0152,0x91FC0152,0xC0E80001,0xB4000152,0xB4000152,0xFF5800B5,0xFB6400DD,0x1680154,0xFF440062,0xFD28000A,0xEF1C0000,0xE1280000,0xD7100000,0xFD4C00C8,0xFF300059,0x75FC0152,0xC0E80001, -0x75FC0152,0x1A001A5,0xFF8C0034,0xED840000,0xDF840000,0x71FC01A5,0xFB4C0000,0xDF680000,0xB9FC01A5,0xDF0C0000,0xD00001A5,0x71FC01A5,0xFB4C0000,0xDF680000,0xB9FC01A5,0xDF0C0000,0xD00001A5,0xB9FC01A5,0xDF0C0000,0xD00001A5,0xD00001A5,0x71FC01A5,0xFB4C0000,0xDF680000,0xB9FC01A5,0xDF0C0000,0xD00001A5,0xB9FC01A5,0xDF0C0000,0xD00001A5,0xD00001A5,0xB9FC01A5, -0xDF0C0000,0xD00001A5,0xD00001A5,0xD00001A5,0xFD900139,0x1BC01A5,0xF79C0154,0xFF6C00AA,0xFF3C003A,0xF5100000,0xDF300000,0xDEE00000,0xF7880139,0xFF6400A2,0xE3380000,0xD00001A5,0xA7FC01A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0x13401A5,0xE1180001,0xE1180001,0xE1180001,0xE1180001,0xE1180001, -0xE1180001,0xB5180001,0xB5180001,0xB5180001,0xA7140002,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0xC2E00000,0xC2E00000,0xC2E00000,0xA6FC0001,0x67FC01A5,0x67FC01A5,0x67FC01A5,0xA6A40001,0x980001A5,0xFF2C00F2,0x13401A5,0x13401A5,0xFF200059,0xFD1C0012,0xF5180001,0xF5180001,0xD1180001,0xFD1C00C8,0xFF140048,0xC5100001,0xC2E00000, -0x3FFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table119[] = { -0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x5FF80000, -0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x5380000,0x5380000,0x5380000,0x1B40000,0x35FC0000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0x1600000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000, -0xFFC0000,0x8BF80000,0x8BF80000,0x8BF80000,0xAE000001,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0xFFC0000,0x8BF80000,0x8BF80000,0x8BF80000,0xAE000001,0x8BF80000,0x8BF80000,0x8BF80000,0xAE000001,0xAE000001,0x1780000,0x1600000,0x1600000,0x3980000,0x1B80000,0x3DC0000,0x3DC0000,0x3DFC0000,0x3980000,0x1B80000,0x6BFC0000,0x8BF80000, -0x6BFC0000,0x1940000,0x1940000,0x1940000,0x1940000,0x5FFC0000,0x5FFC0000,0x5FFC0000,0xB1F80000,0xB1F80000,0xC8000001,0x5FFC0000,0x5FFC0000,0x5FFC0000,0xB1F80000,0xB1F80000,0xC8000001,0xB1F80000,0xB1F80000,0xC8000001,0xC8000001,0x5FFC0000,0x5FFC0000,0x5FFC0000,0xB1F80000,0xB1F80000,0xC8000001,0xB1F80000,0xB1F80000,0xC8000001,0xC8000001,0xB1F80000, -0xB1F80000,0xC8000001,0xC8000001,0xC8000001,0x3D40000,0xFAC0000,0x1940000,0x39FC0000,0x7DFC0000,0x9DF80000,0xA7F80000,0xB7FC0000,0x5F80000,0x5FFC0000,0x9DF80000,0xC8000001,0x9DF80000,0x1CC0001,0xB7FC0000,0xDDF40000,0xE6000000,0xB7FC0000,0xDDF40000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xB7FC0000,0xDDF40000,0xE6000000,0xDDF40000,0xE6000000, -0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0xB7FC0000,0xDDF40000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0xE6000000,0x8BFC0000,0x1EC0000,0x1EC0000,0xC5FC0000,0xD7FC0000,0xE1FC0000,0xE6000000,0xE6000000,0xA7FC0000,0xCDFC0000,0xE5FC0000,0xE6000000, -0xD3FC0000,0x1780F1E,0xFF6808D9,0xFF6005F1,0xE75C05ED,0xFF5C0662,0xFF480215,0xEB4C0286,0xF344034A,0xE54001BD,0xD944034A,0xFF4407F7,0xFF3401DC,0xED38020F,0xF92001B6,0xE5280006,0xD92C01BE,0xE72405EB,0xDF1C020D,0xD31C028A,0xCB2405EB,0x37FC0F1A,0xFF1406B1,0xE72805EA,0xFEF00392,0xE90001B2,0xD90C034A,0xFEC005EB,0xE6D00153,0xD6E001C3,0xCAF005EA,0x9DFC0F1A, -0xE67805EA,0xD85C034A,0xCA4005EA,0xBC000F1A,0xFF6409F4,0xFF6C0D5E,0xF5780DD6,0xFF54052A,0xFF380175,0xFB280004,0xE9300025,0xE3240023,0xFF5C09AA,0xFF40048F,0xEB180159,0xD6E001C3,0x83FC0F1A,0x19405ED,0xFF8402A5,0xFB7C0154,0xE7780154,0xFF740279,0xFF600004,0xE7680029,0xEB6001A5,0xE15C004C,0xD96001A5,0x63FC05EA,0xFF40018B,0xE7500152,0xFF1401A5,0xE5280005, -0xD93801A5,0xB3F805EA,0xE6CC0152,0xD8B801A5,0xCA0005EA,0x63FC05EA,0xFF40018B,0xE7500152,0xFF1401A5,0xE5280005,0xD93801A5,0xB3F805EA,0xE6CC0152,0xD8B801A5,0xCA0005EA,0xB3F805EA,0xE6CC0152,0xD8B801A5,0xCA0005EA,0xCA0005EA,0xFF800435,0xFF8C052D,0xFF8C053D,0xFF6C026A,0xFF4800CE,0xFB280003,0xE7380001,0xE510000C,0xFF78042A,0xFF5C022E,0xED0C0152,0xD8B801A5, -0x9FF805EA,0x15C05ED,0x15C05ED,0x15C05ED,0x15C05ED,0xFF4801F1,0xFF4801F1,0xFF4801F1,0xDD4401A5,0xDD4401A5,0xC94401A6,0xFF30017D,0xFF30017D,0xFF30017D,0xE5280002,0xE5280002,0xCB30004B,0xD1280153,0xD1280153,0xC524002A,0xBD240153,0xDFC05EA,0xDFC05EA,0xDFC05EA,0xF0F401A5,0xF0F401A5,0xC91C01A6,0xE2DC0153,0xE2DC0153,0xCAF00001,0xBCFC0152,0x89F805EA, -0x89F805EA,0xC89C01A6,0xBC780152,0xAE0005EA,0xFF4C038A,0xF75C04EE,0x15C05ED,0xFF4401C3,0xFF300062,0xF9280002,0xF128000B,0xDD240002,0xFF3C0371,0xFF300182,0xE71C0153,0xCAF00001,0x69FC05EA,0x1780154,0x1780154,0x1780154,0x1780154,0xFF600000,0xFF600000,0xFF600000,0xD7600000,0xD7600000,0xC9600001,0x37FC0152,0x37FC0152,0x37FC0152,0xE1300001,0xE1300001, -0xC9480001,0x9DFC0152,0x9DFC0152,0xC8F80001,0xBC000152,0x37FC0152,0x37FC0152,0x37FC0152,0xE1300001,0xE1300001,0xC9480001,0x9DFC0152,0x9DFC0152,0xC8F80001,0xBC000152,0x9DFC0152,0x9DFC0152,0xC8F80001,0xBC000152,0xBC000152,0xFB6C00C8,0xF37400F4,0x1780154,0xFD580071,0xFF400012,0xF72C0000,0xE9380000,0xDF200000,0xF56400DD,0xFF480064,0x83FC0152,0xC8F80001, -0x83FC0152,0x1B001A5,0xFF9C0061,0xF5940000,0xE7940000,0x89FC01A5,0xFF640002,0xE7780000,0xC5FC01A5,0xE71C0000,0xD80001A5,0x89FC01A5,0xFF640002,0xE7780000,0xC5FC01A5,0xE71C0000,0xD80001A5,0xC5FC01A5,0xE71C0000,0xD80001A5,0xD80001A5,0x89FC01A5,0xFF640002,0xE7780000,0xC5FC01A5,0xE71C0000,0xD80001A5,0xC5FC01A5,0xE71C0000,0xD80001A5,0xD80001A5,0xC5FC01A5, -0xE71C0000,0xD80001A5,0xD80001A5,0xD80001A5,0xF7A40152,0x1CC01A5,0xFFAC0154,0xFF8400D0,0xFF640062,0xFD200000,0xE7400000,0xE6F00000,0xFF980139,0xFD8000C8,0xEB480000,0xD80001A5,0xB7FC01A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0x14401A5,0xE9280001,0xE9280001,0xE9280001,0xE9280001,0xE9280001, -0xE9280001,0xBD280001,0xBD280001,0xBD280001,0xAF240002,0x1E001A5,0x1E001A5,0x1E001A5,0x1E001A5,0x1E001A5,0x1E001A5,0xCAF00000,0xCAF00000,0xCAF00000,0xAF0C0001,0x73FC01A5,0x73FC01A5,0x73FC01A5,0xAEB40001,0xA00001A5,0xF73C0109,0x14401A5,0x14401A5,0xFB340071,0xFB2C0022,0xFD280001,0xFD280001,0xD9280001,0xF93000DD,0xFF200061,0xCD200001,0xCAF00000, -0x4FFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table120[] = { -0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x6DF80000, -0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x14C0000,0x14C0000,0x14C0000,0x1D00000,0x45FC0000,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x1700001,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000, -0x2BFC0000,0x97FC0000,0x97FC0000,0x97FC0000,0xB8000000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x2BFC0000,0x97FC0000,0x97FC0000,0x97FC0000,0xB8000000,0x97FC0000,0x97FC0000,0x97FC0000,0xB8000000,0xB8000000,0xD880000,0x1700001,0x1700001,0x5AC0000,0x1D00000,0x1F80000,0x1F80000,0x53FC0000,0x5AC0000,0x1D00000,0x7DF80000,0x97FC0000, -0x7DF80000,0x1A40001,0x1A40001,0x1A40001,0x1A40001,0x7BFC0000,0x7BFC0000,0x7BFC0000,0xBFF80000,0xBFF80000,0xD2000000,0x7BFC0000,0x7BFC0000,0x7BFC0000,0xBFF80000,0xBFF80000,0xD2000000,0xBFF80000,0xBFF80000,0xD2000000,0xD2000000,0x7BFC0000,0x7BFC0000,0x7BFC0000,0xBFF80000,0xBFF80000,0xD2000000,0xBFF80000,0xBFF80000,0xD2000000,0xD2000000,0xBFF80000, -0xBFF80000,0xD2000000,0xD2000000,0xD2000000,0x7E80000,0x9C00000,0x1A40001,0x5BFC0000,0x93FC0000,0xADFC0000,0xB5FC0000,0xC3FC0000,0x29FC0000,0x7BFC0000,0xADFC0000,0xD2000000,0xADFC0000,0x1E00000,0xD3FC0000,0xE9FC0000,0xEE000001,0xD3FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xD3FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xEE000001, -0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0xD3FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0xEE000001,0xB7FC0000,0x17FC0000,0x17FC0000,0xDBFC0000,0xE7F80000,0xEDF40000,0xEE000001,0xEE000001,0xC9FC0000,0xE1FC0000,0xEFF00000,0xEE000001, -0xE3FC0000,0x18C0F1A,0xFF800983,0xFF70062A,0xEF7005EB,0xFF680746,0xFF6002AB,0xF35C028A,0xFD54034A,0xED5401BE,0xE154034A,0xFF5C0899,0xFF4C028C,0xF548020D,0xFF3801BA,0xEF380006,0xE33C01BD,0xEF3805EA,0xE72C020F,0xDD300286,0xD33805ED,0x53FC0F1A,0xFF38073B,0xF13805EB,0xFF140416,0xF11001B6,0xE120034A,0xFEE40615,0xEEE40154,0xDEF001C5,0xD50005ED,0xABF80F1A, -0xEE8C05EB,0xE070034A,0xD25405ED,0xC4000F1E,0xFF780A99,0xFD880D72,0xFD880DDA,0xFF6C0649,0xFF500261,0xFF3C001F,0xF1400023,0xEB340025,0xFF700A53,0xFF54059A,0xF52C015B,0xDEF001C5,0x95FC0F1A,0x1A805EA,0xFF980303,0xFF8C0162,0xEF8C0153,0xFF8C02DE,0xFF78003B,0xF17C002A,0xF57001A6,0xEB70004B,0xE17001A6,0x7DFC05EA,0xFF5801E4,0xEF640153,0xFF3801BA,0xEF3C0002, -0xE14C01A5,0xBFFC05EA,0xEEE40153,0xE0CC01A5,0xD20005ED,0x7DFC05EA,0xFF5801E4,0xEF640153,0xFF3801BA,0xEF3C0002,0xE14C01A5,0xBFFC05EA,0xEEE40153,0xE0CC01A5,0xD20005ED,0xBFFC05EA,0xEEE40153,0xE0CC01A5,0xD20005ED,0xD20005ED,0xFF94046D,0xFBA4054E,0xFBA4054B,0xFF8002E8,0xFF680146,0xFF400011,0xF14C0002,0xEF24000B,0xFD940481,0xFF7402C9,0xF71C0152,0xE0CC01A5, -0xAFFC05EA,0x17005EA,0x17005EA,0x17005EA,0x17005EA,0xFF5C0221,0xFF5C0221,0xFF5C0221,0xE75401A5,0xE75401A5,0xD35401A5,0xFF4401A8,0xFF4401A8,0xFF4401A8,0xED380005,0xED380005,0xD544004C,0xDB380152,0xDB380152,0xCF380029,0xC5380154,0x29FC05EA,0x29FC05EA,0x29FC05EA,0xF90801A5,0xF90801A5,0xD32C01A5,0xECEC0153,0xECEC0153,0xD3000002,0xC5100154,0x97F805EA, -0x97F805EA,0xD2AC01A5,0xC4900154,0xB60005ED,0xFD6403CC,0xFF6C04ED,0x17005EA,0xFF540211,0xFF4400AD,0xFF3C0006,0xFB3C000C,0xE7380001,0xFF58039E,0xFD4C01E2,0xF12C0152,0xD3000002,0x7BFC05EA,0x18C0152,0x18C0152,0x18C0152,0x18C0152,0xFF740005,0xFF740005,0xFF740005,0xDF740001,0xDF740001,0xD3700001,0x53FC0152,0x53FC0152,0x53FC0152,0xE9440001,0xE9440001, -0xD3580000,0xABF80152,0xABF80152,0xD3080000,0xC4000154,0x53FC0152,0x53FC0152,0x53FC0152,0xE9440001,0xE9440001,0xD3580000,0xABF80152,0xABF80152,0xD3080000,0xC4000154,0xABF80152,0xABF80152,0xD3080000,0xC4000154,0xC4000154,0xF78000DD,0xFD8800F2,0x18C0152,0xFB700091,0xFF540028,0xFF400001,0xF3480000,0xE7340000,0xFD7400DD,0xFF5C007D,0x95FC0152,0xD3080000, -0x95FC0152,0x1C401A5,0xFFB00092,0xFDA80001,0xEFA40002,0xA5FC01A5,0xFF880020,0xEF8C0001,0xD3FC01A5,0xEF340001,0xE00001A5,0xA5FC01A5,0xFF880020,0xEF8C0001,0xD3FC01A5,0xEF340001,0xE00001A5,0xD3FC01A5,0xEF340001,0xE00001A5,0xE00001A5,0xA5FC01A5,0xFF880020,0xEF8C0001,0xD3FC01A5,0xEF340001,0xE00001A5,0xD3FC01A5,0xEF340001,0xE00001A5,0xE00001A5,0xD3FC01A5, -0xEF340001,0xE00001A5,0xE00001A5,0xE00001A5,0xFFB40154,0x1E001A5,0xF9C0016D,0xFFA000FA,0xFF7C0092,0xFF48000A,0xEF540001,0xEF080001,0xFFAC015A,0xFF9400E9,0xF5580000,0xE00001A5,0xC7FC01A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0x15401A5,0xF3380000,0xF3380000,0xF3380000,0xF3380000,0xF3380000, -0xF3380000,0xC7380000,0xC7380000,0xC7380000,0xB9380000,0x1F801A5,0x1F801A5,0x1F801A5,0x1F801A5,0x1F801A5,0x1F801A5,0xD5000000,0xD5000000,0xD5000000,0xB91C0000,0x81FC01A5,0x81FC01A5,0x81FC01A5,0xB8C00000,0xAA0001A5,0xFF4C010D,0x15401A5,0x15401A5,0xFD480080,0xFD400034,0xFF3C0005,0xFF3C0005,0xE3380000,0xFF3C00E9,0xFF3C0071,0xD7300000,0xD5000000, -0x5FFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table121[] = { -0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0x79F80000, -0x79F80000,0x79F80000,0x79F80000,0xA2000001,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x1480000,0x75C0000,0x75C0000,0x75C0000,0x1E80000,0x55FC0000,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x1800001,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000, -0x43FC0000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0x43FC0000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xC0000000,0xC0000000,0x19C0000,0x1800001,0x1800001,0x1C00000,0x1E40000,0x17FC0000,0x17FC0000,0x67FC0000,0x1C00000,0x1E40000,0x8BFC0000,0xA3FC0000, -0x8BFC0000,0x1B40001,0x1B40001,0x1B40001,0x1B40001,0x93FC0000,0x93FC0000,0x93FC0000,0xCBF80000,0xCBF80000,0xDA000000,0x93FC0000,0x93FC0000,0x93FC0000,0xCBF80000,0xCBF80000,0xDA000000,0xCBF80000,0xCBF80000,0xDA000000,0xDA000000,0x93FC0000,0x93FC0000,0x93FC0000,0xCBF80000,0xCBF80000,0xDA000000,0xCBF80000,0xCBF80000,0xDA000000,0xDA000000,0xCBF80000, -0xCBF80000,0xDA000000,0xDA000000,0xDA000000,0x3FC0000,0x1D40000,0x1B40001,0x79FC0000,0xA7FC0000,0xBDF80000,0xC3FC0000,0xCFF80000,0x51FC0000,0x93FC0000,0xBDF80000,0xDA000000,0xBDF80000,0x1F00000,0xEBFC0000,0xF5FC0000,0xF6000001,0xEBFC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xEBFC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF6000001, -0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xEBFC0000,0xF5FC0000,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xF5FC0000,0xF6000001,0xF6000001,0xF6000001,0xF6000001,0xDFFC0000,0x97FC0000,0x97FC0000,0xEFFC0000,0xF5F80000,0xF7F40000,0xF6000001,0xF6000001,0xE7FC0000,0xF1FC0000,0xF9C40000,0xF6000001, -0xF3FC0000,0x19C0F1A,0xFF8C0A17,0xFF84069F,0xF78005EB,0xFF8007FE,0xFF700371,0xFB6C028A,0xFF640362,0xF56401BE,0xE964034A,0xFF740939,0xFF60036B,0xFD58020D,0xFF4C021D,0xF7480006,0xEB4C01BD,0xF74805EA,0xEF3C020F,0xE5400286,0xDB4805ED,0x6BFC0F1A,0xFF4C07E9,0xF94805EB,0xFF2C04BE,0xF92001B6,0xE930034A,0xFF080675,0xF6F40154,0xE70001C5,0xDD1005ED,0xB7F80F1A, -0xF69C05EB,0xE880034A,0xDA6405ED,0xCC000F1E,0xFF8C0B22,0xF5980DDA,0xF79C0E27,0xFF800738,0xFF680366,0xFF540098,0xF9500023,0xF3440025,0xFF800AF5,0xFF7006AA,0xFD3C015B,0xE70001C5,0xA3FC0F1A,0x1B805EA,0xFFA8037E,0xFFA0019A,0xF79C0153,0xFF980356,0xFF8800A9,0xF98C002A,0xFD8001A6,0xF380004B,0xE98001A6,0x95FC05EA,0xFF7C0248,0xF7740153,0xFF580205,0xF74C0002, -0xE95C01A5,0xCBFC05EA,0xF6F40153,0xE8DC01A5,0xDA0005ED,0x95FC05EA,0xFF7C0248,0xF7740153,0xFF580205,0xF74C0002,0xE95C01A5,0xCBFC05EA,0xF6F40153,0xE8DC01A5,0xDA0005ED,0xCBFC05EA,0xF6F40153,0xE8DC01A5,0xDA0005ED,0xDA0005ED,0xFBAC04B5,0xFFAC057E,0xF5B8057E,0xFF940349,0xFF7C01D6,0xFF5C005E,0xF95C0002,0xF734000B,0xFFA404B5,0xFF900329,0xFF2C0152,0xE8DC01A5, -0xBFF805EA,0x18005EA,0x18005EA,0x18005EA,0x18005EA,0xFF6C0266,0xFF6C0266,0xFF6C0266,0xEF6401A5,0xEF6401A5,0xDB6401A5,0xFF5C01E8,0xFF5C01E8,0xFF5C01E8,0xF5480005,0xF5480005,0xDD54004C,0xE3480152,0xE3480152,0xD7480029,0xCD480154,0x41FC05EA,0x41FC05EA,0x41FC05EA,0xFF1C01A6,0xFF1C01A6,0xDB3C01A5,0xF4FC0153,0xF4FC0153,0xDB100002,0xCD200154,0xA1FC05EA, -0xA1FC05EA,0xDABC01A5,0xCCA00154,0xBE0005ED,0xFF7403FE,0xF77C051E,0x18005EA,0xFF68026D,0xFF5800F9,0xFF50002D,0xFF4C000D,0xEF480001,0xFF6403EA,0xFF5C022E,0xF93C0152,0xDB100002,0x89FC05EA,0x19C0152,0x19C0152,0x19C0152,0x19C0152,0xFD880019,0xFD880019,0xFD880019,0xE7840001,0xE7840001,0xDB800001,0x6BFC0152,0x6BFC0152,0x6BFC0152,0xF1540001,0xF1540001, -0xDB680000,0xB7F80152,0xB7F80152,0xDB180000,0xCC000154,0x6BFC0152,0x6BFC0152,0x6BFC0152,0xF1540001,0xF1540001,0xDB680000,0xB7F80152,0xB7F80152,0xDB180000,0xCC000154,0xB7F80152,0xB7F80152,0xDB180000,0xCC000154,0xCC000154,0xFF9000DD,0xF5980109,0x19C0152,0xFD8400A2,0xFF70003D,0xFF58000A,0xFB580000,0xEF440000,0xFD8800F2,0xFF740095,0xA3FC0152,0xDB180000, -0xA3FC0152,0x1D401A5,0xFFC400C1,0xFFB80011,0xF7B40002,0xBDFC01A5,0xFFA00050,0xF79C0001,0xDFF801A5,0xF7440001,0xE80001A5,0xBDFC01A5,0xFFA00050,0xF79C0001,0xDFF801A5,0xF7440001,0xE80001A5,0xDFF801A5,0xF7440001,0xE80001A5,0xE80001A5,0xBDFC01A5,0xFFA00050,0xF79C0001,0xDFF801A5,0xF7440001,0xE80001A5,0xDFF801A5,0xF7440001,0xE80001A5,0xE80001A5,0xDFF801A5, -0xF7440001,0xE80001A5,0xE80001A5,0xE80001A5,0xFDCC016D,0x1F001A5,0xFFCC0179,0xFDBC0122,0xFFA800C8,0xFF70003A,0xF7640001,0xF7180001,0xF7CC016D,0xFFB80120,0xFD680000,0xE80001A5,0xD7FC01A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0x16401A5,0xFB480000,0xFB480000,0xFB480000,0xFB480000,0xFB480000, -0xFB480000,0xCF480000,0xCF480000,0xCF480000,0xC1480000,0x15FC01A5,0x15FC01A5,0x15FC01A5,0x15FC01A5,0x15FC01A5,0x15FC01A5,0xDD100000,0xDD100000,0xDD100000,0xC12C0000,0x8DFC01A5,0x8DFC01A5,0x8DFC01A5,0xC0D00000,0xB20001A5,0xF9600120,0x16401A5,0x16401A5,0xFF580091,0xFF500041,0xFF4C000D,0xFF4C000D,0xEB480000,0xFD5400F2,0xFD4C0082,0xDF400000,0xDD100000, -0x6FFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table122[] = { -0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x3FC0000,0x85F80000, -0x85F80000,0x85F80000,0x85F80000,0xAA000001,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0xF6C0000,0xF6C0000,0xF6C0000,0x3FC0000,0x63FC0000,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x1900001,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000, -0x5BFC0000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xC8000000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0x5BFC0000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xC8000000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xC8000000,0xC8000000,0x1AC0000,0x1900001,0x1900001,0x1D40000,0x1F80000,0x35FC0000,0x35FC0000,0x7BFC0000,0x1D40000,0x1F80000,0x9BFC0000,0xAFFC0000, -0x9BFC0000,0x1C40001,0x1C40001,0x1C40001,0x1C40001,0xABFC0000,0xABFC0000,0xABFC0000,0xD7F80000,0xD7F80000,0xE2000000,0xABFC0000,0xABFC0000,0xABFC0000,0xD7F80000,0xD7F80000,0xE2000000,0xD7F80000,0xD7F80000,0xE2000000,0xE2000000,0xABFC0000,0xABFC0000,0xABFC0000,0xD7F80000,0xD7F80000,0xE2000000,0xD7F80000,0xD7F80000,0xE2000000,0xE2000000,0xD7F80000, -0xD7F80000,0xE2000000,0xE2000000,0xE2000000,0x3BFC0000,0x1E40000,0x1C40001,0x97FC0000,0xBBFC0000,0xCBFC0000,0xD1FC0000,0xD9FC0000,0x77FC0000,0xABFC0000,0xCBFC0000,0xE2000000,0xCBFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1AC0EC7,0xFF9C0A97,0xFF940717,0xFF9005EA,0xFF9808B1,0xFF84044E,0xFF8002A5,0xFF7C03A7,0xFD7401B9,0xF174032D,0xFF8C09AC,0xFF700455,0xFF680234,0xFF6402A2,0xFF580005,0xF35C01A4,0xFF5805B3,0xF74C01FE,0xED500261,0xE35805B4,0x83FC0EC7,0xFF640876,0xFF5805ED,0xFF4C057F,0xFF3801B9,0xF140032D,0xFF2C06C8,0xFF040153,0xEF1001A4,0xE32005B5,0xC3F80EC7, -0xFEAC05EA,0xF090032D,0xE27405B4,0xD4000EC9,0xFFA00B5D,0xFDA80D89,0xFDA80DDA,0xFF8807F3,0xFF7C0465,0xFF680153,0xFF600030,0xFB54001C,0xFF980B2F,0xFF800755,0xFF50016C,0xEF1001A4,0xB3FC0EC7,0x1C805B3,0xFFBC03B6,0xFFAC01FB,0xFFAC0152,0xFFB00389,0xFFA00122,0xFF98002D,0xFF94019B,0xFB900042,0xF1900189,0xAFFC05B3,0xFF9402A5,0xFF840152,0xFF70024E,0xFF5C0001, -0xF16C0188,0xD7FC05B3,0xFF040152,0xF0EC0188,0xE20005B4,0xAFFC05B3,0xFF9402A5,0xFF840152,0xFF70024E,0xFF5C0001,0xF16C0188,0xD7FC05B3,0xFF040152,0xF0EC0188,0xE20005B4,0xD7FC05B3,0xFF040152,0xF0EC0188,0xE20005B4,0xE20005B4,0xFFBC04B5,0xFBC4054B,0xFBC4054B,0xFFB003A2,0xFF900261,0xFF7C00D1,0xFF6C0004,0xFD480009,0xFFB404BE,0xFFA40388,0xFF50016B,0xF0EC0188, -0xCDFC05B3,0x19005EA,0x19005EA,0x19005EA,0x19005EA,0xFF8002A5,0xFF8002A5,0xFF8002A5,0xF77401A5,0xF77401A5,0xE37401A5,0xFF680234,0xFF680234,0xFF680234,0xFD580005,0xFD580005,0xE564004C,0xEB580152,0xEB580152,0xDF580029,0xD5580154,0x59FC05EA,0x59FC05EA,0x59FC05EA,0xFF3801B9,0xFF3801B9,0xE34C01A5,0xFD0C0153,0xFD0C0153,0xE3200002,0xD5300154,0xADFC05EA, -0xADFC05EA,0xE2CC01A5,0xD4B00154,0xC60005ED,0xFF84042D,0xFF8C051E,0x19005EA,0xFF7802C9,0xFF6C0155,0xFF64006D,0xFF600030,0xF7580001,0xFF780411,0xFF740289,0xFF500153,0xE3200002,0x99FC05EA,0x1AC0152,0x1AC0152,0x1AC0152,0x1AC0152,0xFF98002D,0xFF98002D,0xFF98002D,0xEF940001,0xEF940001,0xE3900001,0x83FC0152,0x83FC0152,0x83FC0152,0xF9640001,0xF9640001, -0xE3780000,0xC3F80152,0xC3F80152,0xE3280000,0xD4000154,0x83FC0152,0x83FC0152,0x83FC0152,0xF9640001,0xF9640001,0xE3780000,0xC3F80152,0xC3F80152,0xE3280000,0xD4000154,0xC3F80152,0xC3F80152,0xE3280000,0xD4000154,0xD4000154,0xFFA000F4,0xFDA80109,0x1AC0152,0xFF9800B5,0xFF840059,0xFF740019,0xFF6C0004,0xF7540000,0xFF940104,0xFB9000B5,0xB3FC0152,0xE3280000, -0xB3FC0152,0x1E0018A,0xFFDC00F2,0xFFCC0049,0xFFC40001,0xD5FC0188,0xFFB80089,0xFFAC0000,0xEBF80188,0xFF540000,0xF0000188,0xD5FC0188,0xFFB80089,0xFFAC0000,0xEBF80188,0xFF540000,0xF0000188,0xEBF80188,0xFF540000,0xF0000188,0xF0000188,0xD5FC0188,0xFFB80089,0xFFAC0000,0xEBF80188,0xFF540000,0xF0000188,0xEBF80188,0xFF540000,0xF0000188,0xF0000188,0xEBF80188, -0xFF540000,0xF0000188,0xF0000188,0xF0000188,0xF3E0016D,0x27FC0188,0xF9E0016D,0xFDD40139,0xFFBC00E9,0xFF980074,0xFF740000,0xFF280000,0xFDD80154,0xFFCC0122,0xFF90000D,0xF0000188,0xE5FC0188,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0x17401A5,0xFD580004,0xFD580004,0xFD580004,0xFD580004,0xFD580004, -0xFD580004,0xD7580000,0xD7580000,0xD7580000,0xC9580000,0x2FFC01A5,0x2FFC01A5,0x2FFC01A5,0x2FFC01A5,0x2FFC01A5,0x2FFC01A5,0xE5200000,0xE5200000,0xE5200000,0xC93C0000,0x99FC01A5,0x99FC01A5,0x99FC01A5,0xC8E00000,0xBA0001A5,0xFF6C0128,0x17401A5,0x17401A5,0xFF6800A4,0xFF640055,0xFF5C001D,0xFF5C001D,0xF3580000,0xF9680109,0xFD6000A2,0xE7500000,0xE5200000, -0x7FF801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table123[] = { -0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x91F80000, -0x91F80000,0x91F80000,0x91F80000,0xB2000001,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1800000,0x1800000,0x1800000,0x1BFC0000,0x73FC0000,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x1A00001,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000, -0x75FC0000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xD0000000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0x75FC0000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xD0000000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xD0000000,0xD0000000,0x7BC0000,0x1A00001,0x1A00001,0x5E40000,0x1FFC0000,0x53FC0000,0x53FC0000,0x8FFC0000,0x5E40000,0x1FFC0000,0xA9FC0000,0xBBFC0000, -0xA9FC0000,0x1D40001,0x1D40001,0x1D40001,0x1D40001,0xC3FC0000,0xC3FC0000,0xC3FC0000,0xE1FC0000,0xE1FC0000,0xEA000000,0xC3FC0000,0xC3FC0000,0xC3FC0000,0xE1FC0000,0xE1FC0000,0xEA000000,0xE1FC0000,0xE1FC0000,0xEA000000,0xEA000000,0xC3FC0000,0xC3FC0000,0xC3FC0000,0xE1FC0000,0xE1FC0000,0xEA000000,0xE1FC0000,0xE1FC0000,0xEA000000,0xEA000000,0xE1FC0000, -0xE1FC0000,0xEA000000,0xEA000000,0xEA000000,0x75FC0000,0x3F40000,0x1D40001,0xB5FC0000,0xCFFC0000,0xDBFC0000,0xDFF80000,0xE5F40000,0x9FFC0000,0xC3FC0000,0xDBFC0000,0xEA000000,0xDBFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1B80C63,0xFFB0094E,0xFFA006CB,0xFFA005EA,0xFFA4076D,0xFF940436,0xFF8C0301,0xFF880313,0xFF8401A5,0xF584026D,0xFF980818,0xFF88041D,0xFF800284,0xFF7C0222,0xFF6C001D,0xF56C00F0,0xFF6C0438,0xFB600192,0xF1640165,0xE9680428,0x95FC0C63,0xFF7C07BE,0xFF7005ED,0xFF6404BF,0xFF4C01F6,0xF554026D,0xFF380594,0xFF280163,0xF32400D8,0xE9340428,0xCBFC0C63, -0xFEE005EA,0xF4B4026D,0xE88C0428,0xDA000C65,0xFFA809C9,0xFFAC0B85,0xF5B80BDB,0xFF9806E9,0xFF8C041A,0xFF7C0175,0xFF78007A,0xFF680001,0xFFA409B3,0xFF90067F,0xFF640186,0xF32400D8,0xBFF80C63,0x1D0042B,0xFFC402DB,0xFFC001BE,0xFFBC0152,0xFFC4028B,0xFFB400FA,0xFFB00055,0xFFA800F1,0xFDA0000E,0xF5A000C9,0xBDFC0428,0xFFAC022D,0xFF9C0152,0xFF88018E,0xFF780005, -0xF58000C8,0xDFF80428,0xFF340152,0xF50C00C8,0xE8000428,0xBDFC0428,0xFFAC022D,0xFF9C0152,0xFF88018E,0xFF780005,0xF58000C8,0xDFF80428,0xFF340152,0xF50C00C8,0xE8000428,0xDFF80428,0xFF340152,0xF50C00C8,0xE8000428,0xE8000428,0xFDCC0378,0xFFCC03D3,0xFFCC03E3,0xFFC002B6,0xFFA801D5,0xFF9000C1,0xFF880019,0xFF640000,0xFFC4037B,0xFDBC02B6,0xFF700162,0xF50C00C8, -0xD7FC0428,0x1A005EA,0x1A005EA,0x1A005EA,0x1A005EA,0xFF8C0301,0xFF8C0301,0xFF8C0301,0xFF8401A5,0xFF8401A5,0xEB8401A5,0xFF800284,0xFF800284,0xFF800284,0xFF6C001D,0xFF6C001D,0xED74004C,0xF3680152,0xF3680152,0xE7680029,0xDD680154,0x71FC05EA,0x71FC05EA,0x71FC05EA,0xFF4C01F6,0xFF4C01F6,0xEB5C01A5,0xFF280163,0xFF280163,0xEB300002,0xDD400154,0xB9FC05EA, -0xB9FC05EA,0xEADC01A5,0xDCC00154,0xCE0005ED,0xFB980484,0xF79C0551,0x1A005EA,0xFF900321,0xFF8001C1,0xFF7800C2,0xFF78007A,0xFF680001,0xFF900452,0xFF8002F4,0xFF64016D,0xEB300002,0xA7FC05EA,0x1BC0152,0x1BC0152,0x1BC0152,0x1BC0152,0xFFB00055,0xFFB00055,0xFFB00055,0xF7A40001,0xF7A40001,0xEBA00001,0x9BFC0152,0x9BFC0152,0x9BFC0152,0xFF780005,0xFF780005, -0xEB880000,0xCFF80152,0xCFF80152,0xEB380000,0xDC000154,0x9BFC0152,0x9BFC0152,0x9BFC0152,0xFF780005,0xFF780005,0xEB880000,0xCFF80152,0xCFF80152,0xEB380000,0xDC000154,0xCFF80152,0xCFF80152,0xEB380000,0xDC000154,0xDC000154,0xFBB40109,0xF5B80122,0x1BC0152,0xFFA400DA,0xFF98007D,0xFF8C0041,0xFF880019,0xFF640000,0xFDB00109,0xFFA400C8,0xC1FC0152,0xEB380000, -0xC1FC0152,0x1E800CA,0xFFE0007D,0xFFD80025,0xFFD40001,0xE3FC00C8,0xFFCC0049,0xFFC00001,0xF1F800C8,0xFF840000,0xF40000C8,0xE3FC00C8,0xFFCC0049,0xFFC00001,0xF1F800C8,0xFF840000,0xF40000C8,0xF1F800C8,0xFF840000,0xF40000C8,0xF40000C8,0xE3FC00C8,0xFFCC0049,0xFFC00001,0xF1F800C8,0xFF840000,0xF40000C8,0xF1F800C8,0xFF840000,0xF40000C8,0xF40000C8,0xF1F800C8, -0xFF840000,0xF40000C8,0xF40000C8,0xF40000C8,0xF7E800B5,0x67FC00C8,0xFDE800B5,0xFFD8009D,0xFFD00075,0xFFC0003D,0xFF9C0000,0xFF640000,0xFFDC00B4,0xFFD80095,0xFFB00008,0xF40000C8,0xEDFC00C8,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0x18401A5,0xFF6C000D,0xFF6C000D,0xFF6C000D,0xFF6C000D,0xFF6C000D, -0xFF6C000D,0xDF680000,0xDF680000,0xDF680000,0xD1680000,0x47FC01A5,0x47FC01A5,0x47FC01A5,0x47FC01A5,0x47FC01A5,0x47FC01A5,0xED300000,0xED300000,0xED300000,0xD14C0000,0xA5F801A5,0xA5F801A5,0xA5F801A5,0xD0F00000,0xC20001A5,0xF9800139,0x18401A5,0x18401A5,0xFB7C00C8,0xFF780071,0xFF700034,0xFF700034,0xFB680000,0xFF74010D,0xFD7400B5,0xEF600000,0xED300000, -0x8DFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table124[] = { -0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000, -0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1940000,0x1940000,0x1940000,0x37FC0000,0x83FC0000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000, -0x8FFC0000,0xC9F80000,0xC9F80000,0xC9F80000,0xD8000001,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0x8FFC0000,0xC9F80000,0xC9F80000,0xC9F80000,0xD8000001,0xC9F80000,0xC9F80000,0xC9F80000,0xD8000001,0xD8000001,0x1D00000,0x1B40000,0x1B40000,0x1FC0000,0x4BFC0000,0x75FC0000,0x75FC0000,0xA5FC0000,0x1FC0000,0x4BFC0000,0xBBFC0000,0xC9F80000, -0xBBFC0000,0x1E80000,0x1E80000,0x1E80000,0x1E80000,0xDFFC0000,0xDFFC0000,0xDFFC0000,0xEFFC0000,0xEFFC0000,0xF2000001,0xDFFC0000,0xDFFC0000,0xDFFC0000,0xEFFC0000,0xEFFC0000,0xF2000001,0xEFFC0000,0xEFFC0000,0xF2000001,0xF2000001,0xDFFC0000,0xDFFC0000,0xDFFC0000,0xEFFC0000,0xEFFC0000,0xF2000001,0xEFFC0000,0xEFFC0000,0xF2000001,0xF2000001,0xEFFC0000, -0xEFFC0000,0xF2000001,0xF2000001,0xF2000001,0xB5FC0000,0x57FC0000,0x1E80000,0xD7FC0000,0xE5FC0000,0xEBFC0000,0xEDFC0000,0xF1F80000,0xCBFC0000,0xDFFC0000,0xEBFC0000,0xF2000001,0xEBFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1C40A26,0xFFBC0819,0xFFB4067D,0xFFB005ED,0xFFB00662,0xFFA8043D,0xFFA40362,0xFFA002BE,0xFF9801C9,0xF99801E2,0xFFA406D7,0xFF9C0415,0xFF9802FB,0xFF8801E3,0xFF84007A,0xFB840073,0xFF84031B,0xFD780159,0xF77000A2,0xEF7802D3,0xA9FC0A26,0xFF940717,0xFF8C05EA,0xFF7C0432,0xFF70024D,0xF96801E2,0xFF640489,0xFF4C01A8,0xF93C003F,0xEF4802D2,0xD5F80A26, -0xFF1405EA,0xF8D801E2,0xEEAC02D2,0xE2000A26,0xFFBC082A,0xFBC40972,0xFBC409B6,0xFFAC061E,0xFFA003F3,0xFF9001DC,0xFF8C00FA,0xFF7C0029,0xFFB407F7,0xFFA405EA,0xFF7C01C3,0xF93C003F,0xC9FC0A26,0x1DC02D5,0xFFD40228,0xFFCC0194,0xFFCC0154,0xFFD001B5,0xFFC400E4,0xFFC40080,0xFFB8007D,0xFFB40001,0xF9B4003D,0xCFFC02D2,0xFFC001C8,0xFFB80152,0xFFAC00FE,0xFF940025, -0xF994003E,0xE7FC02D2,0xFF6C0152,0xF934003D,0xEE0002D2,0xCFFC02D2,0xFFC001C8,0xFFB80152,0xFFAC00FE,0xFF940025,0xF994003E,0xE7FC02D2,0xFF6C0152,0xF934003D,0xEE0002D2,0xE7FC02D2,0xFF6C0152,0xF934003D,0xEE0002D2,0xEE0002D2,0xFFD80263,0xF7DC02A9,0xF7DC02B4,0xFDD00209,0xFFC0016A,0xFFAC00B1,0xFFA8003D,0xFF8C0019,0xFDD8026D,0xFFC801D6,0xFF98015B,0xF934003D, -0xE1FC02D2,0x1B005ED,0x1B005ED,0x1B005ED,0x1B005ED,0xFFA40362,0xFFA40362,0xFFA40362,0xFF9801C9,0xFF9801C9,0xF39801A6,0xFF9802FB,0xFF9802FB,0xFF9802FB,0xFF84007A,0xFF84007A,0xF584004B,0xFB7C0153,0xFB7C0153,0xEF78002A,0xE7780153,0x8DFC05EA,0x8DFC05EA,0x8DFC05EA,0xFF70024D,0xFF70024D,0xF37001A6,0xFF4C01A8,0xFF4C01A8,0xF5440001,0xE7500152,0xC7FC05EA, -0xC7FC05EA,0xF2F001A6,0xE6CC0152,0xD80005EA,0xFFAC04B2,0xFFAC055A,0x1B005ED,0xFFA403A1,0xFF940252,0xFF900163,0xFF8C00FA,0xFF7C0029,0xFFA00491,0xFF980365,0xFF7C01B3,0xF5440001,0xB9FC05EA,0x1CC0154,0x1CC0154,0x1CC0154,0x1CC0154,0xFFC40080,0xFFC40080,0xFFC40080,0xFFB40001,0xFFB40001,0xF3B40001,0xB7FC0152,0xB7FC0152,0xB7FC0152,0xFF940025,0xFF940025, -0xF39C0001,0xDDF40152,0xDDF40152,0xF34C0001,0xE6000152,0xB7FC0152,0xB7FC0152,0xB7FC0152,0xFF940025,0xFF940025,0xF39C0001,0xDDF40152,0xDDF40152,0xF34C0001,0xE6000152,0xDDF40152,0xDDF40152,0xF34C0001,0xE6000152,0xE6000152,0xFDC80120,0xFFCC0120,0x1CC0154,0xFDC000F4,0xFFB000B4,0xFFA8006A,0xFFA8003D,0xFF8C0019,0xFFC40120,0xFBC000F2,0xD3FC0152,0xF34C0001, -0xD3FC0152,0x1F4003D,0xFFEC0028,0xFFEC000D,0xFFE80000,0xEFFC003D,0xFFE40014,0xFFDC0000,0xF7F8003D,0xFFB80000,0xF800003D,0xEFFC003D,0xFFE40014,0xFFDC0000,0xF7F8003D,0xFFB80000,0xF800003D,0xF7F8003D,0xFFB80000,0xF800003D,0xF800003D,0xEFFC003D,0xFFE40014,0xFFDC0000,0xF7F8003D,0xFFB80000,0xF800003D,0xF7F8003D,0xFFB80000,0xF800003D,0xF800003D,0xF7F8003D, -0xFFB80000,0xF800003D,0xF800003D,0xF800003D,0xFBF00034,0xA7FC003D,0xF3F4003D,0xFFEC0029,0xFFE80020,0xFFD40011,0xFFC80000,0xFFA80000,0xFFF00032,0xFFEC0032,0xFFD00001,0xF800003D,0xF5FC003D,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0x19801A5,0xFF800022,0xFF800022,0xFF800022,0xFF800022,0xFF800022, -0xFF800022,0xE77C0001,0xE77C0001,0xE77C0001,0xD9780002,0x63FC01A5,0x63FC01A5,0x63FC01A5,0x63FC01A5,0x63FC01A5,0x63FC01A5,0xF5440000,0xF5440000,0xF5440000,0xD9600001,0xB3F801A5,0xB3F801A5,0xB3F801A5,0xD9080001,0xCA0001A5,0xFF8C0151,0x19801A5,0x19801A5,0xFF9000DD,0xFF8C0091,0xFF880055,0xFF880055,0xFD7C0005,0xFD8C0120,0xFF8000DA,0xF7740001,0xF5440000, -0x9FF801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table125[] = { -0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0xA9FC0000, -0xA9FC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1880001,0x1A40000,0x1A40000,0x1A40000,0x4FFC0000,0x93FC0000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0x1C40000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000, -0xA9FC0000,0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xA9FC0000,0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0xD5F80000,0xD5F80000,0xD5F80000,0xE0000001,0xE0000001,0x9E00000,0x1C40000,0x1C40000,0x35FC0000,0x73FC0000,0x93FC0000,0x93FC0000,0xB9FC0000,0x35FC0000,0x73FC0000,0xC9FC0000,0xD5F80000, -0xC9FC0000,0x1F80000,0x1F80000,0x1F80000,0x1F80000,0xF7FC0000,0xF7FC0000,0xF7FC0000,0xFBFC0000,0xFBFC0000,0xFA000001,0xF7FC0000,0xF7FC0000,0xF7FC0000,0xFBFC0000,0xFBFC0000,0xFA000001,0xFBFC0000,0xFBFC0000,0xFA000001,0xFA000001,0xF7FC0000,0xF7FC0000,0xF7FC0000,0xFBFC0000,0xFBFC0000,0xFA000001,0xFBFC0000,0xFBFC0000,0xFA000001,0xFA000001,0xFBFC0000, -0xFBFC0000,0xFA000001,0xFA000001,0xFA000001,0xEDFC0000,0xD7FC0000,0x1F80000,0xF5FC0000,0xF9FC0000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xF3FC0000,0xF7FC0000,0xFBFC0000,0xFA000001,0xFBFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1D0088E,0xFFC80749,0xFFC00651,0xFFC005ED,0xFFC405B2,0xFFBC0463,0xFFB403DD,0xFFAC02BE,0xFFAC0215,0xFDA801AA,0xFFBC05EF,0xFFB0042C,0xFFA40383,0xFFA001F3,0xFF9C010A,0xFD94004F,0xFF94028E,0xFF900163,0xFB84003E,0xF58801FF,0xBBFC088E,0xFFAC069F,0xFFA405EA,0xFF9403FA,0xFF8802BD,0xFD7C01AA,0xFF7C0401,0xFF640218,0xFD540005,0xF55C01FE,0xDDFC088E, -0xFF4805EA,0xFCF801AA,0xF4C401FE,0xE800088E,0xFFCC0739,0xFFCC07F2,0xFFCC0846,0xFFC005B1,0xFFB003FD,0xFFA4024E,0xFFA4019A,0xFF940096,0xFFC4071A,0xFFB40585,0xFF94023D,0xFD540005,0xD5FC088E,0x1E801FD,0xFDE401B5,0xFFE0016D,0xFFDC0154,0xFFDC0149,0xFFD400EA,0xFFD000B4,0xFFCC0069,0xFFC80028,0xFDC40005,0xDFFC01FD,0xFFD80188,0xFFCC0154,0xFFB800D2,0xFFB80059, -0xFDA80006,0xEFFC01FD,0xFF9C0152,0xFD540005,0xF40001FE,0xDFFC01FD,0xFFD80188,0xFFCC0154,0xFFB800D2,0xFFB80059,0xFDA80006,0xEFFC01FD,0xFF9C0152,0xFD540005,0xF40001FE,0xEFFC01FD,0xFF9C0152,0xFD540005,0xF40001FE,0xF40001FE,0xFFE401C5,0xFDE801D9,0xFDE801E8,0xFFDC0179,0xFFD40132,0xFFC800CE,0xFFBC007D,0xFFA80049,0xFFE001CA,0xFFDC0172,0xFFB80156,0xFD540005, -0xEBFC01FD,0x1C005ED,0x1C005ED,0x1C005ED,0x1C005ED,0xFFB403DD,0xFFB403DD,0xFFB403DD,0xFFAC0215,0xFFAC0215,0xFBA801A6,0xFFA40383,0xFFA40383,0xFFA40383,0xFF9C010A,0xFF9C010A,0xFD94004B,0xFF900163,0xFF900163,0xF788002A,0xEF880153,0xA5FC05EA,0xA5FC05EA,0xA5FC05EA,0xFF8802BD,0xFF8802BD,0xFB8001A6,0xFF640218,0xFF640218,0xFD540001,0xEF600152,0xD3FC05EA, -0xD3FC05EA,0xFB0001A6,0xEEDC0152,0xE00005EA,0xFFBC04E6,0xF9C00581,0x1C005ED,0xFFB40402,0xFFAC02EA,0xFFA401FD,0xFFA4019A,0xFF940096,0xFFB404BE,0xFFB003CE,0xFF940234,0xFD540001,0xC7FC05EA,0x1DC0154,0x1DC0154,0x1DC0154,0x1DC0154,0xFFD000B4,0xFFD000B4,0xFFD000B4,0xFFC80028,0xFFC80028,0xFBC40001,0xCFFC0152,0xCFFC0152,0xCFFC0152,0xFFB80059,0xFFB80059, -0xFBAC0001,0xE7FC0152,0xE7FC0152,0xFB5C0001,0xEE000152,0xCFFC0152,0xCFFC0152,0xCFFC0152,0xFFB80059,0xFFB80059,0xFBAC0001,0xE7FC0152,0xE7FC0152,0xFB5C0001,0xEE000152,0xE7FC0152,0xE7FC0152,0xFB5C0001,0xEE000152,0xEE000152,0xFFD80122,0xF7DC0139,0x1DC0154,0xFDD80109,0xFFC800DA,0xFFBC00A9,0xFFBC007D,0xFFA80049,0xFFD00132,0xFBD40109,0xE1FC0152,0xFB5C0001, -0xE1FC0152,0x1FC0005,0xFFF80004,0xFFF80001,0xFFF80000,0xFBFC0005,0xFFF80002,0xFFF40000,0xFDF80005,0xFFEC0000,0xFC000005,0xFBFC0005,0xFFF80002,0xFFF40000,0xFDF80005,0xFFEC0000,0xFC000005,0xFDF80005,0xFFEC0000,0xFC000005,0xFC000005,0xFBFC0005,0xFFF80002,0xFFF40000,0xFDF80005,0xFFEC0000,0xFC000005,0xFDF80005,0xFFEC0000,0xFC000005,0xFC000005,0xFDF80005, -0xFFEC0000,0xFC000005,0xFC000005,0xFC000005,0xFFF80004,0xE7FC0005,0xF7FC0005,0xFBFC0005,0xFFF80002,0xFFF00001,0xFFF00000,0xFFE40000,0xF9FC0005,0xFBFC0005,0xFFF00000,0xFC000005,0xFDF80005,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0x1A801A5,0xFF900049,0xFF900049,0xFF900049,0xFF900049,0xFF900049, -0xFF900049,0xEF8C0001,0xEF8C0001,0xEF8C0001,0xE1880002,0x7BFC01A5,0x7BFC01A5,0x7BFC01A5,0x7BFC01A5,0x7BFC01A5,0x7BFC01A5,0xFD540000,0xFD540000,0xFD540000,0xE1700001,0xBFF801A5,0xBFF801A5,0xBFF801A5,0xE1180001,0xD20001A5,0xFBA40152,0x1A801A5,0x1A801A5,0xFFA000F2,0xFD9C00B5,0xFF980071,0xFF980071,0xFD900019,0xFD9C0139,0xFF9400E9,0xFF840001,0xFD540000, -0xADFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table126[] = { -0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0x69FC0000,0xB5FC0000, -0xB5FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x1980001,0x3B40000,0x3B40000,0x3B40000,0x69FC0000,0xA1FC0000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0x1D40000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000, -0xC1FC0000,0xE1F80000,0xE1F80000,0xE1F80000,0xE8000001,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xE1F80000,0xE1F80000,0xE1F80000,0xE8000001,0xE1F80000,0xE1F80000,0xE1F80000,0xE8000001,0xE8000001,0x1F40000,0x1D40000,0x1D40000,0x6DFC0000,0x9BFC0000,0xB1FC0000,0xB1FC0000,0xCDFC0000,0x6DFC0000,0x9BFC0000,0xD9FC0000,0xE1F80000, -0xD9FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1DC0693,0xFFD405C6,0xFFD0052E,0xFFD004EE,0xFFD0049F,0xFFC403C6,0xFFC40362,0xFFC00295,0xFFB8021E,0xFFB801A5,0xFFC4048F,0xFFBC037F,0xFFBC0306,0xFFAC01D4,0xFFAC012B,0xFFA4006A,0xFFAC01E3,0xFFA0010E,0xFF980011,0xF798012A,0xC9FC0691,0xFFB80566,0xFFB804ED,0xFFAC0367,0xFFA0029E,0xFF9401A5,0xFF940316,0xFF8801D1,0xFF700008,0xF7700129,0xE5F80691, -0xFF6C04ED,0xFF2001A5,0xF6EC0129,0xEC000691,0xFFD805AB,0xF5D8064D,0xF5D80672,0xFFCC0496,0xFFC00366,0xFFB8022B,0xFFB401AE,0xFFA800C3,0xFFD00576,0xFFC4045F,0xFFB001FA,0xFF700008,0xDFF80691,0x1F00126,0xFFEC0105,0xFFEC00EA,0xFFE800DD,0xFFE800C6,0xFFE000A1,0xFFE00088,0xFFD8004C,0xFFD80028,0xFFD40000,0xEBFC0126,0xFFE400F1,0xFFDC00DD,0xFFD80088,0xFFCC0050, -0xFFC00000,0xF5FC0126,0xFFB800DD,0xFF7C0000,0xF6000129,0xEBFC0126,0xFFE400F1,0xFFDC00DD,0xFFD80088,0xFFCC0050,0xFFC00000,0xF5FC0126,0xFFB800DD,0xFF7C0000,0xF6000129,0xF5FC0126,0xFFB800DD,0xFF7C0000,0xF6000129,0xF6000129,0xFDF0010C,0xFFEC0110,0xFFEC0121,0xFFEC00F3,0xFFDC00C6,0xFFD80092,0xFFD00061,0xFFC00049,0xFFEC010B,0xFFE800E5,0xFFD000DE,0xFF7C0000, -0xF3FC0126,0x1D004EE,0x1D004EE,0x1D004EE,0x1D004EE,0xFFC40362,0xFFC40362,0xFFC40362,0xFFB8021E,0xFFB8021E,0xFFB801A5,0xFFBC0306,0xFFBC0306,0xFFBC0306,0xFFAC012B,0xFFAC012B,0xFFA4006A,0xFFA0010E,0xFFA0010E,0xFD98000E,0xF59800DE,0xB7FC04ED,0xB7FC04ED,0xB7FC04ED,0xFFA0029E,0xFFA0029E,0xFF9401A5,0xFF8801D1,0xFF8801D1,0xFF700008,0xF57400DD,0xDDF404ED, -0xDDF404ED,0xFF2001A5,0xF4F400DD,0xE60004ED,0xFFCC042E,0xFFCC0492,0x1D004EE,0xFFC40382,0xFFC002BD,0xFFB401EE,0xFFB401AE,0xFFA800C3,0xFBC8042D,0xFFBC0366,0xFFA801F5,0xFF700008,0xD3FC04ED,0x1E800DD,0x1E800DD,0x1E800DD,0x1E800DD,0xFFE00088,0xFFE00088,0xFFE00088,0xFFD80028,0xFFD80028,0xFFD40000,0xDFFC00DD,0xDFFC00DD,0xDFFC00DD,0xFFCC0050,0xFFCC0050, -0xFFC00000,0xEFFC00DD,0xEFFC00DD,0xFF7C0000,0xF40000DD,0xDFFC00DD,0xDFFC00DD,0xDFFC00DD,0xFFCC0050,0xFFCC0050,0xFFC00000,0xEFFC00DD,0xEFFC00DD,0xFF7C0000,0xF40000DD,0xEFFC00DD,0xEFFC00DD,0xFF7C0000,0xF40000DD,0xF40000DD,0xF9E800C8,0xFDE800C8,0x1E800DD,0xFFDC00B4,0xFFDC0095,0xFFD80082,0xFFD00061,0xFFC00049,0xF7E800C8,0xFFDC00AA,0xEBFC00DD,0xFF7C0000, -0xEBFC00DD,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0x1B801A5,0xFFA4006A,0xFFA4006A,0xFFA4006A,0xFFA4006A,0xFFA4006A, -0xFFA4006A,0xF79C0001,0xF79C0001,0xF79C0001,0xE9980002,0x93FC01A5,0x93FC01A5,0x93FC01A5,0x93FC01A5,0x93FC01A5,0x93FC01A5,0xFF700008,0xFF700008,0xFF700008,0xE9800001,0xCBF801A5,0xCBF801A5,0xCBF801A5,0xE9280001,0xDA0001A5,0xF3B4016D,0x1B801A5,0x1B801A5,0xFFAC0115,0xFFA800DA,0xFFAC00A2,0xFFAC00A2,0xFFA0003A,0xF9B00152,0xFBAC0109,0xFF98000D,0xFF700008, -0xBDF801A5,}; -static const uint32_t g_etc1_to_bc7_m6_table127[] = { -0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0xC1FC0000, -0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0xBC40000,0xBC40000,0xBC40000,0x81FC0000,0xB1FC0000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000, -0xD9FC0000,0xEDF80000,0xEDF80000,0xEDF80000,0xF0000001,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xEDF80000,0xEDF80000,0xEDF80000,0xF0000001,0xEDF80000,0xEDF80000,0xEDF80000,0xF0000001,0xF0000001,0x37FC0000,0x1E40000,0x1E40000,0xA7FC0000,0xC1FC0000,0xCFFC0000,0xCFFC0000,0xE1FC0000,0xA7FC0000,0xC1FC0000,0xE7FC0000,0xEDF80000, -0xE7FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1E4049B,0xFFDC041B,0xFFD803C2,0xFFD8039E,0xFFDC0373,0xFFD002EE,0xFFD002AE,0xFFCC0231,0xFFCC01F1,0xFFC801A5,0xFFD0032F,0xFFD0028E,0xFFC40236,0xFFC00181,0xFFBC011B,0xFFBC00A2,0xFFB80127,0xFFAC00AA,0xFFAC0001,0xFBA80072,0xD5FC0499,0xFFCC03E2,0xFFC8039D,0xFFB802AF,0xFFB80236,0xFFAC01A5,0xFFAC0216,0xFFA00149,0xFF880028,0xFB840071,0xEBF80499, -0xFF90039D,0xFF5401A5,0xFB0C0071,0xF0000499,0xFFD8040B,0xF9E00465,0xF9E00482,0xFFD40343,0xFFCC0297,0xFFC401D1,0xFFBC0173,0xFFB800C2,0xFFD803EE,0xFFD0031A,0xFFB8015E,0xFF880028,0xE5FC0499,0x1F40072,0xFFF40062,0xFFF00059,0xFFF00055,0xFFF40052,0xFFEC003D,0xFFEC0034,0xFFE40020,0xFFE40010,0xFFE40000,0xF5FC0071,0xFFF0005D,0xFFE80055,0xFFE40030,0xFFE40020, -0xFFD80000,0xF9FC0071,0xFFD40055,0xFFAC0000,0xFA000071,0xF5FC0071,0xFFF0005D,0xFFE80055,0xFFE40030,0xFFE40020,0xFFD80000,0xF9FC0071,0xFFD40055,0xFFAC0000,0xFA000071,0xF9FC0071,0xFFD40055,0xFFAC0000,0xFA000071,0xFA000071,0xFFF40060,0xF3F40072,0xF3F40072,0xFFF0005A,0xFFF0004A,0xFFE40036,0xFFE40022,0xFFD8001D,0xFDF40060,0xFFF0005A,0xFFE00056,0xFFAC0000, -0xF9FC0071,0x1D8039E,0x1D8039E,0x1D8039E,0x1D8039E,0xFFD002AE,0xFFD002AE,0xFFD002AE,0xFFCC01F1,0xFFCC01F1,0xFFC801A5,0xFFC40236,0xFFC40236,0xFFC40236,0xFFBC011B,0xFFBC011B,0xFFBC00A2,0xFFAC00AA,0xFFAC00AA,0xFFAC0001,0xF9A80056,0xC9FC039D,0xC9FC039D,0xC9FC039D,0xFFB80236,0xFFB80236,0xFFAC01A5,0xFFA00149,0xFFA00149,0xFF880028,0xF9880055,0xE5F8039D, -0xE5F8039D,0xFF5401A5,0xF9140055,0xEC00039D,0xFFD8032A,0xF5D80379,0x1D8039E,0xFFCC02A5,0xFFC4022E,0xFFBC01AB,0xFFBC0173,0xFFB800C2,0xFFD00305,0xFFD0028A,0xFFB8015A,0xFF880028,0xDFF8039D,0x1F00055,0x1F00055,0x1F00055,0x1F00055,0xFFEC0034,0xFFEC0034,0xFFEC0034,0xFFE40010,0xFFE40010,0xFFE40000,0xEBFC0055,0xEBFC0055,0xEBFC0055,0xFFE40020,0xFFE40020, -0xFFD80000,0xF5FC0055,0xF5FC0055,0xFFAC0000,0xF8000055,0xEBFC0055,0xEBFC0055,0xEBFC0055,0xFFE40020,0xFFE40020,0xFFD80000,0xF5FC0055,0xF5FC0055,0xFFAC0000,0xF8000055,0xF5FC0055,0xF5FC0055,0xFFAC0000,0xF8000055,0xF8000055,0xFDF00048,0xFFEC0050,0x1F00055,0xFBF00048,0xFDEC003D,0xFFE4002D,0xFFE40022,0xFFD8001D,0xFBF00048,0xFFE80041,0xF3FC0055,0xFFAC0000, -0xF3FC0055,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0x1C801A5,0xFFBC00A2,0xFFBC00A2,0xFFBC00A2,0xFFBC00A2,0xFFBC00A2, -0xFFBC00A2,0xFFAC0001,0xFFAC0001,0xFFAC0001,0xF1A80002,0xABFC01A5,0xABFC01A5,0xABFC01A5,0xABFC01A5,0xABFC01A5,0xABFC01A5,0xFF880028,0xFF880028,0xFF880028,0xF1900001,0xD7F801A5,0xD7F801A5,0xD7F801A5,0xF1380001,0xE20001A5,0xFBC4016D,0x1C801A5,0x1C801A5,0xFFBC0132,0xFFBC00FA,0xFFBC00CA,0xFFBC00CA,0xFFB4006A,0xFFBC015A,0xFFBC0122,0xFFB00032,0xFF880028, -0xCBFC01A5,}; -static const uint32_t g_etc1_to_bc7_m6_table128[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x240000,0x240000,0x240000,0x240000,0x340000,0x340000,0x340000,0x680000,0x680000,0x10000001,0x340000,0x340000,0x340000,0x680000,0x680000,0x10000001,0x680000,0x680000,0x10000001,0x10000001,0x340000,0x340000,0x340000,0x680000,0x680000,0x10000001,0x680000,0x680000,0x10000001,0x10000001,0x680000, -0x680000,0x10000001,0x10000001,0x10000001,0x2280000,0xA240000,0x240000,0x300000,0x3C0000,0x4C0000,0x540000,0x800000,0x2C0000,0x340000,0x4C0000,0x10000001,0x4C0000,0x780000,0x2B00000,0x1680000,0x3A000001,0x2B00000,0x1680000,0x3A000001,0x1680000,0x3A000001,0x3A000001,0x2B00000,0x1680000,0x3A000001,0x1680000,0x3A000001, -0x3A000001,0x1680000,0x3A000001,0x3A000001,0x3A000001,0x2B00000,0x1680000,0x3A000001,0x1680000,0x3A000001,0x3A000001,0x1680000,0x3A000001,0x3A000001,0x3A000001,0x1680000,0x3A000001,0x3A000001,0x3A000001,0x3A000001,0x2940000,0x800000,0x800000,0xC80000,0x1240000,0xBF40000,0x3A000001,0x3A000001,0xA40000,0xE00000,0x2FF00000,0x3A000001, -0xFC0000,0x280964,0xAA0C00D8,0x560C00D8,0x3A0C00D9,0x76000372,0x52000061,0x3A000002,0x38000374,0x32000151,0x26000372,0x4E000768,0x460002BA,0x34000153,0x340004A5,0x2E00024E,0x2400040A,0x26000768,0x2600049D,0x2000059E,0x1800076B,0x380964,0x3A000463,0x2E000274,0x2E000596,0x2800031F,0x22000484,0x220007ED,0x24000543,0x1C000607,0x180007AB,0x700964, -0x1C000694,0x1C00070C,0x16000864,0x12000964,0xE4000231,0xF41805B2,0xF820054D,0x74000239,0x4A000252,0x3C00023D,0x3000019B,0x2A000306,0x90000413,0x5E000317,0x2E0003AB,0x1C000607,0x500964,0x340768,0xA01400A4,0x541000A4,0x3A1000A5,0x76000372,0x52000061,0x3A000002,0x38000374,0x32000151,0x26000372,0x4C0768,0x460002BA,0x34000153,0x340004A5,0x2E00024E, -0x2400040A,0x980768,0x2600049D,0x2000059E,0x1800076B,0x4C0768,0x460002BA,0x34000153,0x340004A5,0x2E00024E,0x2400040A,0x980768,0x2600049D,0x2000059E,0x1800076B,0x980768,0x2600049D,0x2000059E,0x1800076B,0x1800076B,0xE4000231,0xF82004F2,0xFC280405,0x74000239,0x4A000252,0x3C00023D,0x3000019B,0x2A000306,0x900003C2,0x5E0002F3,0x2E0003A2,0x2000059E, -0x6C0768,0xC00D8,0xC00D8,0xC00D8,0xC00D8,0x36000000,0x36000000,0x36000000,0x1A000000,0x1A000000,0x10000001,0x1A0000A2,0x1A0000A2,0x1A0000A2,0x1200003D,0x1200003D,0xE000022,0xC0000A2,0xC0000A2,0xA000062,0x80000A2,0x1000D8,0x1000D8,0x1000D8,0x12000061,0x12000061,0xC000039,0xC0000B2,0xC0000B2,0xA000072,0x80000AB,0x2000D8, -0x2000D8,0xA000093,0x60000C3,0x40000DB,0x76000032,0xF8000004,0xC00D8,0x32000041,0x1E00003D,0x1A000041,0x16000034,0x12000050,0x3400006B,0x2600005D,0x100000A3,0xA000072,0x1800D8,0x1000A4,0x1000A4,0x1000A4,0x1000A4,0x36000000,0x36000000,0x36000000,0x1A000000,0x1A000000,0x10000001,0x21800A2,0x21800A2,0x21800A2,0x1200003D,0x1200003D, -0xE000022,0x3000A2,0x3000A2,0xA000062,0x80000A2,0x21800A2,0x21800A2,0x21800A2,0x1200003D,0x1200003D,0xE000022,0x3000A2,0x3000A2,0xA000062,0x80000A2,0x3000A2,0x3000A2,0xA000062,0x80000A2,0x80000A2,0x76000032,0xF8000004,0x1000A4,0x32000041,0x1E00003D,0x1A000041,0x16000034,0x12000050,0x42000061,0x26000059,0x2400A2,0xA000062, -0x2400A2,0x4C0374,0x90240000,0x50240000,0x3A240001,0x740372,0x52000061,0x3A000002,0xE80372,0x32000151,0x26000372,0x740372,0x52000061,0x3A000002,0xE80372,0x32000151,0x26000372,0xE80372,0x32000151,0x26000372,0x26000372,0x740372,0x52000061,0x3A000002,0xE80372,0x32000151,0x26000372,0xE80372,0x32000151,0x26000372,0x26000372,0xE80372, -0x32000151,0x26000372,0x26000372,0x26000372,0xF4000164,0x540372,0xF8400188,0x7C000132,0x56000161,0x3C00013D,0x340000DA,0x320001BA,0xB20001D4,0x68000179,0x360000B9,0x26000372,0xA40372,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table129[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x340000,0x340000,0x340000,0x340000,0x4C0000,0x4C0000,0x4C0000,0x980000,0x980000,0x18000001,0x4C0000,0x4C0000,0x4C0000,0x980000,0x980000,0x18000001,0x980000,0x980000,0x18000001,0x18000001,0x4C0000,0x4C0000,0x4C0000,0x980000,0x980000,0x18000001,0x980000,0x980000,0x18000001,0x18000001,0x980000, -0x980000,0x18000001,0x18000001,0x18000001,0x3C0000,0x380000,0x340000,0x2440000,0x2540000,0x6C0000,0x7C0000,0xBC0000,0x400000,0x4C0000,0x6C0000,0x18000001,0x6C0000,0x880000,0xC80000,0x1980000,0x42000001,0xC80000,0x1980000,0x42000001,0x1980000,0x42000001,0x42000001,0xC80000,0x1980000,0x42000001,0x1980000,0x42000001, -0x42000001,0x1980000,0x42000001,0x42000001,0x42000001,0xC80000,0x1980000,0x42000001,0x1980000,0x42000001,0x42000001,0x1980000,0x42000001,0x42000001,0x42000001,0x1980000,0x42000001,0x42000001,0x42000001,0x42000001,0x2A80000,0x900000,0x900000,0xE40000,0x14C0000,0x15F40000,0x42000001,0x42000001,0xB80000,0x1000000,0x39C40000,0x42000001, -0x1200000,0x300C14,0xBE1001C4,0x601001C4,0x421001C5,0x8E000372,0x62000025,0x4400000A,0x44000374,0x38000109,0x2E000372,0x5C000933,0x5200036A,0x400001DB,0x3A00053D,0x34000266,0x2E000453,0x2E000934,0x2C0005A5,0x26000696,0x1E000933,0x440C14,0x460005BB,0x3A000354,0x340006A6,0x34000387,0x2800050C,0x280009FD,0x2C000686,0x2400073B,0x1E000997,0x880C14, -0x26000891,0x200008C2,0x1C000AAC,0x16000C14,0xFE000264,0xF8200772,0xFC280785,0x8800025D,0x5A000281,0x44000266,0x3C000195,0x36000351,0xB20004DE,0x68000393,0x3600045B,0x2400073B,0x600C14,0x400934,0xB21C0154,0x5E180154,0x42180155,0x8E000372,0x62000025,0x44040009,0x44000374,0x38000109,0x2E000372,0x5C0933,0x5200036A,0x400001DB,0x3A00053D,0x34000266, -0x2E000453,0xB80933,0x2C0005A5,0x26000696,0x1E000933,0x5C0933,0x5200036A,0x400001DB,0x3A00053D,0x34000266,0x2E000453,0xB80933,0x2C0005A5,0x26000696,0x1E000933,0xB80933,0x2C0005A5,0x26000696,0x1E000933,0x1E000933,0xFE000264,0xFE2C061E,0xF23405B4,0x8800025D,0x5A000281,0x44000266,0x3C000195,0x36000351,0xB2000465,0x68000362,0x36000452,0x26000696, -0x800933,0x1001C4,0x1001C4,0x1001C4,0x1001C4,0x4E000000,0x4E000000,0x4E000000,0x26000000,0x26000000,0x18000001,0x26000152,0x26000152,0x26000152,0x1E00007D,0x1E00007D,0x18000041,0x12000152,0x12000152,0x100000CA,0xC000152,0x21801C3,0x21801C3,0x21801C3,0x180000D1,0x180000D1,0x12000079,0x12000176,0x12000176,0x100000EE,0xC000162,0x3001C3, -0x3001C3,0xE000141,0xA00018B,0x80001C3,0x9200006A,0xFC080044,0x1001C4,0x50000089,0x3200007D,0x2600007D,0x2200006A,0x1A00009D,0x560000E9,0x320000BE,0x16000155,0x100000EE,0x2401C3,0x180154,0x180154,0x180154,0x180154,0x4E000000,0x4E000000,0x4E000000,0x26000000,0x26000000,0x18000001,0x2240152,0x2240152,0x2240152,0x1E00007D,0x1E00007D, -0x18000041,0x4C0152,0x4C0152,0x100000CA,0xC000152,0x2240152,0x2240152,0x2240152,0x1E00007D,0x1E00007D,0x18000041,0x4C0152,0x4C0152,0x100000CA,0xC000152,0x4C0152,0x4C0152,0x100000CA,0xC000152,0xC000152,0x9200006A,0xFC080034,0x180154,0x50000089,0x3200007D,0x2600007D,0x2200006A,0x1A00009D,0x560000D0,0x3C0000B4,0x340152,0x100000CA, -0x340152,0x5C0374,0x98340000,0x58340000,0x42340001,0x8C0372,0x62000025,0x420C0001,0x1180372,0x38000109,0x2E000372,0x8C0372,0x62000025,0x420C0001,0x1180372,0x38000109,0x2E000372,0x1180372,0x38000109,0x2E000372,0x2E000372,0x8C0372,0x62000025,0x420C0001,0x1180372,0x38000109,0x2E000372,0x1180372,0x38000109,0x2E000372,0x2E000372,0x1180372, -0x38000109,0x2E000372,0x2E000372,0x2E000372,0xFC080152,0x640372,0xFE4C0190,0x920000E9,0x6000010D,0x4A000104,0x3E000092,0x3600016D,0xD0000190,0x7A000128,0x4200007D,0x2E000372,0xC80372,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table130[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x440000,0x440000,0x440000,0x440000,0x640000,0x640000,0x640000,0xCC0000,0xCC0000,0x20000001,0x640000,0x640000,0x640000,0xCC0000,0xCC0000,0x20000001,0xCC0000,0xCC0000,0x20000001,0x20000001,0x640000,0x640000,0x640000,0xCC0000,0xCC0000,0x20000001,0xCC0000,0xCC0000,0x20000001,0x20000001,0xCC0000, -0xCC0000,0x20000001,0x20000001,0x20000001,0x64C0000,0x480000,0x440000,0x5C0000,0x700000,0x900000,0xA40000,0xF80000,0x540000,0x640000,0x900000,0x20000001,0x900000,0x980000,0xE00000,0x1CC0000,0x4A000001,0xE00000,0x1CC0000,0x4A000001,0x1CC0000,0x4A000001,0x4A000001,0xE00000,0x1CC0000,0x4A000001,0x1CC0000,0x4A000001, -0x4A000001,0x1CC0000,0x4A000001,0x4A000001,0x4A000001,0xE00000,0x1CC0000,0x4A000001,0x1CC0000,0x4A000001,0x4A000001,0x1CC0000,0x4A000001,0x4A000001,0x4A000001,0x1CC0000,0x4A000001,0x4A000001,0x4A000001,0x4A000001,0x2BC0000,0x8A00000,0x8A00000,0x1000000,0x1740000,0x1FF80000,0x4A000001,0x4A000001,0x2CC0000,0x11C0000,0x41D40000,0x4A000001, -0x1400000,0x380F44,0xCE180304,0x6A180304,0x4A180305,0xA6000372,0x6E000005,0x4C04003A,0x50000374,0x440000C1,0x36000372,0x70000B53,0x62000455,0x460002A3,0x460005E5,0x40000296,0x3400049B,0x36000B53,0x320006F5,0x2C0007BE,0x24000B53,0x500F44,0x4C000773,0x40000494,0x400007D6,0x3A000417,0x2E0005B4,0x2E000C75,0x32000816,0x2A00089F,0x22000BD4,0xA00F44, -0x2C000AD9,0x26000ABA,0x20000D5F,0x1A000F44,0xFE000344,0xFC2809B2,0xFE2C0A49,0x9E00029D,0x6C0002C1,0x4C0002A2,0x440001A6,0x3A0003B2,0xD00005C2,0x7C000403,0x3E000573,0x2A00089F,0x700F44,0x480B54,0xC2240244,0x68200244,0x4A200245,0xA6000372,0x6E000005,0x4E080032,0x50000374,0x440000C1,0x36000372,0x6C0B53,0x62000455,0x460002A3,0x460005E5,0x40000296, -0x3400049B,0xDC0B53,0x320006F5,0x2C0007BE,0x24000B53,0x6C0B53,0x62000455,0x460002A3,0x460005E5,0x40000296,0x3400049B,0xDC0B53,0x320006F5,0x2C0007BE,0x24000B53,0xDC0B53,0x320006F5,0x2C0007BE,0x24000B53,0x24000B53,0xFE000344,0xF43807D4,0xF8400788,0x9E00029D,0x6C0002C1,0x4C0002A2,0x440001A6,0x3A0003B2,0xD0000519,0x7C0003C3,0x3E000563,0x2C0007BE, -0x9C0B53,0x180304,0x180304,0x180304,0x180304,0x66000000,0x66000000,0x66000000,0x32000000,0x32000000,0x20000001,0x32000242,0x32000242,0x32000242,0x280000CD,0x280000CD,0x1E00006D,0x18000242,0x18000242,0x1600015A,0x10000242,0x200303,0x200303,0x200303,0x22000156,0x22000156,0x180000D1,0x18000282,0x18000282,0x1600019A,0xE000263,0x3C0303, -0x3C0303,0x10000213,0xE0002AE,0xA000303,0xC40000B4,0xFE0C00D8,0x180304,0x640000F5,0x460000DD,0x2E0000E1,0x2E0000B4,0x22000104,0x64000191,0x5000014A,0x1E000248,0x1600019A,0x2C0303,0x200244,0x200244,0x200244,0x200244,0x66000000,0x66000000,0x66000000,0x32000000,0x32000000,0x20000001,0x2300242,0x2300242,0x2300242,0x280000CD,0x280000CD, -0x1E00006D,0x640242,0x640242,0x1600015A,0x10000242,0x2300242,0x2300242,0x2300242,0x280000CD,0x280000CD,0x1E00006D,0x640242,0x640242,0x1600015A,0x10000242,0x640242,0x640242,0x1600015A,0x10000242,0x10000242,0xC40000B4,0xFE0C00B4,0x200244,0x640000F5,0x460000DD,0x2E0000E1,0x2E0000B4,0x22000104,0x74000164,0x50000131,0x480242,0x1600015A, -0x480242,0x6C0374,0xA0440000,0x60440000,0x4A440001,0xA40372,0x6E000005,0x4A1C0001,0x14C0372,0x440000C1,0x36000372,0xA40372,0x6E000005,0x4A1C0001,0x14C0372,0x440000C1,0x36000372,0x14C0372,0x440000C1,0x36000372,0x36000372,0xA40372,0x6E000005,0x4A1C0001,0x14C0372,0x440000C1,0x36000372,0x14C0372,0x440000C1,0x36000372,0x36000372,0x14C0372, -0x440000C1,0x36000372,0x36000372,0x36000372,0xFE140164,0x2740372,0xF86001A5,0xA60000A4,0x6C0000DD,0x540000B9,0x46000059,0x42000132,0xF200015A,0x8C0000F4,0x4C000041,0x36000372,0xE80372,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table131[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x100000,0x100000,0x100000,0x100000,0x100000, -0x100000,0x200000,0x200000,0x200000,0x4000001,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x200000,0x200000,0x200000,0x4000001,0x200000,0x200000,0x200000,0x4000001,0x4000001,0xC0000,0xC0000,0xC0000,0x20C0000,0x40C0000,0x100000,0x100000,0x140000,0x20C0000,0x40C0000,0x180000,0x200000, -0x180000,0x540000,0x540000,0x540000,0x540000,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0x28000001,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0x28000001,0xFC0000,0xFC0000,0x28000001,0x28000001,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0x28000001,0xFC0000,0xFC0000,0x28000001,0x28000001,0xFC0000, -0xFC0000,0x28000001,0x28000001,0x28000001,0x2600000,0x4580000,0x540000,0x2700000,0x8C0000,0xB00000,0xCC0000,0x1340000,0x680000,0x7C0000,0xB00000,0x28000001,0xB00000,0xA80000,0xF80000,0x1FC0000,0x52000001,0xF80000,0x1FC0000,0x52000001,0x1FC0000,0x52000001,0x52000001,0xF80000,0x1FC0000,0x52000001,0x1FC0000,0x52000001, -0x52000001,0x1FC0000,0x52000001,0x52000001,0x52000001,0xF80000,0x1FC0000,0x52000001,0x1FC0000,0x52000001,0x52000001,0x1FC0000,0x52000001,0x52000001,0x52000001,0x1FC0000,0x52000001,0x52000001,0x52000001,0x52000001,0x4D00000,0xB40000,0xB40000,0x1180000,0x19C0000,0x29F80000,0x52000001,0x52000001,0xE40000,0x13C0000,0x49E40000,0x52000001, -0x1640000,0x401198,0xDE200408,0x74200408,0x52200409,0xB6080386,0x78080015,0x560C007E,0x5C040386,0x4C0400A9,0x3E040386,0x84000BE8,0x680003F9,0x5200029B,0x5200058D,0x460001EE,0x3A00043B,0x40000BE8,0x3E0006C9,0x320007A2,0x2A000BEB,0x601194,0x62000829,0x4C00052C,0x4C00083E,0x460003FF,0x3A0005A4,0x3A000D81,0x38000852,0x320008C3,0x28000CAC,0xC41194, -0x32000C15,0x2C000BBE,0x26000ED7,0x20001194,0xFE0C03FE,0xFE2C0B9E,0xF63C0C94,0xB40001F1,0x76000211,0x5A0001E1,0x4C000105,0x4200030B,0xF200058B,0x90000386,0x46000541,0x320008C3,0x8C1194,0x580BE8,0xCE300288,0x70300288,0x52300289,0xB20C0372,0x760C0005,0x54140042,0x5A0C0372,0x4C0400A5,0x3E0C0372,0x2800BE8,0x680003F9,0x5200029B,0x5200058D,0x460001EE, -0x3A00043B,0x1080BE8,0x3E0006C9,0x320007A2,0x2A000BEB,0x2800BE8,0x680003F9,0x5200029B,0x5200058D,0x460001EE,0x3A00043B,0x1080BE8,0x3E0006C9,0x320007A2,0x2A000BEB,0x1080BE8,0x3E0006C9,0x320007A2,0x2A000BEB,0x2A000BEB,0xFE1403AD,0xFA440844,0xFE4C0810,0xB40001F1,0x76000211,0x5A0001E1,0x4C000105,0x4200030B,0xF20004AA,0x90000335,0x46000531,0x320007A2, -0xB80BE8,0x200408,0x200408,0x200408,0x200408,0x76080014,0x76080014,0x76080014,0x3C040014,0x3C040014,0x28040015,0x48000288,0x48000288,0x48000288,0x3400009D,0x3400009D,0x28000032,0x22000288,0x22000288,0x20000151,0x1600028A,0x300408,0x300408,0x300408,0x2E000196,0x2E000196,0x220000C3,0x1E0002FE,0x1E0002FE,0x1C0001B6,0x160002CA,0x5C0408, -0x5C0408,0x1600029B,0x1400035E,0xE00040B,0xF6000082,0xF2140195,0x200408,0x820000C1,0x5A0000A9,0x440000A9,0x40000082,0x2E0000DA,0x960001AB,0x5A000144,0x2C000291,0x1C0001B6,0x400408,0x300288,0x300288,0x300288,0x300288,0x720C0000,0x720C0000,0x720C0000,0x3A0C0000,0x3A0C0000,0x280C0001,0x2440288,0x2440288,0x2440288,0x3400009D,0x3400009D, -0x28000032,0x8C0288,0x8C0288,0x20000151,0x1600028A,0x2440288,0x2440288,0x2440288,0x3400009D,0x3400009D,0x28000032,0x8C0288,0x8C0288,0x20000151,0x1600028A,0x8C0288,0x8C0288,0x20000151,0x1600028A,0x1600028A,0xF6000082,0xF82000DD,0x300288,0x820000C1,0x5A0000A9,0x440000A9,0x40000082,0x2E0000DA,0x9600015A,0x5A000120,0x640288,0x20000151, -0x640288,0x7C0374,0xA8540000,0x68540000,0x52540001,0xBC0372,0x78080001,0x522C0001,0x17C0372,0x4A000091,0x3E000372,0xBC0372,0x78080001,0x522C0001,0x17C0372,0x4A000091,0x3E000372,0x17C0372,0x4A000091,0x3E000372,0x3E000372,0xBC0372,0x78080001,0x522C0001,0x17C0372,0x4A000091,0x3E000372,0x17C0372,0x4A000091,0x3E000372,0x3E000372,0x17C0372, -0x4A000091,0x3E000372,0x3E000372,0x3E000372,0xFC30016D,0xA840372,0xFE6C01B1,0xBC000071,0x80000095,0x5E000080,0x5000002D,0x4A0000FA,0xF80C0152,0x9E0000B5,0x56000014,0x3E000372,0x10C0372,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x40014,0x10000000,0x10000000,0x10000000,0x10000000,0x10000000, -0x10000000,0x8000000,0x8000000,0x8000000,0x4000001,0x80012,0x80012,0x80012,0x80012,0x80012,0x80012,0x6000005,0x6000005,0x6000005,0x4000005,0xC0012,0xC0012,0xC0012,0x400000A,0x2000012,0x58000000,0x40014,0x40014,0x28000000,0x1C000000,0x14000000,0x14000000,0xE000000,0x20000005,0x16000002,0xA000001,0x6000005, -0xC0012,}; -static const uint32_t g_etc1_to_bc7_m6_table132[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000, -0x2C0000,0x580000,0x580000,0x580000,0xE000000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x580000,0x580000,0x580000,0xE000000,0x580000,0x580000,0x580000,0xE000000,0xE000000,0x200000,0x1C0001,0x1C0001,0x6200000,0x240000,0x280000,0x280000,0x300000,0x6200000,0x240000,0x3C0000,0x580000, -0x3C0000,0x640001,0x640001,0x640001,0x640001,0x980000,0x980000,0x980000,0x1300000,0x1300000,0x32000000,0x980000,0x980000,0x980000,0x1300000,0x1300000,0x32000000,0x1300000,0x1300000,0x32000000,0x32000000,0x980000,0x980000,0x980000,0x1300000,0x1300000,0x32000000,0x1300000,0x1300000,0x32000000,0x32000000,0x1300000, -0x1300000,0x32000000,0x32000000,0x32000000,0x4740000,0x6C0000,0x640001,0x2880000,0xAC0000,0xD80000,0xF80000,0x1780000,0x800000,0x980000,0xD80000,0x32000000,0xD80000,0xB80001,0x1140000,0xFF80000,0x5C000000,0x1140000,0xFF80000,0x5C000000,0xFF80000,0x5C000000,0x5C000000,0x1140000,0xFF80000,0x5C000000,0xFF80000,0x5C000000, -0x5C000000,0xFF80000,0x5C000000,0x5C000000,0x5C000000,0x1140000,0xFF80000,0x5C000000,0xFF80000,0x5C000000,0x5C000000,0xFF80000,0x5C000000,0x5C000000,0x5C000000,0xFF80000,0x5C000000,0x5C000000,0x5C000000,0x5C000000,0xE80000,0xAC40000,0xAC40000,0x1380000,0x1C80000,0x35F00000,0x5C000000,0x5C000000,0xFC0000,0x15C0000,0x53D80000,0x5C000000, -0x18C0000,0x501423,0xEC2C055E,0x7E2C055E,0x5C2C055E,0xCA1003E3,0x86100072,0x6014011D,0x661003E3,0x560C00EA,0x461003E5,0xA0000BE8,0x7A00034E,0x5C04028E,0x620004C5,0x5200010D,0x460003B4,0x4E000BE8,0x4A00060E,0x3E0006ED,0x34000BE8,0x2741423,0x6E0008EE,0x58000601,0x5800089F,0x4C0003D6,0x400005B1,0x46000E44,0x4400082B,0x3C000898,0x34000D09,0xF01423, -0x38000D6E,0x32000CD1,0x2C00102C,0x26001425,0xFE200511,0xF8400DDB,0xFC480EE3,0xD000011D,0x8A000126,0x6A00010D,0x5A00005E,0x50000214,0xFE00052B,0xA80002B6,0x5400046A,0x3C000898,0xA81423,0x680BEB,0xD444028A,0x7A40028A,0x5C40028A,0xBA200373,0x821C0002,0x5E280041,0x62200373,0x561800A6,0x461C0375,0x9C0BE8,0x7A00034E,0x5C080288,0x620004C5,0x5200010D, -0x460003B4,0x13C0BE8,0x4A00060E,0x3E0006ED,0x34000BE8,0x9C0BE8,0x7A00034E,0x5C080288,0x620004C5,0x5200010D,0x460003B4,0x13C0BE8,0x4A00060E,0x3E0006ED,0x34000BE8,0x13C0BE8,0x4A00060E,0x3E0006ED,0x34000BE8,0x34000BE8,0xFA2C0402,0xF458087D,0xF860084A,0xD000011D,0x8A000126,0x6A00010D,0x5A00005E,0x50000214,0xFC040428,0xA800023D,0x54000451,0x3E0006ED, -0xE00BE8,0x2C055E,0x2C055E,0x2C055E,0x2C055E,0x8A100071,0x8A100071,0x8A100071,0x46100072,0x46100072,0x32100071,0x64000288,0x64000288,0x64000288,0x46000049,0x46000049,0x32000004,0x30000288,0x30000288,0x2A000104,0x20000288,0x40055E,0x40055E,0x40055E,0x3A0001F2,0x3A0001F2,0x2E0000F1,0x28000363,0x28000363,0x260001B2,0x1E0002F4,0x80055E, -0x80055E,0x20000359,0x1C000411,0x14000561,0xFC0C00AC,0xF8200291,0x2C055E,0xAC00006A,0x72000050,0x56000059,0x4C000032,0x40000082,0xC2000199,0x84000101,0x3E000298,0x260001B2,0x5C055E,0x40028A,0x40028A,0x40028A,0x40028A,0x7A200001,0x7A200001,0x7A200001,0x441C0001,0x441C0001,0x321C0001,0x600288,0x600288,0x600288,0x46000049,0x46000049, -0x32000004,0xC40288,0xC40288,0x2A000104,0x20000288,0x600288,0x600288,0x600288,0x46000049,0x46000049,0x32000004,0xC40288,0xC40288,0x2A000104,0x20000288,0xC40288,0xC40288,0x2A000104,0x20000288,0x20000288,0xFE100080,0xF23400F2,0x40028A,0xAC00006A,0x72000050,0x56000059,0x4C000032,0x40000082,0xC2000109,0x840000C1,0x8C0288,0x2A000104, -0x8C0288,0x900372,0xB0680001,0x70680001,0x5C640001,0xD40372,0x82180001,0x5C3C0000,0x1B00372,0x56000055,0x46000374,0xD40372,0x82180001,0x5C3C0000,0x1B00372,0x56000055,0x46000374,0x1B00372,0x56000055,0x46000374,0x46000374,0xD40372,0x82180001,0x5C3C0000,0x1B00372,0x56000055,0x46000374,0x1B00372,0x56000055,0x46000374,0x46000374,0x1B00372, -0x56000055,0x46000374,0x46000374,0x46000374,0xFE440188,0x4980372,0xFA8401C2,0xDA000041,0x9600006D,0x6C000055,0x5A00000D,0x540000B9,0xFE180164,0xBA00007D,0x62000004,0x46000374,0x1300372,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x100071,0x2C000000,0x2C000000,0x2C000000,0x2C000000,0x2C000000, -0x2C000000,0x16000000,0x16000000,0x16000000,0xE000000,0x140071,0x140071,0x140071,0x140071,0x140071,0x140071,0x12000028,0x12000028,0x12000028,0xC000014,0x240071,0x240071,0x240071,0xA000041,0x6000071,0xE8000000,0x100071,0x100071,0x68000000,0x48000000,0x36000000,0x36000000,0x24000000,0x52000022,0x42000011,0x1C000004,0x12000028, -0x1C0071,}; -static const uint32_t g_etc1_to_bc7_m6_table133[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x440000,0x440000,0x440000,0x440000,0x440000, -0x440000,0x880000,0x880000,0x880000,0x16000000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x880000,0x880000,0x880000,0x16000000,0x880000,0x880000,0x880000,0x16000000,0x16000000,0x300000,0x2C0001,0x2C0001,0x2340000,0x380000,0x23C0000,0x23C0000,0x4C0000,0x2340000,0x380000,0x600000,0x880000, -0x600000,0x740001,0x740001,0x740001,0x740001,0xB00000,0xB00000,0xB00000,0x1640000,0x1640000,0x3A000000,0xB00000,0xB00000,0xB00000,0x1640000,0x1640000,0x3A000000,0x1640000,0x1640000,0x3A000000,0x3A000000,0xB00000,0xB00000,0xB00000,0x1640000,0x1640000,0x3A000000,0x1640000,0x1640000,0x3A000000,0x3A000000,0x1640000, -0x1640000,0x3A000000,0x3A000000,0x3A000000,0x880000,0x67C0000,0x740001,0xA00000,0xC40000,0xF80000,0x1200000,0x1B40000,0x940000,0xB00000,0xF80000,0x3A000000,0xF80000,0xC80001,0x12C0000,0x1BF80000,0x64000000,0x12C0000,0x1BF80000,0x64000000,0x1BF80000,0x64000000,0x64000000,0x12C0000,0x1BF80000,0x64000000,0x1BF80000,0x64000000, -0x64000000,0x1BF80000,0x64000000,0x64000000,0x64000000,0x12C0000,0x1BF80000,0x64000000,0x1BF80000,0x64000000,0x64000000,0x1BF80000,0x64000000,0x64000000,0x64000000,0x1BF80000,0x64000000,0x64000000,0x64000000,0x64000000,0xFC0000,0xD80000,0xD80000,0x1540000,0x1EC0000,0x3FF00000,0x64000000,0x64000000,0x3100000,0x17C0000,0x5BE80000,0x64000000, -0x1AC0000,0x5C16CF,0xF83806EA,0x883806EA,0x643806EA,0xDA18047B,0x90180112,0x662001F9,0x7018047B,0x6014016E,0x4E18047D,0xB8000BE8,0x8C0002DE,0x660C02C2,0x6E00044D,0x5E00007D,0x4E00037D,0x5A000BE8,0x5000056A,0x44000675,0x3C000BE8,0x8816CF,0x7A000A0E,0x6200072E,0x68000911,0x580003FE,0x4C0005F9,0x52000F0C,0x4C00082B,0x44000886,0x3A000D5D,0x11416CF, -0x44000EFE,0x3E000E01,0x32001198,0x2C0016D1,0xFE280685,0xFE4C1017,0xFE4C119F,0xE6000086,0x9A0000A1,0x78000083,0x6200000D,0x5800015D,0xFE00061B,0xBC00020A,0x5E0003DE,0x44000886,0xC016CF,0x780BEB,0xDC54028A,0x8250028A,0x6450028A,0xC2300373,0x8A2C0002,0x66380041,0x6A300373,0x5E2800A6,0x4E2C0375,0xB40BE8,0x8C0002DE,0x64180288,0x6E00044D,0x5E00007D, -0x4E00037D,0x1700BE8,0x5000056A,0x44000675,0x3C000BE8,0xB40BE8,0x8C0002DE,0x64180288,0x6E00044D,0x5E00007D,0x4E00037D,0x1700BE8,0x5000056A,0x44000675,0x3C000BE8,0x1700BE8,0x5000056A,0x44000675,0x3C000BE8,0x3C000BE8,0xFE340432,0xFC68087D,0xFE6C085E,0xE6000086,0x9A0000A1,0x78000083,0x6200000D,0x5800015D,0xFE180455,0xBC00017A,0x5E0003BA,0x44000675, -0x1000BE8,0x3806EA,0x3806EA,0x3806EA,0x3806EA,0x9A180109,0x9A180109,0x9A180109,0x5018010A,0x5018010A,0x3A180109,0x7C000288,0x7C000288,0x7C000288,0x52000019,0x52000019,0x3A040008,0x3C000288,0x3C000288,0x320000B9,0x28000288,0x5006E9,0x5006E9,0x5006E9,0x4600028A,0x4600028A,0x34000169,0x340003DB,0x340003DB,0x2E0001C2,0x28000331,0xA006E9, -0xA006E9,0x2600043D,0x200004EC,0x1A0006E9,0xFE100140,0xFE2C03C9,0x3806EA,0xD000002D,0x86000020,0x6A00001D,0x62000009,0x4A000041,0xE40001A5,0x960000E2,0x4C0002A1,0x2E0001C2,0x7006E9,0x50028A,0x50028A,0x50028A,0x50028A,0x82300001,0x82300001,0x82300001,0x4C2C0001,0x4C2C0001,0x3A2C0001,0x780288,0x780288,0x780288,0x52000019,0x52000019, -0x3A0C0000,0xF40288,0xF40288,0x320000B9,0x28000288,0x780288,0x780288,0x780288,0x52000019,0x52000019,0x3A0C0000,0xF40288,0xF40288,0x320000B9,0x28000288,0xF40288,0xF40288,0x320000B9,0x28000288,0x28000288,0xFE200091,0xFA4400F2,0x50028A,0xD000002D,0x86000020,0x6A00001D,0x62000009,0x4A000041,0xF40000CA,0x9A000080,0xAC0288,0x320000B9, -0xAC0288,0xA00372,0xB8780001,0x78780001,0x64740001,0xEC0372,0x8A280001,0x644C0000,0x1E40372,0x60000034,0x4E000374,0xEC0372,0x8A280001,0x644C0000,0x1E40372,0x60000034,0x4E000374,0x1E40372,0x60000034,0x4E000374,0x4E000374,0xEC0372,0x8A280001,0x644C0000,0x1E40372,0x60000034,0x4E000374,0x1E40372,0x60000034,0x4E000374,0x4E000374,0x1E40372, -0x60000034,0x4E000374,0x4E000374,0x4E000374,0xFE50019A,0xCA80372,0xF29401E1,0xEE000020,0xA000003D,0x7A00002D,0x64000001,0x5C000091,0xFE2C0185,0xCC000059,0x6A080000,0x4E000374,0x1540372,0x180109,0x180109,0x180109,0x180109,0x180109,0x180109,0x180109,0x180109,0x180109,0x180109,0x44000000,0x44000000,0x44000000,0x44000000,0x44000000, -0x44000000,0x20000001,0x20000001,0x20000001,0x16000000,0x200109,0x200109,0x200109,0x200109,0x200109,0x200109,0x18000064,0x18000064,0x18000064,0x12000034,0x3C0109,0x3C0109,0x3C0109,0x1000009D,0xA000109,0xFC080019,0x180109,0x180109,0xA0000000,0x6E000000,0x54000000,0x54000000,0x38000000,0x84000050,0x64000028,0x26000008,0x18000064, -0x2C0109,}; -static const uint32_t g_etc1_to_bc7_m6_table134[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000, -0x5C0000,0xB80000,0xB80000,0xB80000,0x1E000000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0xB80000,0xB80000,0xB80000,0x1E000000,0xB80000,0xB80000,0xB80000,0x1E000000,0x1E000000,0x8400000,0x3C0001,0x3C0001,0x480000,0x24C0000,0x540000,0x540000,0x680000,0x480000,0x24C0000,0x800000,0xB80000, -0x800000,0x840001,0x840001,0x840001,0x840001,0x2C40000,0x2C40000,0x2C40000,0x1940000,0x1940000,0x42000000,0x2C40000,0x2C40000,0x2C40000,0x1940000,0x1940000,0x42000000,0x1940000,0x1940000,0x42000000,0x42000000,0x2C40000,0x2C40000,0x2C40000,0x1940000,0x1940000,0x42000000,0x1940000,0x1940000,0x42000000,0x42000000,0x1940000, -0x1940000,0x42000000,0x42000000,0x42000000,0x9C0000,0xE8C0000,0x840001,0x2B40000,0xE00000,0x11C0000,0x1480000,0x1F00000,0xA80000,0x2C40000,0x11C0000,0x42000000,0x11C0000,0xD80001,0x1440000,0x27F80000,0x6C000000,0x1440000,0x27F80000,0x6C000000,0x27F80000,0x6C000000,0x6C000000,0x1440000,0x27F80000,0x6C000000,0x27F80000,0x6C000000, -0x6C000000,0x27F80000,0x6C000000,0x6C000000,0x6C000000,0x1440000,0x27F80000,0x6C000000,0x27F80000,0x6C000000,0x6C000000,0x27F80000,0x6C000000,0x6C000000,0x6C000000,0x27F80000,0x6C000000,0x6C000000,0x6C000000,0x6C000000,0x1100000,0xE80000,0xE80000,0x36C0000,0x9FC0000,0x49F00000,0x6C000000,0x6C000000,0x1280000,0x1980000,0x63F80000,0x6C000000, -0x1D00000,0x6819DB,0xFE4408D2,0x924008CA,0x6C4008CA,0xEA200553,0x9A2001FA,0x70280319,0x7A200553,0x681C0235,0x56200555,0xD0000BE8,0x9E00029E,0x6E10032E,0x800003ED,0x68000029,0x56000378,0x66000BE8,0x5C0004DA,0x500005E5,0x44000BE8,0x29819DB,0x86000B8E,0x6A0008DD,0x740009E9,0x62000465,0x5200068D,0x5E000FF4,0x58000853,0x4C00088E,0x40000DD1,0x13819DB, -0x4A0010D2,0x44000F69,0x3E001338,0x320019DD,0xFE340856,0xF65C1329,0xF860148A,0xFE000033,0xAC00003D,0x84000029,0x6C040006,0x640000D5,0xFE1007BB,0xD4000193,0x66000384,0x4C00088E,0xDC19DB,0x880BEB,0xE464028A,0x8A60028A,0x6C60028A,0xCA400373,0x923C0002,0x6E480041,0x72400373,0x663800A6,0x563C0375,0xCC0BE8,0x9E00029E,0x6C280288,0x800003ED,0x68000029, -0x56080374,0x1A00BE8,0x5C0004DA,0x500005E5,0x44000BE8,0xCC0BE8,0x9E00029E,0x6C280288,0x800003ED,0x68000029,0x56080374,0x1A00BE8,0x5C0004DA,0x500005E5,0x44000BE8,0x1A00BE8,0x5C0004DA,0x500005E5,0x44000BE8,0x44000BE8,0xFE50045E,0xF47808BB,0xF880088B,0xFE000033,0xAC00003D,0x84000029,0x6C040002,0x640000D5,0xFE2404A1,0xDA0000E6,0x6A000359,0x500005E5, -0x1240BE8,0x4008CA,0x4008CA,0x4008CA,0x4008CA,0xAA2001E1,0xAA2001E1,0xAA2001E1,0x5A2001E2,0x5A2001E2,0x422001E1,0x94000288,0x94000288,0x94000288,0x62000001,0x62000001,0x42080034,0x48000288,0x48000288,0x3C000088,0x30000288,0x6008C9,0x6008C9,0x6008C9,0x52000362,0x52000362,0x40000221,0x40000473,0x40000473,0x360001F8,0x2E000371,0xC408C9, -0xC408C9,0x2C000569,0x260005F4,0x200008C9,0xFE200221,0xF438058A,0x4008CA,0xF200000D,0x9E000005,0x7C000005,0x70000001,0x56000019,0xFE0001FA,0xB60000CE,0x5C0002AC,0x360001F8,0x8C08C9,0x60028A,0x60028A,0x60028A,0x60028A,0x8A400001,0x8A400001,0x8A400001,0x543C0001,0x543C0001,0x423C0001,0x900288,0x900288,0x900288,0x62000001,0x62000001, -0x421C0000,0x1240288,0x1240288,0x3C000088,0x30000288,0x900288,0x900288,0x900288,0x62000001,0x62000001,0x421C0000,0x1240288,0x1240288,0x3C000088,0x30000288,0x1240288,0x1240288,0x3C000088,0x30000288,0x30000288,0xFA3400A2,0xF2540109,0x60028A,0xF200000D,0x9E000005,0x7C000005,0x6E040000,0x56000019,0xFE0C00C8,0xBC00004A,0xD00288,0x3C000088, -0xD00288,0xB00372,0xC0880001,0x80880001,0x6C840001,0x1040372,0x92380001,0x6C5C0000,0x7FC0372,0x6A00001D,0x56000374,0x1040372,0x92380001,0x6C5C0000,0x7FC0372,0x6A00001D,0x56000374,0x7FC0372,0x6A00001D,0x56000374,0x56000374,0x1040372,0x92380001,0x6C5C0000,0x7FC0372,0x6A00001D,0x56000374,0x7FC0372,0x6A00001D,0x56000374,0x56000374,0x7FC0372, -0x6A00001D,0x56000374,0x56000374,0x56000374,0xF86C01A5,0xBC0372,0xFAA401E1,0xFE040014,0xAC040029,0x84000019,0x6C0C0000,0x64000071,0xFA4C0188,0xDE000034,0x72180000,0x56000374,0x1740372,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x2001E1,0x5C000000,0x5C000000,0x5C000000,0x5C000000,0x5C000000, -0x5C000000,0x2C000001,0x2C000001,0x2C000001,0x1E000000,0x2C01E1,0x2C01E1,0x2C01E1,0x2C01E1,0x2C01E1,0x2C01E1,0x220000AA,0x220000AA,0x220000AA,0x1A000061,0x5801E1,0x5801E1,0x5801E1,0x16000121,0xE0001E1,0xFE0C0075,0x2001E1,0x2001E1,0xD8000000,0x96000000,0x72000000,0x72000000,0x4C000000,0xB6000092,0x82000050,0x3600000D,0x220000AA, -0x3C01E1,}; -static const uint32_t g_etc1_to_bc7_m6_table135[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x740000,0x740000,0x740000,0x740000,0x740000, -0x740000,0xE80000,0xE80000,0xE80000,0x26000000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0xE80000,0xE80000,0xE80000,0x26000000,0xE80000,0xE80000,0xE80000,0x26000000,0x26000000,0x540000,0x4C0001,0x4C0001,0x4580000,0x2600000,0x2680000,0x2680000,0x2800000,0x4580000,0x2600000,0xA40000,0xE80000, -0xA40000,0x940001,0x940001,0x940001,0x940001,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x4A000000,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x4A000000,0x1C40000,0x1C40000,0x4A000000,0x4A000000,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x4A000000,0x1C40000,0x1C40000,0x4A000000,0x4A000000,0x1C40000, -0x1C40000,0x4A000000,0x4A000000,0x4A000000,0x4AC0000,0xA00000,0x940001,0xCC0000,0xFC0000,0x13C0000,0x16C0000,0xBF80000,0xBC0000,0x2DC0000,0x13C0000,0x4A000000,0x13C0000,0xE80001,0x15C0000,0x33F80000,0x74000000,0x15C0000,0x33F80000,0x74000000,0x33F80000,0x74000000,0x74000000,0x15C0000,0x33F80000,0x74000000,0x33F80000,0x74000000, -0x74000000,0x33F80000,0x74000000,0x74000000,0x74000000,0x15C0000,0x33F80000,0x74000000,0x33F80000,0x74000000,0x74000000,0x33F80000,0x74000000,0x74000000,0x74000000,0x33F80000,0x74000000,0x74000000,0x74000000,0x74000000,0x1240000,0x4F80000,0x4F80000,0x1880000,0x17FC0000,0x53F00000,0x74000000,0x74000000,0x33C0000,0x1B80000,0x6DCC0000,0x74000000, -0x1F00000,0x741D47,0xFE500B2E,0x9A4C0AFE,0x744C0AFE,0xFA28066B,0xA62C0322,0x7A340492,0x8428066B,0x6E24033A,0x5E28066D,0xEA000BE8,0xAC000289,0x781803CA,0x8C0003A5,0x72000009,0x60080394,0x72000BE8,0x6600046C,0x5600057D,0x4C000BE8,0xAC1D47,0x92000D6E,0x74000AFE,0x80000B01,0x6800053D,0x5E000775,0x680010D8,0x5E00088F,0x540008B8,0x48000E58,0x15C1D47, -0x5000130E,0x4A001121,0x440014EC,0x38001D49,0xFE3C0AD7,0xFC68162D,0xFE6C17C2,0xFE0C0095,0xC000000D,0x90000005,0x7608002D,0x6C000072,0xFE180A0E,0xE6000156,0x7600031A,0x540008B8,0xF41D47,0x980BEB,0xEC74028A,0x9270028A,0x7470028A,0xD2500373,0x9A4C0002,0x76580041,0x7A500373,0x6E4800A6,0x5E4C0375,0xE40BE8,0xAA040289,0x74380288,0x8C0003A5,0x72000009, -0x5E180374,0x1D00BE8,0x6600046C,0x5600057D,0x4C000BE8,0xE40BE8,0xAA040289,0x74380288,0x8C0003A5,0x72000009,0x5E180374,0x1D00BE8,0x6600046C,0x5600057D,0x4C000BE8,0x1D00BE8,0x6600046C,0x5600057D,0x4C000BE8,0x4C000BE8,0xFC6004C1,0xFC8808BB,0xFE8C08A3,0xFE14004A,0xC000000D,0x90000005,0x74140002,0x6C000072,0xFC4004E8,0xEC00007E,0x760002E9,0x5600057D, -0x1480BE8,0x4C0AFE,0x4C0AFE,0x4C0AFE,0x4C0AFE,0xBA2802F9,0xBA2802F9,0xBA2802F9,0x642802FA,0x642802FA,0x4A2802F9,0xAC000288,0xAC000288,0xAC000288,0x6E040005,0x6E040005,0x4C100084,0x54000288,0x54000288,0x44000055,0x38000288,0x700AFE,0x700AFE,0x700AFE,0x62000471,0x62000471,0x46000321,0x4C00052B,0x4C00052B,0x40000236,0x340003C9,0xE40AFE, -0xE40AFE,0x320006DD,0x2C00072C,0x24000B01,0xFE2C0378,0xFA440776,0x4C0AFE,0xFE080020,0xB8000001,0x8E000002,0x80040011,0x62000005,0xFE00031A,0xD40000D2,0x660002B4,0x40000236,0xA00AFE,0x70028A,0x70028A,0x70028A,0x70028A,0x92500001,0x92500001,0x92500001,0x5C4C0001,0x5C4C0001,0x4A4C0001,0xA80288,0xA80288,0xA80288,0x6A0C0001,0x6A0C0001, -0x4A2C0000,0x1580288,0x1580288,0x44000055,0x38000288,0xA80288,0xA80288,0xA80288,0x6A0C0001,0x6A0C0001,0x4A2C0000,0x1580288,0x1580288,0x44000055,0x38000288,0x1580288,0x1580288,0x44000055,0x38000288,0x38000288,0xF64800B5,0xFA640109,0x70028A,0xFA10000D,0xAE0C0001,0x88080000,0x76140000,0x62000005,0xF62400DD,0xDA000022,0xF00288,0x44000055, -0xF00288,0xC00372,0xC8980001,0x88980001,0x74940001,0x11C0372,0x9A480001,0x746C0000,0x13FC0372,0x72000008,0x5E000374,0x11C0372,0x9A480001,0x746C0000,0x13FC0372,0x72000008,0x5E000374,0x13FC0372,0x72000008,0x5E000374,0x5E000374,0x11C0372,0x9A480001,0x746C0000,0x13FC0372,0x72000008,0x5E000374,0x13FC0372,0x72000008,0x5E000374,0x5E000374,0x13FC0372, -0x72000008,0x5E000374,0x5E000374,0x5E000374,0xFE7801B1,0xCC0372,0xF2B40202,0xFE200020,0xC000000D,0x90000005,0x741C0000,0x70000050,0xFE5C018A,0xF400001D,0x7A280000,0x5E000374,0x1980372,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x2802F9,0x76000000,0x76000000,0x76000000,0x76000000,0x76000000, -0x76000000,0x38000001,0x38000001,0x38000001,0x26000000,0x3802F9,0x3802F9,0x3802F9,0x3802F9,0x3802F9,0x3802F9,0x2E000112,0x2E000112,0x2E000112,0x22000089,0x7002F9,0x7002F9,0x7002F9,0x1C0001CD,0x120002F9,0xF4180120,0x2802F9,0x2802F9,0xFE040005,0xBE000000,0x90000000,0x90000000,0x5E000000,0xF60000F1,0xA400007D,0x46000011,0x2E000112, -0x5002F9,}; -static const uint32_t g_etc1_to_bc7_m6_table136[] = { -0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x200000, -0x200000,0x200000,0x200000,0x4000001,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x100000,0x180000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000, -0x28C0000,0x1200000,0x1200000,0x1200000,0x2E000001,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x1200000,0x1200000,0x1200000,0x2E000001,0x1200000,0x1200000,0x1200000,0x2E000001,0x2E000001,0xA640000,0x600000,0x600000,0x700000,0x780000,0x2800000,0x2800000,0xA00000,0x700000,0x780000,0xCC0000,0x1200000, -0xCC0000,0xA80000,0xA80000,0xA80000,0xA80000,0xF80000,0xF80000,0xF80000,0x1FC0000,0x1FC0000,0x52000001,0xF80000,0xF80000,0xF80000,0x1FC0000,0x1FC0000,0x52000001,0x1FC0000,0x1FC0000,0x52000001,0x52000001,0xF80000,0xF80000,0xF80000,0x1FC0000,0x1FC0000,0x52000001,0x1FC0000,0x1FC0000,0x52000001,0x52000001,0x1FC0000, -0x1FC0000,0x52000001,0x52000001,0x52000001,0xC40000,0xB40000,0xA80000,0xE40000,0x1180000,0x1640000,0x19C0000,0x17F80000,0x4D00000,0xF80000,0x1640000,0x52000001,0x1640000,0xFC0000,0x3740000,0x3FFC0000,0x7C000001,0x3740000,0x3FFC0000,0x7C000001,0x3FFC0000,0x7C000001,0x7C000001,0x3740000,0x3FFC0000,0x7C000001,0x3FFC0000,0x7C000001, -0x7C000001,0x3FFC0000,0x7C000001,0x7C000001,0x7C000001,0x3740000,0x3FFC0000,0x7C000001,0x3FFC0000,0x7C000001,0x7C000001,0x3FFC0000,0x7C000001,0x7C000001,0x7C000001,0x3FFC0000,0x7C000001,0x7C000001,0x7C000001,0x7C000001,0x13C0000,0x10C0000,0x10C0000,0x1A80000,0x27F80000,0x5DFC0000,0x7C000001,0x7C000001,0x3540000,0x1D80000,0x75FC0000,0x7C000001, -0xDFC0000,0x841EA8,0xFE600C58,0xA45C0BE8,0x7C5C0BE9,0xFC3806F8,0xB23803A1,0x82440531,0x8E3406E6,0x7A3403AE,0x683406E6,0xF80C0BE8,0xB80C028A,0x84240411,0x9608038E,0x7C0C000E,0x6A1403AE,0x7C0C0BE8,0x72000417,0x62040533,0x540C0BEB,0xC41EA8,0xA8000D6B,0x7C100BE9,0x8C000A7E,0x740004CE,0x68000746,0x7400107B,0x6A000782,0x5E0007AB,0x52000DBC,0x18C1EA8, -0x5C001329,0x560010EE,0x4A0014E3,0x40001EAC,0xFE500C45,0xFE6C17DE,0xF67C1978,0xFE1C0128,0xCA0C0011,0x9A0C0006,0x80140046,0x78040046,0xFE340B52,0xFE000098,0x820002B5,0x5E0007AB,0x1181EA8,0xAC0BE8,0xF8840288,0x9A840288,0x7C840289,0xDC600372,0xA0600005,0x7E680042,0x84600372,0x765800A5,0x68600372,0x1000BE8,0xB4140289,0x7E480289,0x9A00037D,0x7C14000A, -0x68280372,0x5F80BE8,0x720003F3,0x60000513,0x54000BEB,0x1000BE8,0xB4140289,0x7E480289,0x9A00037D,0x7C14000A,0x68280372,0x5F80BE8,0x720003F3,0x60000513,0x54000BEB,0x5F80BE8,0x720003F3,0x60000513,0x54000BEB,0x54000BEB,0xFE7804ED,0xF69C08F6,0xFAA408C9,0xFE300081,0xCA10000D,0x98100006,0x7E280001,0x7A000031,0xFC580519,0xFE040033,0x820002B1,0x60000513, -0x16C0BE8,0x5C0BE8,0x5C0BE8,0x5C0BE8,0x5C0BE8,0xC6380374,0xC6380374,0xC6380374,0x6E380374,0x6E380374,0x52340375,0xBC0C0288,0xBC0C0288,0xBC0C0288,0x7A100009,0x7A100009,0x561800A6,0x5E0C0288,0x5E0C0288,0x4E080041,0x400C028A,0x880BE8,0x880BE8,0x880BE8,0x6E000465,0x6E000465,0x52000373,0x580004E1,0x580004E1,0x4C0001AA,0x4000036B,0x1140BE8, -0x1140BE8,0x3E0006ED,0x3800072A,0x2C000BEB,0xFE3C0434,0xFE4C0864,0x5C0BE8,0xFE180059,0xC80C0005,0x9A0C0005,0x88140021,0x6E0C0002,0xFE1403B9,0xF600005E,0x7C00028C,0x4C0001AA,0xC00BE8,0x840288,0x840288,0x840288,0x840288,0x9C600000,0x9C600000,0x9C600000,0x64600000,0x64600000,0x52600001,0xC40288,0xC40288,0xC40288,0x72200001,0x72200001, -0x543C0001,0x18C0288,0x18C0288,0x5000002D,0x4000028A,0xC40288,0xC40288,0xC40288,0x72200001,0x72200001,0x543C0001,0x18C0288,0x18C0288,0x5000002D,0x4000028A,0x18C0288,0x18C0288,0x5000002D,0x4000028A,0x4000028A,0xFE5800B5,0xF4780120,0x840288,0xFE240012,0xBC180000,0x92180000,0x7E280000,0x70080000,0xFE3400DD,0xFC00000A,0x1180288,0x5000002D, -0x1180288,0xD00374,0xD2A80000,0x92A80000,0x7CA80001,0x1380372,0xA25C0001,0x7C800001,0x21F80372,0x7C000001,0x68000372,0x1380372,0xA25C0001,0x7C800001,0x21F80372,0x7C000001,0x68000372,0x21F80372,0x7C000001,0x68000372,0x68000372,0x1380372,0xA25C0001,0x7C800001,0x21F80372,0x7C000001,0x68000372,0x21F80372,0x7C000001,0x68000372,0x68000372,0x21F80372, -0x7C000001,0x68000372,0x68000372,0x68000372,0xFE9401C2,0xE00372,0xFCC80200,0xFC3C0032,0xCE040005,0x9C040000,0x7C300001,0x7A00002D,0xFE7001B1,0xFE080014,0x84380000,0x68000372,0x1BC0372,0x340374,0x340374,0x340374,0x340374,0x340374,0x340374,0x340374,0x340374,0x340374,0x340374,0x840C0000,0x840C0000,0x840C0000,0x840C0000,0x840C0000, -0x840C0000,0x440C0000,0x440C0000,0x440C0000,0x2E0C0001,0x500372,0x500372,0x500372,0x500372,0x500372,0x500372,0x3A0000E9,0x3A0000E9,0x3A0000E9,0x2E000052,0xA00372,0xA00372,0xA00372,0x200001E1,0x1A000372,0xFC28016D,0x340374,0x340374,0xFE10001D,0xD20C0000,0xA20C0000,0xA20C0000,0x6C0C0000,0xF80400F2,0xD6000041,0x5A000000,0x3A0000E9, -0x700372,}; -static const uint32_t g_etc1_to_bc7_m6_table137[] = { -0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x280000,0x500000, -0x500000,0x500000,0x500000,0xC000001,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x61C0000,0x61C0000,0x61C0000,0x280000,0x380000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000, -0x2A40000,0x1500000,0x1500000,0x1500000,0x36000001,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x1500000,0x1500000,0x1500000,0x36000001,0x1500000,0x1500000,0x1500000,0x36000001,0x36000001,0x780000,0x700000,0x700000,0x4800000,0x8C0000,0x980000,0x980000,0xBC0000,0x4800000,0x8C0000,0xEC0000,0x1500000, -0xEC0000,0xB80000,0xB80000,0xB80000,0xB80000,0x1100000,0x1100000,0x1100000,0xDFC0000,0xDFC0000,0x5A000001,0x1100000,0x1100000,0x1100000,0xDFC0000,0xDFC0000,0x5A000001,0xDFC0000,0xDFC0000,0x5A000001,0x5A000001,0x1100000,0x1100000,0x1100000,0xDFC0000,0xDFC0000,0x5A000001,0xDFC0000,0xDFC0000,0x5A000001,0x5A000001,0xDFC0000, -0xDFC0000,0x5A000001,0x5A000001,0x5A000001,0x2D40000,0xC40000,0xB80000,0x2F80000,0x1340000,0x1880000,0x1C00000,0x21FC0000,0x4E40000,0x1100000,0x1880000,0x5A000001,0x1880000,0x10C0000,0x38C0000,0x4BFC0000,0x84000001,0x38C0000,0x4BFC0000,0x84000001,0x4BFC0000,0x84000001,0x84000001,0x38C0000,0x4BFC0000,0x84000001,0x4BFC0000,0x84000001, -0x84000001,0x4BFC0000,0x84000001,0x84000001,0x84000001,0x38C0000,0x4BFC0000,0x84000001,0x4BFC0000,0x84000001,0x84000001,0x4BFC0000,0x84000001,0x84000001,0x84000001,0x4BFC0000,0x84000001,0x84000001,0x84000001,0x84000001,0x1500000,0x71C0000,0x71C0000,0x3C00000,0x35F80000,0x67FC0000,0x84000001,0x84000001,0x16C0000,0x1F80000,0x7FD00000,0x84000001, -0x1DF80000,0x941EA8,0xFE740C91,0xAC6C0BE8,0x846C0BE9,0xFC4C0716,0xBA4803A1,0x8A540531,0x964406E6,0x824403AE,0x704406E6,0xFE1C0BEB,0xC01C028A,0x8C340411,0x9E18038E,0x841C000E,0x722403AE,0x841C0BE8,0x780C0413,0x6A140533,0x5C1C0BEB,0xDC1EA8,0xBA000CBB,0x84200BE9,0x9800096E,0x80000406,0x700006EC,0x86000F87,0x7400061B,0x68000663,0x5A000D03,0x1BC1EA8, -0x6600124C,0x5C000FB6,0x5600140B,0x48001EAC,0xFE640CCE,0xFC8817E8,0xFE8C1978,0xFE34018E,0xD21C0011,0xA21C0006,0x88240046,0x80140046,0xFE440C0F,0xFE1000C2,0x8A08029D,0x68000663,0x1381EA8,0xBC0BE8,0xFC940289,0xA2940288,0x84940289,0xE4700372,0xA8700005,0x86780042,0x8C700372,0x7E6800A5,0x70700372,0x1180BE8,0xBC240289,0x86580289,0xA6040372,0x8424000A, -0x70380372,0x11F80BE8,0x78000393,0x6C0004B3,0x5C000BEB,0x1180BE8,0xBC240289,0x86580289,0xA6040372,0x8424000A,0x70380372,0x11F80BE8,0x78000393,0x6C0004B3,0x5C000BEB,0x11F80BE8,0x78000393,0x6C0004B3,0x5C000BEB,0x5C000BEB,0xFE800552,0xFEAC08F6,0xFEAC0901,0xFE4000AB,0xD220000D,0xA0200006,0x86380001,0x82000022,0xFE5C0579,0xFE180059,0x8C00028E,0x6C0004B3, -0x1900BE8,0x6C0BE8,0x6C0BE8,0x6C0BE8,0x6C0BE8,0xCE480374,0xCE480374,0xCE480374,0x76480374,0x76480374,0x5A440375,0xC41C0288,0xC41C0288,0xC41C0288,0x82200009,0x82200009,0x5E2800A6,0x661C0288,0x661C0288,0x56180041,0x481C028A,0xA00BE8,0xA00BE8,0xA00BE8,0x800003ED,0x800003ED,0x5A100373,0x6800042A,0x6800042A,0x540000F6,0x460002EB,0x1440BE8, -0x1440BE8,0x4A000655,0x3E00068A,0x34000BEB,0xFE4C0461,0xFA64087D,0x6C0BE8,0xFE2C0081,0xD01C0005,0xA21C0005,0x90240021,0x761C0002,0xFA2C03F9,0xFC080049,0x840C028A,0x540000F6,0xE40BE8,0x940288,0x940288,0x940288,0x940288,0xA4700000,0xA4700000,0xA4700000,0x6C700000,0x6C700000,0x5A700001,0xDC0288,0xDC0288,0xDC0288,0x7A300001,0x7A300001, -0x5C4C0001,0x1BC0288,0x1BC0288,0x58000019,0x4800028A,0xDC0288,0xDC0288,0xDC0288,0x7A300001,0x7A300001,0x5C4C0001,0x1BC0288,0x1BC0288,0x58000019,0x4800028A,0x1BC0288,0x1BC0288,0x58000019,0x4800028A,0x4800028A,0xFA6C00C8,0xFC880120,0x940288,0xFE34001D,0xC4280000,0x9A280000,0x86380000,0x78180000,0xFA4800F4,0xFC14000D,0x1380288,0x58000019, -0x1380288,0xE00374,0xDAB80000,0x9AB80000,0x84B80001,0x1500372,0xAA6C0001,0x84900001,0x2DF80372,0x84100001,0x70000372,0x1500372,0xAA6C0001,0x84900001,0x2DF80372,0x84100001,0x70000372,0x2DF80372,0x84100001,0x70000372,0x70000372,0x1500372,0xAA6C0001,0x84900001,0x2DF80372,0x84100001,0x70000372,0x2DF80372,0x84100001,0x70000372,0x70000372,0x2DF80372, -0x84100001,0x70000372,0x70000372,0x70000372,0xF8A801E1,0xF00372,0xF4D80221,0xFC540048,0xDC080001,0xA4140000,0x84400001,0x82000019,0xFC8C01C2,0xFE200028,0x8C480000,0x70000372,0x1E00372,0x440374,0x440374,0x440374,0x440374,0x440374,0x440374,0x440374,0x440374,0x440374,0x440374,0x8C1C0000,0x8C1C0000,0x8C1C0000,0x8C1C0000,0x8C1C0000, -0x8C1C0000,0x4C1C0000,0x4C1C0000,0x4C1C0000,0x361C0001,0x680372,0x680372,0x680372,0x680372,0x680372,0x680372,0x4C000089,0x4C000089,0x4C000089,0x36000011,0xD00372,0xD00372,0xD00372,0x2C000179,0x22000372,0xF4380188,0x440374,0x440374,0xFE200028,0xDA1C0000,0xAA1C0000,0xAA1C0000,0x741C0000,0xFE1000FA,0xFE000014,0x62100000,0x4C000089, -0x940372,}; -static const uint32_t g_etc1_to_bc7_m6_table138[] = { -0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x400000,0x800000, -0x800000,0x800000,0x800000,0x14000001,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0xE2C0000,0xE2C0000,0xE2C0000,0x400000,0x5C0000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000, -0x2BC0000,0x1800000,0x1800000,0x1800000,0x3E000001,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x1800000,0x1800000,0x1800000,0x3E000001,0x1800000,0x1800000,0x1800000,0x3E000001,0x3E000001,0x880000,0x800000,0x800000,0x940000,0xA00000,0x2AC0000,0x2AC0000,0x2D40000,0x940000,0xA00000,0x1100000,0x1800000, -0x1100000,0xC80000,0xC80000,0xC80000,0xC80000,0x1280000,0x1280000,0x1280000,0x19FC0000,0x19FC0000,0x62000001,0x1280000,0x1280000,0x1280000,0x19FC0000,0x19FC0000,0x62000001,0x19FC0000,0x19FC0000,0x62000001,0x62000001,0x1280000,0x1280000,0x1280000,0x19FC0000,0x19FC0000,0x62000001,0x19FC0000,0x19FC0000,0x62000001,0x62000001,0x19FC0000, -0x19FC0000,0x62000001,0x62000001,0x62000001,0xE80000,0x2D40000,0xC80000,0x1100000,0x1500000,0x1A80000,0x1E80000,0x2DF80000,0x4F80000,0x1280000,0x1A80000,0x62000001,0x1A80000,0x11C0000,0x3A40000,0x57FC0000,0x8C000001,0x3A40000,0x57FC0000,0x8C000001,0x57FC0000,0x8C000001,0x8C000001,0x3A40000,0x57FC0000,0x8C000001,0x57FC0000,0x8C000001, -0x8C000001,0x57FC0000,0x8C000001,0x8C000001,0x8C000001,0x3A40000,0x57FC0000,0x8C000001,0x57FC0000,0x8C000001,0x8C000001,0x57FC0000,0x8C000001,0x8C000001,0x8C000001,0x57FC0000,0x8C000001,0x8C000001,0x8C000001,0x8C000001,0x1640000,0xF2C0000,0xF2C0000,0x1DC0000,0x41FC0000,0x71FC0000,0x8C000001,0x8C000001,0x3800000,0xFFC0000,0x87E00000,0x8C000001, -0x2BFC0000,0xA41EA8,0xFE800CD5,0xB47C0BE8,0x8C7C0BE9,0xFE5C073E,0xC25803A1,0x92640531,0x9E5406E6,0x8A5403AE,0x785406E6,0xFE300BF8,0xC82C028A,0x94440411,0xA628038E,0x8C2C000E,0x7A3403AE,0x8C2C0BE8,0x801C0413,0x72240533,0x642C0BEB,0xF41EA8,0xC6000C43,0x8C300BE9,0xA8000866,0x8C00039E,0x780806E6,0x92000EA7,0x800004F3,0x70000563,0x64000C64,0x1F01EA8, -0x72001164,0x66000EB4,0x5C00131B,0x50001EAC,0xFE780D71,0xF498186E,0xF8A019DD,0xFE440206,0xDA2C0011,0xAA2C0006,0x90340046,0x88240046,0xFE540C9B,0xFE240136,0x9218029D,0x70000563,0x15C1EA8,0xCC0BE8,0xFEA40291,0xAAA40288,0x8CA40289,0xEC800372,0xB0800005,0x8E880042,0x94800372,0x867800A5,0x78800372,0x1300BE8,0xC4340289,0x8E680289,0xAE140372,0x8C34000A, -0x78480372,0x1DF40BE8,0x8400033B,0x72000463,0x64000BEB,0x1300BE8,0xC4340289,0x8E680289,0xAE140372,0x8C34000A,0x78480372,0x1DF40BE8,0x8400033B,0x72000463,0x64000BEB,0x1DF40BE8,0x8400033B,0x72000463,0x64000BEB,0x64000BEB,0xFE94057B,0xF8C00934,0xFAC4090C,0xFE5400EA,0xDA30000D,0xA8300006,0x8E480001,0x8A100022,0xFE780595,0xFE300095,0x96080288,0x72000463, -0x1B00BE8,0x7C0BE8,0x7C0BE8,0x7C0BE8,0x7C0BE8,0xD6580374,0xD6580374,0xD6580374,0x7E580374,0x7E580374,0x62540375,0xCC2C0288,0xCC2C0288,0xCC2C0288,0x8A300009,0x8A300009,0x663800A6,0x6E2C0288,0x6E2C0288,0x5E280041,0x502C028A,0xB80BE8,0xB80BE8,0xB80BE8,0x8C00039D,0x8C00039D,0x62200373,0x7400039A,0x7400039A,0x5E00006A,0x500002A3,0x1740BE8, -0x1740BE8,0x500005CD,0x4A0005FA,0x3C000BEB,0xFE5804B9,0xFE6C08A5,0x7C0BE8,0xFE3C00A8,0xD82C0005,0xAA2C0005,0x98340021,0x7E2C0002,0xFE3C0428,0xFE180062,0x8C1C028A,0x5E00006A,0x1080BE8,0xA40288,0xA40288,0xA40288,0xA40288,0xAC800000,0xAC800000,0xAC800000,0x74800000,0x74800000,0x62800001,0xF40288,0xF40288,0xF40288,0x82400001,0x82400001, -0x645C0001,0x1F00288,0x1F00288,0x6000000A,0x5000028A,0xF40288,0xF40288,0xF40288,0x82400001,0x82400001,0x645C0001,0x1F00288,0x1F00288,0x6000000A,0x5000028A,0x1F00288,0x1F00288,0x6000000A,0x5000028A,0x5000028A,0xF68000DD,0xF4980139,0xA40288,0xF8500029,0xCC380000,0xA2380000,0x8E480000,0x80280000,0xF2600109,0xFE240014,0x15C0288,0x6000000A, -0x15C0288,0xF00374,0xE2C80000,0xA2C80000,0x8CC80001,0x1680372,0xB27C0001,0x8CA00001,0x39F80372,0x8C200001,0x78000372,0x1680372,0xB27C0001,0x8CA00001,0x39F80372,0x8C200001,0x78000372,0x39F80372,0x8C200001,0x78000372,0x78000372,0x1680372,0xB27C0001,0x8CA00001,0x39F80372,0x8C200001,0x78000372,0x39F80372,0x8C200001,0x78000372,0x78000372,0x39F80372, -0x8C200001,0x78000372,0x78000372,0x78000372,0xFEB401ED,0x9000372,0xFCE80221,0xFE6C0055,0xE4180001,0xAC240000,0x8C500001,0x8A00000D,0xFE9801D4,0xFE400032,0x94580000,0x78000372,0x3FC0372,0x540374,0x540374,0x540374,0x540374,0x540374,0x540374,0x540374,0x540374,0x540374,0x540374,0x942C0000,0x942C0000,0x942C0000,0x942C0000,0x942C0000, -0x942C0000,0x542C0000,0x542C0000,0x542C0000,0x3E2C0001,0x800372,0x800372,0x800372,0x800372,0x800372,0x800372,0x58000041,0x58000041,0x58000041,0x3E040001,0x1000372,0x1000372,0x1000372,0x38000131,0x2A000372,0xFC480188,0x540374,0x540374,0xFA340034,0xE22C0000,0xB22C0000,0xB22C0000,0x7C2C0000,0xFE200115,0xFE140019,0x6A200000,0x58000041, -0xB40372,}; -static const uint32_t g_etc1_to_bc7_m6_table139[] = { -0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0x580000,0xB00000, -0xB00000,0xB00000,0xB00000,0x1C000001,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x3C0000,0x400000,0x400000,0x400000,0x580000,0x7C0000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000, -0xD40000,0x1B00000,0x1B00000,0x1B00000,0x46000001,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x1B00000,0x1B00000,0x1B00000,0x46000001,0x1B00000,0x1B00000,0x1B00000,0x46000001,0x46000001,0x4980000,0x900000,0x900000,0xA80000,0xB40000,0xC40000,0xC40000,0xF00000,0xA80000,0xB40000,0x1300000,0x1B00000, -0x1300000,0xD80000,0xD80000,0xD80000,0xD80000,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x6A000001,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x6A000001,0x25F80000,0x25F80000,0x6A000001,0x6A000001,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x6A000001,0x25F80000,0x25F80000,0x6A000001,0x6A000001,0x25F80000, -0x25F80000,0x6A000001,0x6A000001,0x6A000001,0xFC0000,0xAE40000,0xD80000,0x3240000,0x16C0000,0x1CC0000,0x9F80000,0x37FC0000,0x50C0000,0x1400000,0x1CC0000,0x6A000001,0x1CC0000,0x12C0000,0x3BC0000,0x63FC0000,0x94000001,0x3BC0000,0x63FC0000,0x94000001,0x63FC0000,0x94000001,0x94000001,0x3BC0000,0x63FC0000,0x94000001,0x63FC0000,0x94000001, -0x94000001,0x63FC0000,0x94000001,0x94000001,0x94000001,0x3BC0000,0x63FC0000,0x94000001,0x63FC0000,0x94000001,0x94000001,0x63FC0000,0x94000001,0x94000001,0x94000001,0x63FC0000,0x94000001,0x94000001,0x94000001,0x94000001,0x1780000,0x1400000,0x1400000,0x1F80000,0x4FFC0000,0x7BFC0000,0x94000001,0x94000001,0x1980000,0x21FC0000,0x8FF00000,0x94000001, -0x3BFC0000,0xB41EA8,0xFE980D35,0xBC8C0BE8,0x948C0BE9,0xFE74078E,0xCA6803A1,0x9A740531,0xA66406E6,0x926403AE,0x806406E6,0xFE440C13,0xD03C028A,0x9C540411,0xAE38038E,0x943C000E,0x824403AE,0x943C0BE8,0x882C0413,0x7A340533,0x6C3C0BEB,0x10C1EA8,0xD8000BFB,0x94400BE9,0xBA0007BE,0x9608038E,0x801806E6,0x9E000DE7,0x8C00040B,0x7A000497,0x6C000C0F,0xBF81EA8, -0x7800109C,0x72000DB4,0x6600126C,0x58001EAC,0xFE8C0E18,0xFCA8186E,0xFEAC19E1,0xFE5C0296,0xE23C0011,0xB23C0006,0x98440046,0x90340046,0xFE700D55,0xFE3401B7,0x9A28029D,0x7A000497,0x17C1EA8,0xDC0BE8,0xFCB802A9,0xB2B40288,0x94B40289,0xF4900372,0xB8900005,0x96980042,0x9C900372,0x8E8800A5,0x80900372,0x3440BE8,0xCC440289,0x96780289,0xB6240372,0x9444000A, -0x80580372,0x27FC0BE8,0x8E000301,0x7C000422,0x6C000BEB,0x3440BE8,0xCC440289,0x96780289,0xB6240372,0x9444000A,0x80580372,0x27FC0BE8,0x8E000301,0x7C000422,0x6C000BEB,0x27FC0BE8,0x8E000301,0x7C000422,0x6C000BEB,0x6C000BEB,0xFCB005BD,0xFECC0938,0xFECC094C,0xFE6C0129,0xE240000D,0xB0400006,0x96580001,0x92200022,0xFE8805E8,0xFE4800B9,0x9E180288,0x7C000422, -0x1D40BE8,0x8C0BE8,0x8C0BE8,0x8C0BE8,0x8C0BE8,0xDE680374,0xDE680374,0xDE680374,0x86680374,0x86680374,0x6A640375,0xD43C0288,0xD43C0288,0xD43C0288,0x92400009,0x92400009,0x6E4800A6,0x763C0288,0x763C0288,0x66380041,0x583C028A,0xD00BE8,0xD00BE8,0xD00BE8,0x9E000375,0x9E000375,0x6A300373,0x8000032A,0x8000032A,0x6800001A,0x5A00028A,0x1A40BE8, -0x1A40BE8,0x5C000545,0x5000056A,0x44000BEB,0xFE6804EE,0xFA8408B8,0x8C0BE8,0xFE4C00D9,0xE03C0005,0xB23C0005,0xA0440021,0x863C0002,0xFE500455,0xFE300081,0x942C028A,0x6800001A,0x1280BE8,0xB40288,0xB40288,0xB40288,0xB40288,0xB4900000,0xB4900000,0xB4900000,0x7C900000,0x7C900000,0x6A900001,0x10C0288,0x10C0288,0x10C0288,0x8A500001,0x8A500001, -0x6C6C0001,0xBF80288,0xBF80288,0x6A000001,0x5800028A,0x10C0288,0x10C0288,0x10C0288,0x8A500001,0x8A500001,0x6C6C0001,0xBF80288,0xBF80288,0x6A000001,0x5800028A,0xBF80288,0xBF80288,0x6A000001,0x5800028A,0x5800028A,0xFE9000DD,0xFCA80139,0xB40288,0xFE5C002D,0xD4480000,0xAA480000,0x96580000,0x88380000,0xFA700109,0xFA400020,0x17C0288,0x6A000001, -0x17C0288,0x1000374,0xEAD80000,0xAAD80000,0x94D80001,0x1800372,0xBA8C0001,0x94B00001,0x45F80372,0x94300001,0x80000372,0x1800372,0xBA8C0001,0x94B00001,0x45F80372,0x94300001,0x80000372,0x45F80372,0x94300001,0x80000372,0x80000372,0x1800372,0xBA8C0001,0x94B00001,0x45F80372,0x94300001,0x80000372,0x45F80372,0x94300001,0x80000372,0x80000372,0x45F80372, -0x94300001,0x80000372,0x80000372,0x80000372,0xFCCC0202,0x1140372,0xF4F80244,0xFE800071,0xEC280001,0xB4340000,0x94600001,0x94000002,0xFEAC01F9,0xFE58004A,0x9C680000,0x80000372,0x13FC0372,0x640374,0x640374,0x640374,0x640374,0x640374,0x640374,0x640374,0x640374,0x640374,0x640374,0x9C3C0000,0x9C3C0000,0x9C3C0000,0x9C3C0000,0x9C3C0000, -0x9C3C0000,0x5C3C0000,0x5C3C0000,0x5C3C0000,0x463C0001,0x980372,0x980372,0x980372,0x980372,0x980372,0x980372,0x68000011,0x68000011,0x68000011,0x46140001,0x1300372,0x1300372,0x1300372,0x3E0000E1,0x32000372,0xF45801A5,0x640374,0x640374,0xFC48003D,0xEA3C0000,0xBA3C0000,0xBA3C0000,0x843C0000,0xFC380120,0xF8280029,0x72300000,0x68000011, -0xD80372,}; -static const uint32_t g_etc1_to_bc7_m6_table140[] = { -0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0xE80000, -0xE80000,0xE80000,0xE80000,0x26000000,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x540000,0x540000,0x540000,0x740000,0xA40000,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xA00001,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000, -0xF00000,0x1E80000,0x1E80000,0x1E80000,0x50000000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0xF00000,0x1E80000,0x1E80000,0x1E80000,0x50000000,0x1E80000,0x1E80000,0x1E80000,0x50000000,0x50000000,0xAC0000,0xA00001,0xA00001,0xBC0000,0xCC0000,0xDC0000,0xDC0000,0x1100000,0xBC0000,0xCC0000,0x1580000,0x1E80000, -0x1580000,0xE80001,0xE80001,0xE80001,0xE80001,0x15C0000,0x15C0000,0x15C0000,0x33F80000,0x33F80000,0x74000000,0x15C0000,0x15C0000,0x15C0000,0x33F80000,0x33F80000,0x74000000,0x33F80000,0x33F80000,0x74000000,0x74000000,0x15C0000,0x15C0000,0x15C0000,0x33F80000,0x33F80000,0x74000000,0x33F80000,0x33F80000,0x74000000,0x74000000,0x33F80000, -0x33F80000,0x74000000,0x74000000,0x74000000,0x1100000,0x4F80000,0xE80001,0x33C0000,0x1880000,0x1F00000,0x17FC0000,0x43FC0000,0x1240000,0x15C0000,0x1F00000,0x74000000,0x1F00000,0x13C0001,0x1D80000,0x71F80000,0x9E000000,0x1D80000,0x71F80000,0x9E000000,0x71F80000,0x9E000000,0x9E000000,0x1D80000,0x71F80000,0x9E000000,0x71F80000,0x9E000000, -0x9E000000,0x71F80000,0x9E000000,0x9E000000,0x9E000000,0x1D80000,0x71F80000,0x9E000000,0x71F80000,0x9E000000,0x9E000000,0x71F80000,0x9E000000,0x9E000000,0x9E000000,0x71F80000,0x9E000000,0x9E000000,0x9E000000,0x9E000000,0x58C0000,0x1540000,0x1540000,0x13FC0000,0x5FF80000,0x87F80000,0x9E000000,0x9E000000,0x1B00000,0x33FC0000,0x99E40000,0x9E000000, -0x4BFC0000,0xC41EAC,0xFEA40D93,0xC4A00BEB,0x9E9C0BEB,0xFE8407F4,0xD47C03A3,0xA2840533,0xAE7806E6,0x9A7403AE,0x887806E6,0xFE5C0C49,0xDA50028A,0xA6680413,0xB84C038E,0x9E4C000E,0x8A5403AE,0x9C500BE9,0x92400411,0x82440531,0x764C0BE9,0x3241EA8,0xE8080BE9,0x9E500BE8,0xC6000736,0xA01C038E,0x882C06E6,0xAE000D2F,0x98000345,0x840003FD,0x76000BE8,0x17FC1EA8, -0x84000FB8,0x78000C98,0x6C001198,0x62001EA8,0xFEA00EBE,0xF6BC18F4,0xF8C01A44,0xFE70034E,0xF04C000E,0xBC500006,0xA2580046,0x9A480046,0xFE800E11,0xFE500235,0xA438029D,0x840003FD,0x1A41EA8,0xEC0BEB,0xFEC802CA,0xBCC4028A,0x9EC4028A,0xFCA40373,0xC4A00002,0xA0AC0041,0xA4A40373,0x989C00A6,0x88A00375,0x1600BE8,0xD4580289,0x9E8C0288,0xBE380372,0x9C540009, -0x886C0374,0x35FC0BE8,0x9A0002CA,0x840003E4,0x76000BE8,0x1600BE8,0xD4580289,0x9E8C0288,0xBE380372,0x9C540009,0x886C0374,0x35FC0BE8,0x9A0002CA,0x840003E4,0x76000BE8,0x35FC0BE8,0x9A0002CA,0x840003E4,0x76000BE8,0x76000BE8,0xFEBC0611,0xF8E00975,0xFCE8094E,0xFE800189,0xEA54000D,0xBA540005,0x9E680002,0x9A380021,0xFEA40632,0xFE5C0116,0xA62C0289,0x840003E4, -0x1F80BE8,0x9C0BEB,0x9C0BEB,0x9C0BEB,0x9C0BEB,0xE8780372,0xE8780372,0xE8780372,0x90780372,0x90780372,0x74780372,0xDA500289,0xDA500289,0xDA500289,0x9A50000A,0x9A50000A,0x785C00A5,0x804C0289,0x804C0289,0x704C0042,0x624C0289,0x2E80BE8,0x2E80BE8,0x2E80BE8,0xAA0C0372,0xAA0C0372,0x74400372,0x8C0002D9,0x8C0002D9,0x74000005,0x62140288,0x1DC0BE8, -0x1DC0BE8,0x660004D8,0x5C0004E1,0x4E000BE8,0xFE840522,0xF49808F6,0x9C0BEB,0xFE680122,0xE6500006,0xB8500005,0xA8540022,0x904C0001,0xFE6404B5,0xFE4400B9,0x9E3C0289,0x74000005,0x1500BE8,0xC4028A,0xC4028A,0xC4028A,0xC4028A,0xBCA40001,0xBCA40001,0xBCA40001,0x86A00001,0x86A00001,0x74A00001,0x3240288,0x3240288,0x3240288,0x94600001,0x94600001, -0x74800000,0x17FC0288,0x17FC0288,0x740C0000,0x62000288,0x3240288,0x3240288,0x3240288,0x94600001,0x94600001,0x74800000,0x17FC0288,0x17FC0288,0x740C0000,0x62000288,0x17FC0288,0x17FC0288,0x740C0000,0x62000288,0x62000288,0xFEA000F4,0xF6BC0152,0xC4028A,0xFE78003D,0xD8600001,0xB25C0000,0xA0680000,0x904C0000,0xF8880120,0xFC580029,0x1A40288,0x740C0000, -0x1A40288,0x1140372,0xF2EC0001,0xB2EC0001,0x9EE80001,0x3980372,0xC49C0001,0x9EC00000,0x51FC0372,0x9E3C0000,0x88000374,0x3980372,0xC49C0001,0x9EC00000,0x51FC0372,0x9E3C0000,0x88000374,0x51FC0372,0x9E3C0000,0x88000374,0x88000374,0x3980372,0xC49C0001,0x9EC00000,0x51FC0372,0x9E3C0000,0x88000374,0x51FC0372,0x9E3C0000,0x88000374,0x88000374,0x51FC0372, -0x9E3C0000,0x88000374,0x88000374,0x88000374,0xF6E80221,0xB240372,0xFF0C0242,0xFE9C0091,0xF43C0001,0xBE440000,0x9E700000,0x9E000000,0xF8D00200,0xFA7C0071,0xA47C0000,0x88000374,0x23FC0372,0x780372,0x780372,0x780372,0x780372,0x780372,0x780372,0x780372,0x780372,0x780372,0x780372,0xA4500001,0xA4500001,0xA4500001,0xA4500001,0xA4500001, -0xA4500001,0x64500001,0x64500001,0x64500001,0x504C0001,0x2B00372,0x2B00372,0x2B00372,0x2B00372,0x2B00372,0x2B00372,0x76000001,0x76000001,0x76000001,0x50240000,0x1680372,0x1680372,0x1680372,0x4A00009D,0x3A000374,0xFE6C01A5,0x780372,0x780372,0xFE58004A,0xF0500001,0xC0500001,0xC0500001,0x8C500001,0xF84C0139,0xFE3C0032,0x78440001,0x76000001, -0xFC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table141[] = { -0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x8C0000,0x1180000, -0x1180000,0x1180000,0x1180000,0x2E000000,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x640000,0x640000,0x640000,0x8C0000,0xC80000,0xB00001,0xB00001,0xB00001,0xB00001,0xB00001,0xB00001,0xB00001,0xB00001,0xB00001,0xB00001,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000, -0x1080000,0x9F80000,0x9F80000,0x9F80000,0x58000000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x9F80000,0x9F80000,0x9F80000,0x58000000,0x9F80000,0x9F80000,0x9F80000,0x58000000,0x58000000,0x6BC0000,0xB00001,0xB00001,0x6CC0000,0xE00000,0x2F00000,0x2F00000,0x12C0000,0x6CC0000,0xE00000,0x1780000,0x9F80000, -0x1780000,0xF80001,0xF80001,0xF80001,0xF80001,0x1740000,0x1740000,0x1740000,0x3FF80000,0x3FF80000,0x7C000000,0x1740000,0x1740000,0x1740000,0x3FF80000,0x3FF80000,0x7C000000,0x3FF80000,0x3FF80000,0x7C000000,0x7C000000,0x1740000,0x1740000,0x1740000,0x3FF80000,0x3FF80000,0x7C000000,0x3FF80000,0x3FF80000,0x7C000000,0x7C000000,0x3FF80000, -0x3FF80000,0x7C000000,0x7C000000,0x7C000000,0x7200000,0xD080000,0xF80001,0x1540000,0x1A40000,0xBFC0000,0x25FC0000,0x4FF80000,0x1380000,0x1740000,0xBFC0000,0x7C000000,0xBFC0000,0x14C0001,0x1F00000,0x7DF80000,0xA6000000,0x1F00000,0x7DF80000,0xA6000000,0x7DF80000,0xA6000000,0xA6000000,0x1F00000,0x7DF80000,0xA6000000,0x7DF80000,0xA6000000, -0xA6000000,0x7DF80000,0xA6000000,0xA6000000,0xA6000000,0x1F00000,0x7DF80000,0xA6000000,0x7DF80000,0xA6000000,0xA6000000,0x7DF80000,0xA6000000,0xA6000000,0xA6000000,0x7DF80000,0xA6000000,0xA6000000,0xA6000000,0xA6000000,0x5A00000,0x1640000,0x1640000,0x27FC0000,0x6BFC0000,0x91F80000,0xA6000000,0xA6000000,0x3C40000,0x45FC0000,0xA1F40000,0xA6000000, -0x5BFC0000,0xD41EAC,0xFEBC0E0B,0xCCB00BEB,0xA6AC0BEB,0xFE980856,0xDC8C03A3,0xAA940533,0xB68806E6,0xA28403AE,0x908806E6,0xFE740C91,0xE260028A,0xAE780413,0xC05C038E,0xA65C000E,0x926403AE,0xA4600BE9,0x9A500411,0x8A540531,0x7E5C0BE9,0x33C1EA8,0xF0180BE9,0xA6600BE8,0xD80006F6,0xA82C038E,0x903C06E6,0xBA000CAF,0xA20002CE,0x8E0003B5,0x7E100BE8,0x23FC1EA8, -0x90000EF8,0x84000BA8,0x780010D8,0x6A001EA8,0xFEB40F7B,0xFECC18F4,0xFECC1A5C,0xFE8003FA,0xF85C000E,0xC4600006,0xAA680046,0xA2580046,0xFE900EA5,0xFE6002FA,0xAC48029D,0x8E0003B5,0x1C81EA8,0xFC0BEB,0xFEDC02EB,0xC4D4028A,0xA6D4028A,0xFEB4037B,0xCCB00002,0xA8BC0041,0xACB40373,0xA0AC00A6,0x90B00375,0x1780BE8,0xDC680289,0xA69C0288,0xC6480372,0xA4640009, -0x907C0374,0x41FC0BE8,0xA20002AA,0x900003B4,0x7E000BE8,0x1780BE8,0xDC680289,0xA69C0288,0xC6480372,0xA4640009,0x907C0374,0x41FC0BE8,0xA20002AA,0x900003B4,0x7E000BE8,0x41FC0BE8,0xA20002AA,0x900003B4,0x7E000BE8,0x7E000BE8,0xFED00640,0xFEEC098D,0xF4F80993,0xFE9801D5,0xF264000D,0xC2640005,0xA6780002,0xA2480021,0xFEB40682,0xFC80016E,0xAE3C0289,0x900003B4, -0xFFC0BE8,0xAC0BEB,0xAC0BEB,0xAC0BEB,0xAC0BEB,0xF0880372,0xF0880372,0xF0880372,0x98880372,0x98880372,0x7C880372,0xE2600289,0xE2600289,0xE2600289,0xA260000A,0xA260000A,0x806C00A5,0x885C0289,0x885C0289,0x785C0042,0x6A5C0289,0x3000BE8,0x3000BE8,0x3000BE8,0xB21C0372,0xB21C0372,0x7C500372,0x9E0002A1,0x9E0002A1,0x7C100005,0x6A240288,0x5FC0BE8, -0x5FC0BE8,0x72000478,0x64000489,0x56000BE8,0xFE90056E,0xFCA808F6,0xAC0BEB,0xFE780156,0xEE600006,0xC0600005,0xB0640022,0x985C0001,0xFE7804E6,0xFE5C00F5,0xA64C0289,0x7C100005,0x1700BE8,0xD4028A,0xD4028A,0xD4028A,0xD4028A,0xC4B40001,0xC4B40001,0xC4B40001,0x8EB00001,0x8EB00001,0x7CB00001,0x33C0288,0x33C0288,0x33C0288,0x9C700001,0x9C700001, -0x7C900000,0x23FC0288,0x23FC0288,0x7C1C0000,0x6A000288,0x33C0288,0x33C0288,0x33C0288,0x9C700001,0x9C700001,0x7C900000,0x23FC0288,0x23FC0288,0x7C1C0000,0x6A000288,0x23FC0288,0x23FC0288,0x7C1C0000,0x6A000288,0x6A000288,0xFAB40109,0xFECC0152,0xD4028A,0xFE88004A,0xE0700001,0xBA6C0000,0xA8780000,0x985C0000,0xFE940128,0xFE680034,0x1C80288,0x7C1C0000, -0x1C80288,0x1240372,0xFAFC0001,0xBAFC0001,0xA6F80001,0x3B00372,0xCCAC0001,0xA6D00000,0x5DFC0372,0xA64C0000,0x90000374,0x3B00372,0xCCAC0001,0xA6D00000,0x5DFC0372,0xA64C0000,0x90000374,0x5DFC0372,0xA64C0000,0x90000374,0x90000374,0x3B00372,0xCCAC0001,0xA6D00000,0x5DFC0372,0xA64C0000,0x90000374,0x5DFC0372,0xA64C0000,0x90000374,0x90000374,0x5DFC0372, -0xA64C0000,0x90000374,0x90000374,0x90000374,0xFEF80221,0x1380372,0xF71C0265,0xFEB000AA,0xFC4C0001,0xC6540000,0xA6800000,0xA6100000,0xFEDC0208,0xFE940080,0xAC8C0000,0x90000374,0x33FC0372,0x880372,0x880372,0x880372,0x880372,0x880372,0x880372,0x880372,0x880372,0x880372,0x880372,0xAC600001,0xAC600001,0xAC600001,0xAC600001,0xAC600001, -0xAC600001,0x6C600001,0x6C600001,0x6C600001,0x585C0001,0xC80372,0xC80372,0xC80372,0xC80372,0xC80372,0xC80372,0x7E100001,0x7E100001,0x7E100001,0x58340000,0x1980372,0x1980372,0x1980372,0x50000071,0x42000374,0xF67C01C2,0x880372,0x880372,0xFE680059,0xF8600001,0xC8600001,0xC8600001,0x94600001,0xFE580145,0xFE50003D,0x80540001,0x7E100001, -0x1200372,}; -static const uint32_t g_etc1_to_bc7_m6_table142[] = { -0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0x14C0000, -0x14C0000,0x14C0000,0x14C0000,0x36000000,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0x2740000,0x2740000,0x2740000,0xA40000,0xE80000,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0xC00001,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000, -0x1200000,0x15F80000,0x15F80000,0x15F80000,0x60000000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x1200000,0x15F80000,0x15F80000,0x15F80000,0x60000000,0x15F80000,0x15F80000,0x15F80000,0x60000000,0x60000000,0xECC0000,0xC00001,0xC00001,0x2E00000,0xF40000,0x1080000,0x1080000,0x1440000,0x2E00000,0xF40000,0x19C0000,0x15F80000, -0x19C0000,0x1080001,0x1080001,0x1080001,0x1080001,0x18C0000,0x18C0000,0x18C0000,0x4BF80000,0x4BF80000,0x84000000,0x18C0000,0x18C0000,0x18C0000,0x4BF80000,0x4BF80000,0x84000000,0x4BF80000,0x4BF80000,0x84000000,0x84000000,0x18C0000,0x18C0000,0x18C0000,0x4BF80000,0x4BF80000,0x84000000,0x4BF80000,0x4BF80000,0x84000000,0x84000000,0x4BF80000, -0x4BF80000,0x84000000,0x84000000,0x84000000,0x3340000,0x11C0000,0x1080001,0x3680000,0x1C00000,0x1BFC0000,0x33F80000,0x59FC0000,0x34C0000,0x18C0000,0x1BFC0000,0x84000000,0x1BFC0000,0x15C0001,0xDFC0000,0x89F80000,0xAE000000,0xDFC0000,0x89F80000,0xAE000000,0x89F80000,0xAE000000,0xAE000000,0xDFC0000,0x89F80000,0xAE000000,0x89F80000,0xAE000000, -0xAE000000,0x89F80000,0xAE000000,0xAE000000,0xAE000000,0xDFC0000,0x89F80000,0xAE000000,0x89F80000,0xAE000000,0xAE000000,0x89F80000,0xAE000000,0xAE000000,0xAE000000,0x89F80000,0xAE000000,0xAE000000,0xAE000000,0xAE000000,0x5B40000,0x3740000,0x3740000,0x3BFC0000,0x79FC0000,0x9BF80000,0xAE000000,0xAE000000,0x1DC0000,0x55FC0000,0xABC80000,0xAE000000, -0x69FC0000,0xE41EAC,0xFEC80E73,0xD4C00BEB,0xAEBC0BEB,0xFEA808EC,0xE49C03A3,0xB2A40533,0xBE9806E6,0xAA9403AE,0x989806E6,0xFE840CF4,0xEA70028A,0xB6880413,0xC86C038E,0xAE6C000E,0x9A7403AE,0xAC700BE9,0xA2600411,0x92640531,0x866C0BE9,0x1541EA8,0xF8280BE9,0xAE700BE8,0xE20406E6,0xB03C038E,0x984C06E6,0xC6000C4F,0xAC000292,0x980403A1,0x86200BE8,0x2FFC1EA8, -0x9C000E58,0x8A000AE8,0x7E00102C,0x72001EA8,0xFEBC1022,0xF6DC197E,0xF8E01AAF,0xFE9804BE,0xFA70000F,0xCC700006,0xB2780046,0xAA680046,0xFEA40F96,0xFE7403B7,0xB458029D,0x980403A1,0x1E81EA8,0x10C0BEB,0xFEEC032A,0xCCE4028A,0xAEE4028A,0xFEC8038D,0xD4C00002,0xB0CC0041,0xB4C40373,0xA8BC00A6,0x98C00375,0x1900BE8,0xE4780289,0xAEAC0288,0xCE580372,0xAC740009, -0x988C0374,0x4DFC0BE8,0xAC00028E,0x96000394,0x86000BE8,0x1900BE8,0xE4780289,0xAEAC0288,0xCE580372,0xAC740009,0x988C0374,0x4DFC0BE8,0xAC00028E,0x96000394,0x86000BE8,0x4DFC0BE8,0xAC00028E,0x96000394,0x86000BE8,0x86000BE8,0xFEE406A5,0xFB0409B3,0xFD080993,0xFEB0022E,0xFA74000D,0xCA740005,0xAE880002,0xAA580021,0xFAD406D1,0xFE9001B2,0xB64C0289,0x96000394, -0x1FF80BE8,0xBC0BEB,0xBC0BEB,0xBC0BEB,0xBC0BEB,0xF8980372,0xF8980372,0xF8980372,0xA0980372,0xA0980372,0x84980372,0xEA700289,0xEA700289,0xEA700289,0xAA70000A,0xAA70000A,0x887C00A5,0x906C0289,0x906C0289,0x806C0042,0x726C0289,0x3180BE8,0x3180BE8,0x3180BE8,0xBA2C0372,0xBA2C0372,0x84600372,0xAA000289,0xAA000289,0x84200005,0x72340288,0x11FC0BE8, -0x11FC0BE8,0x7C000432,0x6C000414,0x5E000BE8,0xFEA005A5,0xF4B80933,0xBC0BEB,0xFE880193,0xF6700006,0xC8700005,0xB8740022,0xA06C0001,0xFC8C0549,0xFE680138,0xAE5C0289,0x84200005,0x1940BE8,0xE4028A,0xE4028A,0xE4028A,0xE4028A,0xCCC40001,0xCCC40001,0xCCC40001,0x96C00001,0x96C00001,0x84C00001,0x1540288,0x1540288,0x1540288,0xA4800001,0xA4800001, -0x84A00000,0x2FFC0288,0x2FFC0288,0x842C0000,0x72000288,0x1540288,0x1540288,0x1540288,0xA4800001,0xA4800001,0x84A00000,0x2FFC0288,0x2FFC0288,0x842C0000,0x72000288,0x2FFC0288,0x2FFC0288,0x842C0000,0x72000288,0x72000288,0xF6C80120,0xF6DC016D,0xE4028A,0xFE980061,0xE8800001,0xC27C0000,0xB0880000,0xA06C0000,0xFAAC0139,0xFE800048,0x1E80288,0x842C0000, -0x1E80288,0x1340372,0xFF0C0002,0xC30C0001,0xAF080001,0x1C80372,0xD4BC0001,0xAEE00000,0x69FC0372,0xAE5C0000,0x98000374,0x1C80372,0xD4BC0001,0xAEE00000,0x69FC0372,0xAE5C0000,0x98000374,0x69FC0372,0xAE5C0000,0x98000374,0x98000374,0x1C80372,0xD4BC0001,0xAEE00000,0x69FC0372,0xAE5C0000,0x98000374,0x69FC0372,0xAE5C0000,0x98000374,0x98000374,0x69FC0372, -0xAE5C0000,0x98000374,0x98000374,0x98000374,0xFB0C0242,0x1480372,0xFF2C0265,0xFEC800D0,0xFE680005,0xCE640000,0xAE900000,0xAE200000,0xFEF0022D,0xFEAC00A4,0xB49C0000,0x98000374,0x41FC0372,0x980372,0x980372,0x980372,0x980372,0x980372,0x980372,0x980372,0x980372,0x980372,0x980372,0xB4700001,0xB4700001,0xB4700001,0xB4700001,0xB4700001, -0xB4700001,0x74700001,0x74700001,0x74700001,0x606C0001,0xE00372,0xE00372,0xE00372,0xE00372,0xE00372,0xE00372,0x86200001,0x86200001,0x86200001,0x60440000,0x1CC0372,0x1CC0372,0x1CC0372,0x5C000041,0x4A000374,0xFE8C01C2,0x980372,0x980372,0xFA7C0071,0xFA700002,0xD0700001,0xD0700001,0x9C700001,0xFC700152,0xFC600055,0x88640001,0x86200001, -0x1400372,}; -static const uint32_t g_etc1_to_bc7_m6_table143[] = { -0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0xBC0000,0x17C0000, -0x17C0000,0x17C0000,0x17C0000,0x3E000000,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0xA840000,0xA840000,0xA840000,0xBC0000,0x10C0000,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000, -0x1380000,0x21F80000,0x21F80000,0x21F80000,0x68000000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x21F80000,0x21F80000,0x21F80000,0x68000000,0x21F80000,0x21F80000,0x21F80000,0x68000000,0x68000000,0xE00000,0xD00001,0xD00001,0xF40000,0x1080000,0x11C0000,0x11C0000,0x1600000,0xF40000,0x1080000,0x1BC0000,0x21F80000, -0x1BC0000,0x1180001,0x1180001,0x1180001,0x1180001,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x8C000000,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x8C000000,0x57F80000,0x57F80000,0x8C000000,0x8C000000,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x8C000000,0x57F80000,0x57F80000,0x8C000000,0x8C000000,0x57F80000, -0x57F80000,0x8C000000,0x8C000000,0x8C000000,0x1480000,0x12C0000,0x1180001,0x1800000,0x1D80000,0x29FC0000,0x3FFC0000,0x65F40000,0x3600000,0x1A40000,0x29FC0000,0x8C000000,0x29FC0000,0x16C0001,0x25FC0000,0x95F80000,0xB6000000,0x25FC0000,0x95F80000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0x25FC0000,0x95F80000,0xB6000000,0x95F80000,0xB6000000, -0xB6000000,0x95F80000,0xB6000000,0xB6000000,0xB6000000,0x25FC0000,0x95F80000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0xB6000000,0xB6000000,0x1CC0000,0xB840000,0xB840000,0x4FFC0000,0x87F80000,0xA5F80000,0xB6000000,0xB6000000,0x3F00000,0x67FC0000,0xB3D80000,0xB6000000, -0x79FC0000,0xF41EAC,0xFEDC0EEC,0xDCD00BEB,0xB6CC0BEB,0xFEBC096E,0xECAC03A3,0xBAB40533,0xC6A806E6,0xB2A403AE,0xA0A806E6,0xFE980D51,0xF280028A,0xBE980413,0xD07C038E,0xB67C000E,0xA28403AE,0xB4800BE9,0xAA700411,0x9A740531,0x8E7C0BE9,0x16C1EA8,0xFE380BEF,0xB6800BE8,0xEA1406E6,0xB84C038E,0xA05C06E6,0xD2000C0F,0xB608028A,0xA01403A1,0x8E300BE8,0x3BFC1EA8, -0xA6000DDB,0x96000A18,0x8A000F9C,0x7A001EA8,0xFED010D1,0xFEEC197E,0xFEEC1ACB,0xFEAC059E,0xFE840027,0xD4800006,0xBA880046,0xB2780046,0xFEB4101B,0xFE900462,0xBC68029D,0xA01403A1,0x7FC1EA8,0x11C0BEB,0xFF000363,0xD4F4028A,0xB6F4028A,0xFEDC03AB,0xDCD00002,0xB8DC0041,0xBCD40373,0xB0CC00A6,0xA0D00375,0x1A80BE8,0xEC880289,0xB6BC0288,0xD6680372,0xB4840009, -0xA09C0374,0x59FC0BE8,0xB6000288,0xA000037D,0x8E000BE8,0x1A80BE8,0xEC880289,0xB6BC0288,0xD6680372,0xB4840009,0xA09C0374,0x59FC0BE8,0xB6000288,0xA000037D,0x8E000BE8,0x59FC0BE8,0xB6000288,0xA000037D,0x8E000BE8,0x8E000BE8,0xFEF806DA,0xFF0C09DB,0xF51809DA,0xFEC00296,0xFE88001A,0xD2840005,0xB6980002,0xB2680021,0xFEDC06F9,0xFEA40224,0xBE5C0289,0xA000037D, -0x2DFC0BE8,0xCC0BEB,0xCC0BEB,0xCC0BEB,0xCC0BEB,0xFEA80373,0xFEA80373,0xFEA80373,0xA8A80372,0xA8A80372,0x8CA80372,0xF2800289,0xF2800289,0xF2800289,0xB280000A,0xB280000A,0x908C00A5,0x987C0289,0x987C0289,0x887C0042,0x7A7C0289,0x3300BE8,0x3300BE8,0x3300BE8,0xC23C0372,0xC23C0372,0x8C700372,0xB2100289,0xB2100289,0x8C300005,0x7A440288,0x1DFC0BE8, -0x1DFC0BE8,0x840003E4,0x760003C9,0x66000BE8,0xFCB805F6,0xFCC80933,0xCC0BEB,0xFE9801DA,0xFE800006,0xD0800005,0xC0840022,0xA87C0001,0xFC9C0581,0xFE800171,0xB66C0289,0x8C300005,0x1B40BE8,0xF4028A,0xF4028A,0xF4028A,0xF4028A,0xD4D40001,0xD4D40001,0xD4D40001,0x9ED00001,0x9ED00001,0x8CD00001,0x16C0288,0x16C0288,0x16C0288,0xAC900001,0xAC900001, -0x8CB00000,0x3BFC0288,0x3BFC0288,0x8C3C0000,0x7A000288,0x16C0288,0x16C0288,0x16C0288,0xAC900001,0xAC900001,0x8CB00000,0x3BFC0288,0x3BFC0288,0x8C3C0000,0x7A000288,0x3BFC0288,0x3BFC0288,0x8C3C0000,0x7A000288,0x7A000288,0xFED80120,0xFEEC016D,0xF4028A,0xFEB40071,0xF0900001,0xCA8C0000,0xB8980000,0xA87C0000,0xFCC00152,0xFE980055,0x7FC0288,0x8C3C0000, -0x7FC0288,0x1440372,0xFF1C0011,0xCB1C0001,0xB7180001,0x1E00372,0xDCCC0001,0xB6F00000,0x75FC0372,0xB66C0000,0xA0000374,0x1E00372,0xDCCC0001,0xB6F00000,0x75FC0372,0xB66C0000,0xA0000374,0x75FC0372,0xB66C0000,0xA0000374,0xA0000374,0x1E00372,0xDCCC0001,0xB6F00000,0x75FC0372,0xB66C0000,0xA0000374,0x75FC0372,0xB66C0000,0xA0000374,0xA0000374,0x75FC0372, -0xB66C0000,0xA0000374,0xA0000374,0xA0000374,0xFF140262,0x5580372,0xF73C028A,0xFCE800F2,0xFC8C0019,0xD6740000,0xB6A00000,0xB6300000,0xFD0C0242,0xFEC000C1,0xBCAC0000,0xA0000374,0x51FC0372,0xA80372,0xA80372,0xA80372,0xA80372,0xA80372,0xA80372,0xA80372,0xA80372,0xA80372,0xA80372,0xBC800001,0xBC800001,0xBC800001,0xBC800001,0xBC800001, -0xBC800001,0x7C800001,0x7C800001,0x7C800001,0x687C0001,0xF80372,0xF80372,0xF80372,0xF80372,0xF80372,0xF80372,0x8E300001,0x8E300001,0x8E300001,0x68540000,0x1FC0372,0x1FC0372,0x1FC0372,0x66000028,0x52000374,0xF69C01E1,0xA80372,0xA80372,0xFC8C0082,0xFE800005,0xD8800001,0xD8800001,0xA4800001,0xF884016D,0xFC740062,0x90740001,0x8E300001, -0x1640372,}; -static const uint32_t g_etc1_to_bc7_m6_table144[] = { -0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x1B00000, -0x1B00000,0x1B00000,0x1B00000,0x46000001,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x4980000,0x4980000,0x4980000,0xD40000,0x1300000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0xE40000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000, -0x3500000,0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x3500000,0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0x2DFC0000,0x2DFC0000,0x2DFC0000,0x70000001,0x70000001,0xF40000,0xE40000,0xE40000,0x1080000,0x31C0000,0x3340000,0x3340000,0x1800000,0x1080000,0x31C0000,0x1E40000,0x2DFC0000, -0x1E40000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x3BC0000,0x3BC0000,0x3BC0000,0x63FC0000,0x63FC0000,0x94000001,0x3BC0000,0x3BC0000,0x3BC0000,0x63FC0000,0x63FC0000,0x94000001,0x63FC0000,0x63FC0000,0x94000001,0x94000001,0x3BC0000,0x3BC0000,0x3BC0000,0x63FC0000,0x63FC0000,0x94000001,0x63FC0000,0x63FC0000,0x94000001,0x94000001,0x63FC0000, -0x63FC0000,0x94000001,0x94000001,0x94000001,0x15C0000,0x1400000,0x12C0000,0x1980000,0x1F80000,0x3BFC0000,0x4FFC0000,0x71F80000,0x1780000,0x3BC0000,0x3BFC0000,0x94000001,0x3BFC0000,0x1800000,0x41FC0000,0xA1FC0000,0xBE000001,0x41FC0000,0xA1FC0000,0xBE000001,0xA1FC0000,0xBE000001,0xBE000001,0x41FC0000,0xA1FC0000,0xBE000001,0xA1FC0000,0xBE000001, -0xBE000001,0xA1FC0000,0xBE000001,0xBE000001,0xBE000001,0x41FC0000,0xA1FC0000,0xBE000001,0xA1FC0000,0xBE000001,0xBE000001,0xA1FC0000,0xBE000001,0xBE000001,0xBE000001,0xA1FC0000,0xBE000001,0xBE000001,0xBE000001,0xBE000001,0x3E00000,0x5980000,0x5980000,0x65FC0000,0x95FC0000,0xB1F40000,0xBE000001,0xBE000001,0x13FC0000,0x79FC0000,0xBDCC0000,0xBE000001, -0x89FC0000,0x1081EA8,0xFEE80F9C,0xE6E00BE8,0xBEE00BE9,0xFED00A18,0xF4BC03A1,0xC4C80531,0xD0B806E6,0xBCB803AE,0xAAB806E6,0xFEB00DDB,0xFA90028A,0xC6A80411,0xD88C038E,0xBE90000E,0xAC9803AE,0xBE900BE8,0xB2800413,0xA4880533,0x96900BEB,0x1881EA8,0xFE580C0F,0xBE940BE9,0xF22806E6,0xC05C038E,0xAA6C06E6,0xE0000BEF,0xBE18028A,0xA82403A3,0x96440BEB,0x49F81EA8, -0xB2000D51,0xA000096E,0x90000EEC,0x82001EAC,0xFEE41196,0xF9001A08,0xFB041B18,0xFEC006A1,0xFE9C0075,0xDC900006,0xC2980046,0xBA880046,0xFED410EA,0xFEA40584,0xC47C029D,0xA82403A3,0x19FC1EA8,0x1300BE8,0xFF1003C9,0xDD080288,0xBF080289,0xFEF403E4,0xE2E40005,0xC0EC0042,0xC6E40372,0xB8DC00A5,0xAAE40372,0x1C40BE8,0xF6980289,0xC0CC0289,0xE0780372,0xBE98000A, -0xAAAC0372,0x67F80BE8,0xBE140289,0xAA000373,0x96000BEB,0x1C40BE8,0xF6980289,0xC0CC0289,0xE0780372,0xBE98000A,0xAAAC0372,0x67F80BE8,0xBE140289,0xAA000373,0x96000BEB,0x67F80BE8,0xBE140289,0xAA000373,0x96000BEB,0x96000BEB,0xFD10074D,0xFD2809F6,0xFF2C09D9,0xFEDC0312,0xFEA4003E,0xDA940006,0xC0AC0001,0xBC740022,0xFEF80752,0xFEC00281,0xC86C0288,0xAA000373, -0x3FF80BE8,0xE00BE8,0xE00BE8,0xE00BE8,0xE00BE8,0xFEBC037D,0xFEBC037D,0xFEBC037D,0xB0BC0374,0xB0BC0374,0x94B80375,0xFE900288,0xFE900288,0xFE900288,0xBC940009,0xBC940009,0x989C00A6,0xA0900288,0xA0900288,0x908C0041,0x8290028A,0x14C0BE8,0x14C0BE8,0x14C0BE8,0xCA500372,0xCA500372,0x94840373,0xBA240289,0xBA240289,0x96440002,0x8454028A,0x2BF80BE8, -0x2BF80BE8,0x900003AB,0x7E000363,0x6E000BEB,0xFECC062C,0xF4D80975,0xE00BE8,0xFEAC023D,0xFE940011,0xDC900005,0xCA980021,0xB0900002,0xFEB405B5,0xFE9801C6,0xBE80028A,0x96440002,0x1DC0BE8,0x1080288,0x1080288,0x1080288,0x1080288,0xDEE40000,0xDEE40000,0xDEE40000,0xA6E40000,0xA6E40000,0x94E40001,0x1880288,0x1880288,0x1880288,0xB4A40001,0xB4A40001, -0x96C00001,0x49F80288,0x49F80288,0x94540001,0x8200028A,0x1880288,0x1880288,0x1880288,0xB4A40001,0xB4A40001,0x96C00001,0x49F80288,0x49F80288,0x94540001,0x8200028A,0x49F80288,0x49F80288,0x94540001,0x8200028A,0x8200028A,0xFAEC0139,0xF9000188,0x1080288,0xFEC40088,0xFE9C0000,0xD49C0000,0xC0AC0000,0xB28C0000,0xFED0015A,0xFEB00071,0x19FC0288,0x94540001, -0x19FC0288,0x1540374,0xFF300028,0xD52C0000,0xBF2C0001,0x1FC0372,0xE4E00001,0xBF040001,0x83F80372,0xBE840001,0xAA000372,0x1FC0372,0xE4E00001,0xBF040001,0x83F80372,0xBE840001,0xAA000372,0x83F80372,0xBE840001,0xAA000372,0xAA000372,0x1FC0372,0xE4E00001,0xBF040001,0x83F80372,0xBE840001,0xAA000372,0x83F80372,0xBE840001,0xAA000372,0xAA000372,0x83F80372, -0xBE840001,0xAA000372,0xAA000372,0xAA000372,0xFF340265,0x16C0372,0xFF4C0290,0xFF000120,0xFEA8002D,0xDE880000,0xBEB40001,0xBE480001,0xFF180262,0xFEE400F4,0xC6BC0000,0xAA000372,0x61FC0372,0xB80374,0xB80374,0xB80374,0xB80374,0xB80374,0xB80374,0xB80374,0xB80374,0xB80374,0xB80374,0xC6900000,0xC6900000,0xC6900000,0xC6900000,0xC6900000, -0xC6900000,0x86900000,0x86900000,0x86900000,0x70900001,0x1140372,0x1140372,0x1140372,0x1140372,0x1140372,0x1140372,0x96440001,0x96440001,0x96440001,0x70680001,0xFF80372,0xFF80372,0xFF80372,0x70000011,0x5C000372,0xFEAC01E5,0xB80374,0xB80374,0xFEA00091,0xFE94000D,0xE4900000,0xE4900000,0xAE900000,0xFE900179,0xFC880080,0x9C840000,0x96440001, -0x18C0372,}; -static const uint32_t g_etc1_to_bc7_m6_table145[] = { -0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0x1E40000, -0x1E40000,0x1E40000,0x1E40000,0x4E000001,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xA00000,0xCA80000,0xCA80000,0xCA80000,0xEC0000,0x1540000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0x3680000,0x3680000,0x3680000,0x3680000,0x3680000, -0x3680000,0x39FC0000,0x39FC0000,0x39FC0000,0x78000001,0x3680000,0x3680000,0x3680000,0x3680000,0x3680000,0x3680000,0x39FC0000,0x39FC0000,0x39FC0000,0x78000001,0x39FC0000,0x39FC0000,0x39FC0000,0x78000001,0x78000001,0x1040000,0xF40000,0xF40000,0x11C0000,0x3300000,0x14C0000,0x14C0000,0x1980000,0x11C0000,0x3300000,0x5FC0000,0x39FC0000, -0x5FC0000,0x13C0000,0x13C0000,0x13C0000,0x13C0000,0x1D40000,0x1D40000,0x1D40000,0x6FFC0000,0x6FFC0000,0x9C000001,0x1D40000,0x1D40000,0x1D40000,0x6FFC0000,0x6FFC0000,0x9C000001,0x6FFC0000,0x6FFC0000,0x9C000001,0x9C000001,0x1D40000,0x1D40000,0x1D40000,0x6FFC0000,0x6FFC0000,0x9C000001,0x6FFC0000,0x6FFC0000,0x9C000001,0x9C000001,0x6FFC0000, -0x6FFC0000,0x9C000001,0x9C000001,0x9C000001,0x1700000,0x1500000,0x13C0000,0x3AC0000,0x11FC0000,0x49FC0000,0x5DF80000,0x7BFC0000,0x18C0000,0x1D40000,0x49FC0000,0x9C000001,0x49FC0000,0x1900000,0x59FC0000,0xADFC0000,0xC6000001,0x59FC0000,0xADFC0000,0xC6000001,0xADFC0000,0xC6000001,0xC6000001,0x59FC0000,0xADFC0000,0xC6000001,0xADFC0000,0xC6000001, -0xC6000001,0xADFC0000,0xC6000001,0xC6000001,0xC6000001,0x59FC0000,0xADFC0000,0xC6000001,0xADFC0000,0xC6000001,0xC6000001,0xADFC0000,0xC6000001,0xC6000001,0xC6000001,0xADFC0000,0xC6000001,0xC6000001,0xC6000001,0xC6000001,0x3F40000,0xDA80000,0xDA80000,0x79FC0000,0xA3FC0000,0xBBF40000,0xC6000001,0xC6000001,0x31FC0000,0x8BFC0000,0xC5DC0000,0xC6000001, -0x99FC0000,0x1181EA8,0xFF00102C,0xEEF00BE8,0xC6F00BE9,0xFEE80AE8,0xFCCC03A1,0xCCD80531,0xD8C806E6,0xC4C803AE,0xB2C806E6,0xFEC40E58,0xFEA40292,0xCEB80411,0xE09C038E,0xC6A0000E,0xB4A803AE,0xC6A00BE8,0xBA900413,0xAC980533,0x9EA00BEB,0x1A01EA8,0xFE700C4F,0xC6A40BE9,0xFA3806E6,0xC86C038E,0xB27C06E6,0xEA0C0BE9,0xC628028A,0xB03403A3,0x9E540BEB,0x55F81EA8, -0xBC000CF4,0xAA0008EC,0x9A000E73,0x8A001EAC,0xFEF81255,0xFF0C1A18,0xFF0C1B58,0xFED4078E,0xFEB000ED,0xE4A00006,0xCAA80046,0xC2980046,0xFEDC11C3,0xFEC00675,0xCC8C029D,0xB03403A3,0x27FC1EA8,0x1400BE8,0xFF240414,0xE5180288,0xC7180289,0xFF040432,0xEAF40005,0xC8FC0042,0xCEF40372,0xC0EC00A5,0xB2F40372,0x1DC0BE8,0xFEA80289,0xC8DC0289,0xE8880372,0xC6A8000A, -0xB2BC0372,0x73F80BE8,0xC6240289,0xB2080372,0x9E000BEB,0x1DC0BE8,0xFEA80289,0xC8DC0289,0xE8880372,0xC6A8000A,0xB2BC0372,0x73F80BE8,0xC6240289,0xB2080372,0x9E000BEB,0x73F80BE8,0xC6240289,0xB2080372,0x9E000BEB,0x9E000BEB,0xFF20078E,0xF5380A38,0xF73C0A20,0xFEEC0395,0xFEBC007E,0xE2A40006,0xC8BC0001,0xC4840022,0xFF0807A3,0xFEDC0305,0xD07C0288,0xB2080372, -0x4DFC0BE8,0xF00BE8,0xF00BE8,0xF00BE8,0xF00BE8,0xFED00394,0xFED00394,0xFED00394,0xB8CC0374,0xB8CC0374,0x9CC80375,0xFEA4028E,0xFEA4028E,0xFEA4028E,0xC4A40009,0xC4A40009,0xA0AC00A6,0xA8A00288,0xA8A00288,0x989C0041,0x8AA0028A,0x1640BE8,0x1640BE8,0x1640BE8,0xD2600372,0xD2600372,0x9C940373,0xC2340289,0xC2340289,0x9E540002,0x8C64028A,0x37F80BE8, -0x37F80BE8,0x9A00038D,0x8800032A,0x76000BEB,0xFED80672,0xFCE80975,0xF00BE8,0xFEC4028C,0xFEAC0031,0xE4A00005,0xD2A80021,0xB8A00002,0xFEBC061A,0xFCAC0225,0xC690028A,0x9E540002,0x1FC0BE8,0x1180288,0x1180288,0x1180288,0x1180288,0xE6F40000,0xE6F40000,0xE6F40000,0xAEF40000,0xAEF40000,0x9CF40001,0x1A00288,0x1A00288,0x1A00288,0xBCB40001,0xBCB40001, -0x9ED00001,0x55F80288,0x55F80288,0x9C640001,0x8A00028A,0x1A00288,0x1A00288,0x1A00288,0xBCB40001,0xBCB40001,0x9ED00001,0x55F80288,0x55F80288,0x9C640001,0x8A00028A,0x55F80288,0x55F80288,0x9C640001,0x8A00028A,0x8A00028A,0xF7000152,0xFF0C0190,0x1180288,0xFEDC0091,0xFEB00004,0xDCAC0000,0xC8BC0000,0xBA9C0000,0xF8EC016D,0xFEC40080,0x27FC0288,0x9C640001, -0x27FC0288,0x1640374,0xFF440041,0xDD3C0000,0xC73C0001,0x19FC0372,0xECF00001,0xC7140001,0x8FF80372,0xC6940001,0xB2000372,0x19FC0372,0xECF00001,0xC7140001,0x8FF80372,0xC6940001,0xB2000372,0x8FF80372,0xC6940001,0xB2000372,0xB2000372,0x19FC0372,0xECF00001,0xC7140001,0x8FF80372,0xC6940001,0xB2000372,0x8FF80372,0xC6940001,0xB2000372,0xB2000372,0x8FF80372, -0xC6940001,0xB2000372,0xB2000372,0xB2000372,0xFB480288,0x77C0372,0xF96002AD,0xFF14013D,0xFECC0055,0xE6980000,0xC6C40001,0xC6580001,0xF1400288,0xFD040120,0xCECC0000,0xB2000372,0x71FC0372,0xC80374,0xC80374,0xC80374,0xC80374,0xC80374,0xC80374,0xC80374,0xC80374,0xC80374,0xC80374,0xCEA00000,0xCEA00000,0xCEA00000,0xCEA00000,0xCEA00000, -0xCEA00000,0x8EA00000,0x8EA00000,0x8EA00000,0x78A00001,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x9E540001,0x9E540001,0x9E540001,0x78780001,0x1BF80372,0x1BF80372,0x1BF80372,0x78000002,0x64000372,0xF8C00200,0xC80374,0xC80374,0xFEAC00B4,0xFEA80019,0xECA00000,0xECA00000,0xB6A00000,0xFEA0019A,0xFC9C0091,0xA4940000,0x9E540001, -0x1AC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table146[] = { -0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x7FC0000, -0x7FC0000,0x7FC0000,0x7FC0000,0x56000001,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xBC0000,0xBC0000,0xBC0000,0x1040000,0x1740000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x1040000,0x3800000,0x3800000,0x3800000,0x3800000,0x3800000, -0x3800000,0x45FC0000,0x45FC0000,0x45FC0000,0x80000001,0x3800000,0x3800000,0x3800000,0x3800000,0x3800000,0x3800000,0x45FC0000,0x45FC0000,0x45FC0000,0x80000001,0x45FC0000,0x45FC0000,0x45FC0000,0x80000001,0x80000001,0x3140000,0x1040000,0x1040000,0x52C0000,0x3440000,0x1600000,0x1600000,0x1B40000,0x52C0000,0x3440000,0x15FC0000,0x45FC0000, -0x15FC0000,0x14C0000,0x14C0000,0x14C0000,0x14C0000,0x1EC0000,0x1EC0000,0x1EC0000,0x7BFC0000,0x7BFC0000,0xA4000001,0x1EC0000,0x1EC0000,0x1EC0000,0x7BFC0000,0x7BFC0000,0xA4000001,0x7BFC0000,0x7BFC0000,0xA4000001,0xA4000001,0x1EC0000,0x1EC0000,0x1EC0000,0x7BFC0000,0x7BFC0000,0xA4000001,0x7BFC0000,0x7BFC0000,0xA4000001,0xA4000001,0x7BFC0000, -0x7BFC0000,0xA4000001,0xA4000001,0xA4000001,0x5800000,0x9600000,0x14C0000,0x1C40000,0x25FC0000,0x59FC0000,0x69FC0000,0x87F40000,0x1A00000,0x1EC0000,0x59FC0000,0xA4000001,0x59FC0000,0x1A00000,0x71FC0000,0xB9FC0000,0xCE000001,0x71FC0000,0xB9FC0000,0xCE000001,0xB9FC0000,0xCE000001,0xCE000001,0x71FC0000,0xB9FC0000,0xCE000001,0xB9FC0000,0xCE000001, -0xCE000001,0xB9FC0000,0xCE000001,0xCE000001,0xCE000001,0x71FC0000,0xB9FC0000,0xCE000001,0xB9FC0000,0xCE000001,0xCE000001,0xB9FC0000,0xCE000001,0xCE000001,0xCE000001,0xB9FC0000,0xCE000001,0xCE000001,0xCE000001,0xCE000001,0x1BFC0000,0x1BC0000,0x1BC0000,0x8DFC0000,0xB1FC0000,0xC5F40000,0xCE000001,0xCE000001,0x4FFC0000,0x9BFC0000,0xCDEC0000,0xCE000001, -0xA7FC0000,0x1281EA8,0xFF0C10D8,0xF7000BE8,0xCF000BE9,0xFEF40BA8,0xFEDC03B5,0xD4E80531,0xE0D806E6,0xCCD803AE,0xBAD806E6,0xFEDC0EF8,0xFEB802CE,0xD6C80411,0xE8AC038E,0xCEB0000E,0xBCB803AE,0xCEB00BE8,0xC2A00413,0xB4A80533,0xA6B00BEB,0x1B81EA8,0xFE880CAF,0xCEB40BE9,0xFE4C06F6,0xD07C038E,0xBA8C06E6,0xF21C0BE9,0xCE38028A,0xB84403A3,0xA6640BEB,0x61F81EA8, -0xC4000C91,0xB2000856,0xA0000E0B,0x92001EAC,0xFF0C1316,0xF9201A96,0xFB241B85,0xFEEC08C9,0xFEC40195,0xECB00006,0xD2B80046,0xCAA80046,0xFEF81262,0xFED0075B,0xD49C029D,0xB84403A3,0x37FC1EA8,0x1500BE8,0xFF340489,0xED280288,0xCF280289,0xFF180478,0xF3040005,0xD10C0042,0xD7040372,0xC8FC00A5,0xBB040372,0x1F40BE8,0xFEC002A1,0xD0EC0289,0xF0980372,0xCEB8000A, -0xBACC0372,0x7FF80BE8,0xCE340289,0xBA180372,0xA6000BEB,0x1F40BE8,0xFEC002A1,0xD0EC0289,0xF0980372,0xCEB8000A,0xBACC0372,0x7FF80BE8,0xCE340289,0xBA180372,0xA6000BEB,0x7FF80BE8,0xCE340289,0xBA180372,0xA6000BEB,0xA6000BEB,0xFF3407C9,0xFD480A38,0xFF4C0A20,0xFF08040E,0xFED000DE,0xEAB40006,0xD0CC0001,0xCC940022,0xFF180806,0xFEF00396,0xD88C0288,0xBA180372, -0x5DF80BE8,0x1000BE8,0x1000BE8,0x1000BE8,0x1000BE8,0xFEDC03B4,0xFEDC03B4,0xFEDC03B4,0xC0DC0374,0xC0DC0374,0xA4D80375,0xFCB802AA,0xFCB802AA,0xFCB802AA,0xCCB40009,0xCCB40009,0xA8BC00A6,0xB0B00288,0xB0B00288,0xA0AC0041,0x92B0028A,0x17C0BE8,0x17C0BE8,0x17C0BE8,0xDA700372,0xDA700372,0xA4A40373,0xCA440289,0xCA440289,0xA6640002,0x9474028A,0x43F80BE8, -0x43F80BE8,0xA400037B,0x900002EB,0x7E000BEB,0xFEE806AD,0xF6FC09B4,0x1000BE8,0xFED402DD,0xFEC0005D,0xECB00005,0xDAB80021,0xC0B00002,0xFED0064B,0xFEBC0272,0xCEA0028A,0xA6640002,0x11FC0BE8,0x1280288,0x1280288,0x1280288,0x1280288,0xEF040000,0xEF040000,0xEF040000,0xB7040000,0xB7040000,0xA5040001,0x1B80288,0x1B80288,0x1B80288,0xC4C40001,0xC4C40001, -0xA6E00001,0x61F80288,0x61F80288,0xA4740001,0x9200028A,0x1B80288,0x1B80288,0x1B80288,0xC4C40001,0xC4C40001,0xA6E00001,0x61F80288,0x61F80288,0xA4740001,0x9200028A,0x61F80288,0x61F80288,0xA4740001,0x9200028A,0x9200028A,0xFF100152,0xF92001A5,0x1280288,0xFCF400B5,0xFEC8000A,0xE4BC0000,0xD0CC0000,0xC2AC0000,0xFEF80171,0xFEDC0091,0x37FC0288,0xA4740001, -0x37FC0288,0x1740374,0xFF5C0071,0xE54C0000,0xCF4C0001,0x31FC0372,0xF5000001,0xCF240001,0x9BF80372,0xCEA40001,0xBA000372,0x31FC0372,0xF5000001,0xCF240001,0x9BF80372,0xCEA40001,0xBA000372,0x9BF80372,0xCEA40001,0xBA000372,0xBA000372,0x31FC0372,0xF5000001,0xCF240001,0x9BF80372,0xCEA40001,0xBA000372,0x9BF80372,0xCEA40001,0xBA000372,0xBA000372,0x9BF80372, -0xCEA40001,0xBA000372,0xBA000372,0xBA000372,0xFF5002A8,0xF8C0372,0xFF6C02B9,0xFF30016D,0xFEE80075,0xEEA80000,0xCED40001,0xCE680001,0xF9500288,0xFF140145,0xD6DC0000,0xBA000372,0x7FFC0372,0xD80374,0xD80374,0xD80374,0xD80374,0xD80374,0xD80374,0xD80374,0xD80374,0xD80374,0xD80374,0xD6B00000,0xD6B00000,0xD6B00000,0xD6B00000,0xD6B00000, -0xD6B00000,0x96B00000,0x96B00000,0x96B00000,0x80B00001,0x1440372,0x1440372,0x1440372,0x1440372,0x1440372,0x1440372,0xA6640001,0xA6640001,0xA6640001,0x80880001,0x27F80372,0x27F80372,0x27F80372,0x80080001,0x6C000372,0xFECC0208,0xD80374,0xD80374,0xFAC400C8,0xFEB40028,0xF4B00000,0xF4B00000,0xBEB00000,0xFCB801A5,0xFAAC00A4,0xACA40000,0xA6640001, -0x1D00372,}; -static const uint32_t g_etc1_to_bc7_m6_table147[] = { -0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x13FC0000, -0x13FC0000,0x13FC0000,0x13FC0000,0x5E000001,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xC00000,0xCC0000,0xCC0000,0xCC0000,0x11C0000,0x1980000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000, -0x3980000,0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x88000001,0xB240000,0x1140000,0x1140000,0x1400000,0x3580000,0x1780000,0x1780000,0x1D00000,0x1400000,0x3580000,0x23FC0000,0x51FC0000, -0x23FC0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x87FC0000,0x87FC0000,0xAC000001,0xAC000001,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x87FC0000,0x87FC0000,0xAC000001,0xAC000001,0x87FC0000, -0x87FC0000,0xAC000001,0xAC000001,0xAC000001,0x1940000,0x1740000,0x15C0000,0x1D80000,0x39FC0000,0x67FC0000,0x77FC0000,0x91FC0000,0x1B40000,0x9FC0000,0x67FC0000,0xAC000001,0x67FC0000,0x1B00000,0x89FC0000,0xC5FC0000,0xD6000001,0x89FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0x89FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xD6000001, -0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0x89FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0xD6000001,0x41FC0000,0x1CC0000,0x1CC0000,0xA1FC0000,0xBFF80000,0xCFF40000,0xD6000001,0xD6000001,0x6FFC0000,0xADFC0000,0xD5FC0000,0xD6000001, -0xB7FC0000,0x1381EA8,0xFF241198,0xFF100BE8,0xD7100BE9,0xFF0C0C98,0xFEF403FD,0xDCF80531,0xE8E806E6,0xD4E803AE,0xC2E806E6,0xFEF40FB8,0xFECC0345,0xDED80411,0xF0BC038E,0xD6C0000E,0xC4C803AE,0xD6C00BE8,0xCAB00413,0xBCB80533,0xAEC00BEB,0x1D01EA8,0xFEA00D2F,0xD6C40BE9,0xFE700736,0xD88C038E,0xC29C06E6,0xFA2C0BE9,0xD648028A,0xC05403A3,0xAE740BEB,0x6DF81EA8, -0xD0000C49,0xBC0007F4,0xAC000D93,0x9A001EAC,0xFF2013CE,0xFF2C1AAE,0xFF2C1BCD,0xFF0009E0,0xFED8026D,0xF4C00006,0xDAC80046,0xD2B80046,0xFF101346,0xFEE008B4,0xDCAC029D,0xC05403A3,0x45FC1EA8,0x1600BE8,0xFF4404E1,0xF5380288,0xD7380289,0xFF3004D8,0xFB140005,0xD91C0042,0xDF140372,0xD10C00A5,0xC3140372,0xFFC0BE8,0xFED802D9,0xD8FC0289,0xF8A80372,0xD6C8000A, -0xC2DC0372,0x8BF80BE8,0xD6440289,0xC2280372,0xAE000BEB,0xFFC0BE8,0xFED802D9,0xD8FC0289,0xF8A80372,0xD6C8000A,0xC2DC0372,0x8BF80BE8,0xD6440289,0xC2280372,0xAE000BEB,0x8BF80BE8,0xD6440289,0xC2280372,0xAE000BEB,0xAE000BEB,0xFB480849,0xF5580A7E,0xF75C0A69,0xFF180496,0xFEE80149,0xF2C40006,0xD8DC0001,0xD4A40022,0xFF34084E,0xFF040403,0xE09C0288,0xC2280372, -0x6BFC0BE8,0x1100BE8,0x1100BE8,0x1100BE8,0x1100BE8,0xFEF403E4,0xFEF403E4,0xFEF403E4,0xC8EC0374,0xC8EC0374,0xACE80375,0xFEC802CA,0xFEC802CA,0xFEC802CA,0xD4C40009,0xD4C40009,0xB0CC00A6,0xB8C00288,0xB8C00288,0xA8BC0041,0x9AC0028A,0x1940BE8,0x1940BE8,0x1940BE8,0xE2800372,0xE2800372,0xACB40373,0xD2540289,0xD2540289,0xAE740002,0x9C84028A,0x4FF80BE8, -0x4FF80BE8,0xAC040373,0x9A0002CA,0x86000BEB,0xFD000714,0xFF0C09B4,0x1100BE8,0xFEE8034D,0xFED40099,0xF4C00005,0xE2C80021,0xC8C00002,0xFEE406AA,0xFED002BA,0xD6B0028A,0xAE740002,0x1FFC0BE8,0x1380288,0x1380288,0x1380288,0x1380288,0xF7140000,0xF7140000,0xF7140000,0xBF140000,0xBF140000,0xAD140001,0x1D00288,0x1D00288,0x1D00288,0xCCD40001,0xCCD40001, -0xAEF00001,0x6DF80288,0x6DF80288,0xAC840001,0x9A00028A,0x1D00288,0x1D00288,0x1D00288,0xCCD40001,0xCCD40001,0xAEF00001,0x6DF80288,0x6DF80288,0xAC840001,0x9A00028A,0x6DF80288,0x6DF80288,0xAC840001,0x9A00028A,0x9A00028A,0xFF20016D,0xFF2C01B1,0x1380288,0xFD0400CA,0xFEE00019,0xECCC0000,0xD8DC0000,0xCABC0000,0xFD100188,0xFEE800B4,0x45FC0288,0xAC840001, -0x45FC0288,0x1840374,0xFF68009D,0xED5C0000,0xD75C0001,0x49FC0372,0xFD100001,0xD7340001,0xA7F80372,0xD6B40001,0xC2000372,0x49FC0372,0xFD100001,0xD7340001,0xA7F80372,0xD6B40001,0xC2000372,0xA7F80372,0xD6B40001,0xC2000372,0xC2000372,0x49FC0372,0xFD100001,0xD7340001,0xA7F80372,0xD6B40001,0xC2000372,0xA7F80372,0xD6B40001,0xC2000372,0xC2000372,0xA7F80372, -0xD6B40001,0xC2000372,0xC2000372,0xC2000372,0xFB7002AD,0x1A00372,0xF98002D4,0xFF40019A,0xFF0C00B5,0xF6B80000,0xD6E40001,0xD6780001,0xFF5C0290,0xFF2C0185,0xDEEC0000,0xC2000372,0x8FFC0372,0xE80374,0xE80374,0xE80374,0xE80374,0xE80374,0xE80374,0xE80374,0xE80374,0xE80374,0xE80374,0xDEC00000,0xDEC00000,0xDEC00000,0xDEC00000,0xDEC00000, -0xDEC00000,0x9EC00000,0x9EC00000,0x9EC00000,0x88C00001,0x15C0372,0x15C0372,0x15C0372,0x15C0372,0x15C0372,0x15C0372,0xAE740001,0xAE740001,0xAE740001,0x88980001,0x33F80372,0x33F80372,0x33F80372,0x88180001,0x74000372,0xF8E00221,0xE80374,0xE80374,0xFCD400DD,0xFCCC003D,0xFCC00000,0xFCC00000,0xC6C00000,0xF8CC01C2,0xFEBC00B9,0xB4B40000,0xAE740001, -0x1F00372,}; -static const uint32_t g_etc1_to_bc7_m6_table148[] = { -0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x21F80000, -0x21F80000,0x21F80000,0x21F80000,0x68000000,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xD00001,0xE00000,0xE00000,0xE00000,0x1380000,0x1BC0000,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1240001,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000, -0x1B40000,0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x1B40000,0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x5FF80000,0x5FF80000,0x5FF80000,0x92000000,0x92000000,0x5380000,0x1240001,0x1240001,0x3540000,0x1700000,0x1900000,0x1900000,0x3EC0000,0x3540000,0x1700000,0x35FC0000,0x5FF80000, -0x35FC0000,0x16C0001,0x16C0001,0x16C0001,0x16C0001,0x25FC0000,0x25FC0000,0x25FC0000,0x95F80000,0x95F80000,0xB6000000,0x25FC0000,0x25FC0000,0x25FC0000,0x95F80000,0x95F80000,0xB6000000,0x95F80000,0x95F80000,0xB6000000,0xB6000000,0x25FC0000,0x25FC0000,0x25FC0000,0x95F80000,0x95F80000,0xB6000000,0x95F80000,0x95F80000,0xB6000000,0xB6000000,0x95F80000, -0x95F80000,0xB6000000,0xB6000000,0xB6000000,0x3A80000,0xB840000,0x16C0001,0x3F00000,0x4FFC0000,0x79FC0000,0x87F80000,0x9DFC0000,0x1CC0000,0x25FC0000,0x79FC0000,0xB6000000,0x79FC0000,0x1C00001,0xA5FC0000,0xD3FC0000,0xE0000000,0xA5FC0000,0xD3FC0000,0xE0000000,0xD3FC0000,0xE0000000,0xE0000000,0xA5FC0000,0xD3FC0000,0xE0000000,0xD3FC0000,0xE0000000, -0xE0000000,0xD3FC0000,0xE0000000,0xE0000000,0xE0000000,0xA5FC0000,0xD3FC0000,0xE0000000,0xD3FC0000,0xE0000000,0xE0000000,0xD3FC0000,0xE0000000,0xE0000000,0xE0000000,0xD3FC0000,0xE0000000,0xE0000000,0xE0000000,0xE0000000,0x6DFC0000,0x1E00000,0x1E00000,0xB7FC0000,0xCDFC0000,0xD9FC0000,0xE0000000,0xE0000000,0x8FFC0000,0xC1FC0000,0xDFF00000,0xE0000000, -0xC7FC0000,0x1481EAC,0xFF30126C,0xFF200C0F,0xE1200BEB,0xFF180DB4,0xFF080497,0xE5080533,0xF0FC06E6,0xDCF803AE,0xCAFC06E6,0xFF00109C,0xFEE4040B,0xE8EC0413,0xFAD0038E,0xE0D0000E,0xCCD803AE,0xDED40BE9,0xD4C40411,0xC4C80531,0xB8D00BE9,0x3E81EA8,0xFEC00DE7,0xE0D40BE8,0xFE8807BE,0xE2A0038E,0xCAB006E6,0xFE4C0BFB,0xE05C028A,0xCA6803A1,0xB8840BE8,0x79FC1EA8, -0xDC000C13,0xC400078E,0xB2000D35,0xA4001EA8,0xFF3414AF,0xFB441B24,0xFD481BF4,0xFF140B27,0xFEF00392,0xFED40006,0xE4DC0046,0xDCCC0046,0xFF181429,0xFF0009F4,0xE6BC029D,0xCA6803A1,0x57FC1EA8,0x1700BEB,0xFF5C056A,0xFF48028A,0xE148028A,0xFF440545,0xFF28001A,0xE3300041,0xE7280373,0xDB2000A6,0xCB240375,0x2BFC0BE8,0xFEFC032A,0xE1100288,0xFEC00375,0xDED80009, -0xCAF00374,0x97FC0BE8,0xE0540288,0xCA400374,0xB8000BE8,0x2BFC0BE8,0xFEFC032A,0xE1100288,0xFEC00375,0xDED80009,0xCAF00374,0x97FC0BE8,0xE0540288,0xCA400374,0xB8000BE8,0x97FC0BE8,0xE0540288,0xCA400374,0xB8000BE8,0xB8000BE8,0xFF580895,0xFF6C0A7D,0xFF6C0A6E,0xFF30053A,0xFF1001EE,0xFCD80005,0xE0EC0002,0xDCBC0021,0xFF4408B8,0xFF2404CA,0xE8B00289,0xCA400374, -0x7DF80BE8,0x1200BEB,0x1200BEB,0x1200BEB,0x1200BEB,0xFF040422,0xFF040422,0xFF040422,0xD2FC0372,0xD2FC0372,0xB6FC0372,0xFEE00301,0xFEE00301,0xFEE00301,0xDCD4000A,0xDCD4000A,0xBAE000A5,0xC2D00289,0xC2D00289,0xB2D00042,0xA4D00289,0x1B00BE8,0x1B00BE8,0x1B00BE8,0xEC900372,0xEC900372,0xB6C40372,0xDC640289,0xDC640289,0xB6840005,0xA4980288,0x5DF40BE8, -0x5DF40BE8,0xB6100372,0xA20002A9,0x90000BE8,0xFF100755,0xF71C09F6,0x1200BEB,0xFEF803C2,0xFEE400F1,0xFAD40005,0xEAD80022,0xD2D00001,0xFEF806F2,0xFEE80352,0xE0C00289,0xB6840005,0x31FC0BE8,0x148028A,0x148028A,0x148028A,0x148028A,0xFF280001,0xFF280001,0xFF280001,0xC9240001,0xC9240001,0xB7240001,0x3E80288,0x3E80288,0x3E80288,0xD6E40001,0xD6E40001, -0xB7040000,0x79FC0288,0x79FC0288,0xB6900000,0xA4000288,0x3E80288,0x3E80288,0x3E80288,0xD6E40001,0xD6E40001,0xB7040000,0x79FC0288,0x79FC0288,0xB6900000,0xA4000288,0x79FC0288,0x79FC0288,0xB6900000,0xA4000288,0xA4000288,0xFB34018A,0xFB4401C2,0x148028A,0xFF1800E1,0xFCFC0032,0xF4E00000,0xE2EC0000,0xD2D00000,0xF92801A5,0xFF0400D0,0x57FC0288,0xB6900000, -0x57FC0288,0x1980372,0xFF8000E1,0xF5700001,0xE16C0001,0x65FC0372,0xFF2C0011,0xE1440000,0xB3FC0372,0xE0C00000,0xCA000374,0x65FC0372,0xFF2C0011,0xE1440000,0xB3FC0372,0xE0C00000,0xCA000374,0xB3FC0372,0xE0C00000,0xCA000374,0xCA000374,0x65FC0372,0xFF2C0011,0xE1440000,0xB3FC0372,0xE0C00000,0xCA000374,0xB3FC0372,0xE0C00000,0xCA000374,0xCA000374,0xB3FC0372, -0xE0C00000,0xCA000374,0xCA000374,0xCA000374,0xFB8402D2,0x1B40372,0xFF8C02F2,0xFD6801E1,0xFF2800E9,0xFECC0001,0xE0F40000,0xE0840000,0xFF7002C5,0xFF4C01B1,0xE7000000,0xCA000374,0x9FFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xFC0372,0xE6D40001,0xE6D40001,0xE6D40001,0xE6D40001,0xE6D40001, -0xE6D40001,0xA6D40001,0xA6D40001,0xA6D40001,0x92D00001,0x3740372,0x3740372,0x3740372,0x3740372,0x3740372,0x3740372,0xB8840001,0xB8840001,0xB8840001,0x92A80000,0x3FFC0372,0x3FFC0372,0x3FFC0372,0x92240000,0x7C000374,0xFEEC0239,0xFC0372,0xFC0372,0xFEE800F2,0xFCE00055,0xFED40002,0xFED40002,0xCED40001,0xFED801D4,0xFED000D0,0xBAC80001,0xB8840001, -0xDFC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table149[] = { -0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x1500000,0x2DF80000, -0x2DF80000,0x2DF80000,0x2DF80000,0x70000000,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xE00001,0xF00000,0xF00000,0xF00000,0x1500000,0x1E00000,0x1340001,0x1340001,0x1340001,0x1340001,0x1340001,0x1340001,0x1340001,0x1340001,0x1340001,0x1340001,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000, -0x1CC0000,0x6BF80000,0x6BF80000,0x6BF80000,0x9A000000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x6BF80000,0x6BF80000,0x6BF80000,0x9A000000,0x6BF80000,0x6BF80000,0x6BF80000,0x9A000000,0x9A000000,0xD480000,0x1340001,0x1340001,0x1680000,0x1840000,0x3A40000,0x3A40000,0xBFC0000,0x1680000,0x1840000,0x43FC0000,0x6BF80000, -0x43FC0000,0x17C0001,0x17C0001,0x17C0001,0x17C0001,0x3DFC0000,0x3DFC0000,0x3DFC0000,0xA1F80000,0xA1F80000,0xBE000000,0x3DFC0000,0x3DFC0000,0x3DFC0000,0xA1F80000,0xA1F80000,0xBE000000,0xA1F80000,0xA1F80000,0xBE000000,0xBE000000,0x3DFC0000,0x3DFC0000,0x3DFC0000,0xA1F80000,0xA1F80000,0xBE000000,0xA1F80000,0xA1F80000,0xBE000000,0xBE000000,0xA1F80000, -0xA1F80000,0xBE000000,0xBE000000,0xBE000000,0x1BC0000,0x1980000,0x17C0001,0xFFC0000,0x63FC0000,0x87FC0000,0x95F80000,0xA9F40000,0x1E00000,0x3DFC0000,0x87FC0000,0xBE000000,0x87FC0000,0x1D00001,0xBDFC0000,0xDFF80000,0xE8000000,0xBDFC0000,0xDFF80000,0xE8000000,0xDFF80000,0xE8000000,0xE8000000,0xBDFC0000,0xDFF80000,0xE8000000,0xDFF80000,0xE8000000, -0xE8000000,0xDFF80000,0xE8000000,0xE8000000,0xE8000000,0xBDFC0000,0xDFF80000,0xE8000000,0xDFF80000,0xE8000000,0xE8000000,0xDFF80000,0xE8000000,0xE8000000,0xE8000000,0xDFF80000,0xE8000000,0xE8000000,0xE8000000,0xE8000000,0x95FC0000,0x1F00000,0x1F00000,0xCBFC0000,0xDBFC0000,0xE5F00000,0xE8000000,0xE8000000,0xAFFC0000,0xD1FC0000,0xE9C40000,0xE8000000, -0xD7FC0000,0x1581EAC,0xFF44131B,0xFF340C64,0xE9300BEB,0xFF300EB4,0xFF180563,0xED180533,0xF90C06E6,0xE50803AE,0xD30C06E6,0xFF181164,0xFEFC04F3,0xF0FC0413,0xFEE4039E,0xE8E0000E,0xD4E803AE,0xE6E40BE9,0xDCD40411,0xCCD80531,0xC0E00BE9,0x7FC1EA8,0xFED80EA7,0xE8E40BE8,0xFEAC0866,0xEAB0038E,0xD2C006E6,0xFE640C43,0xE86C028A,0xD27803A1,0xC0940BE8,0x85FC1EA8, -0xE6000BF8,0xD000073E,0xBE000CD5,0xAC001EA8,0xFF3C1574,0xFF4C1B64,0xF5581C63,0xFF240C9E,0xFF0404CE,0xFEE80032,0xECEC0046,0xE4DC0046,0xFF3414DA,0xFF100B2E,0xEECC029D,0xD27803A1,0x65FC1EA8,0x1800BEB,0xFF6805FA,0xFF5802A3,0xE958028A,0xFF5C05CD,0xFF40006A,0xEB400041,0xEF380373,0xE33000A6,0xD3340375,0x43FC0BE8,0xFF14039A,0xE9200288,0xFEE4039D,0xE6E80009, -0xD3000374,0xA3FC0BE8,0xE8640288,0xD2500374,0xC0000BE8,0x43FC0BE8,0xFF14039A,0xE9200288,0xFEE4039D,0xE6E80009,0xD3000374,0xA3FC0BE8,0xE8640288,0xD2500374,0xC0000BE8,0xA3FC0BE8,0xE8640288,0xD2500374,0xC0000BE8,0xC0000BE8,0xFF640905,0xF77C0AC3,0xF9800AB3,0xFF4405E1,0xFF24028E,0xFEF0001B,0xE8FC0002,0xE4CC0021,0xFF5C08DE,0xFF400550,0xF0C00289,0xD2500374, -0x8BFC0BE8,0x1300BEB,0x1300BEB,0x1300BEB,0x1300BEB,0xFF180463,0xFF180463,0xFF180463,0xDB0C0372,0xDB0C0372,0xBF0C0372,0xFEF4033B,0xFEF4033B,0xFEF4033B,0xE4E4000A,0xE4E4000A,0xC2F000A5,0xCAE00289,0xCAE00289,0xBAE00042,0xACE00289,0x3C40BE8,0x3C40BE8,0x3C40BE8,0xF4A00372,0xF4A00372,0xBED40372,0xE4740289,0xE4740289,0xBE940005,0xACA80288,0x67FC0BE8, -0x67FC0BE8,0xBE200372,0xAC000291,0x98000BE8,0xFF200792,0xFF2C09F6,0x1300BEB,0xFF100426,0xFEF8014D,0xFEE8000E,0xF2E80022,0xDAE00001,0xFF100749,0xFEF403B6,0xE8D00289,0xBE940005,0x3FFC0BE8,0x158028A,0x158028A,0x158028A,0x158028A,0xFD38000A,0xFD38000A,0xFD38000A,0xD1340001,0xD1340001,0xBF340001,0x7FC0288,0x7FC0288,0x7FC0288,0xDEF40001,0xDEF40001, -0xBF140000,0x85FC0288,0x85FC0288,0xBEA00000,0xAC000288,0x7FC0288,0x7FC0288,0x7FC0288,0xDEF40001,0xDEF40001,0xBF140000,0x85FC0288,0x85FC0288,0xBEA00000,0xAC000288,0x85FC0288,0x85FC0288,0xBEA00000,0xAC000288,0xAC000288,0xF74801A5,0xF35401E1,0x158028A,0xFD300109,0xFD140048,0xFCF00000,0xEAFC0000,0xDAE00000,0xFF3401A9,0xFF1800E9,0x65FC0288,0xBEA00000, -0x65FC0288,0x1A80372,0xFF8C0131,0xFD800001,0xE97C0001,0x7DFC0372,0xFF4C0041,0xE9540000,0xBFFC0372,0xE8D00000,0xD2000374,0x7DFC0372,0xFF4C0041,0xE9540000,0xBFFC0372,0xE8D00000,0xD2000374,0xBFFC0372,0xE8D00000,0xD2000374,0xD2000374,0x7DFC0372,0xFF4C0041,0xE9540000,0xBFFC0372,0xE8D00000,0xD2000374,0xBFFC0372,0xE8D00000,0xD2000374,0xD2000374,0xBFFC0372, -0xE8D00000,0xD2000374,0xD2000374,0xD2000374,0xFF9402D4,0x1C40372,0xFBA402F9,0xFF740212,0xFF500139,0xFEF0001A,0xE9040000,0xE8940000,0xFB9002D2,0xFF6401F9,0xEF100000,0xD2000374,0xAFFC0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0x10C0372,0xEEE40001,0xEEE40001,0xEEE40001,0xEEE40001,0xEEE40001, -0xEEE40001,0xAEE40001,0xAEE40001,0xAEE40001,0x9AE00001,0x38C0372,0x38C0372,0x38C0372,0x38C0372,0x38C0372,0x38C0372,0xC0940001,0xC0940001,0xC0940001,0x9AB80000,0x4BFC0372,0x4BFC0372,0x4BFC0372,0x9A340000,0x84000374,0xFB040242,0x10C0372,0x10C0372,0xFEF40115,0xFEEC006A,0xFEE8000D,0xFEE8000D,0xD6E40001,0xFCF001E1,0xFEE400F4,0xC2D80001,0xC0940001, -0x1DF80372,}; -static const uint32_t g_etc1_to_bc7_m6_table150[] = { -0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x39F80000, -0x39F80000,0x39F80000,0x39F80000,0x78000000,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0xF00001,0x9000000,0x9000000,0x9000000,0x1680000,0x3FC0000,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1440001,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000, -0x1E40000,0x77F80000,0x77F80000,0x77F80000,0xA2000000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x1E40000,0x77F80000,0x77F80000,0x77F80000,0xA2000000,0x77F80000,0x77F80000,0x77F80000,0xA2000000,0xA2000000,0x15C0000,0x1440001,0x1440001,0x17C0000,0x1980000,0x1BC0000,0x1BC0000,0x1DFC0000,0x17C0000,0x1980000,0x53FC0000,0x77F80000, -0x53FC0000,0x18C0001,0x18C0001,0x18C0001,0x18C0001,0x55FC0000,0x55FC0000,0x55FC0000,0xADF80000,0xADF80000,0xC6000000,0x55FC0000,0x55FC0000,0x55FC0000,0xADF80000,0xADF80000,0xC6000000,0xADF80000,0xADF80000,0xC6000000,0xC6000000,0x55FC0000,0x55FC0000,0x55FC0000,0xADF80000,0xADF80000,0xC6000000,0xADF80000,0xADF80000,0xC6000000,0xC6000000,0xADF80000, -0xADF80000,0xC6000000,0xC6000000,0xC6000000,0x7CC0000,0x1A80000,0x18C0001,0x2FFC0000,0x77FC0000,0x97FC0000,0xA1FC0000,0xB3FC0000,0x1F40000,0x55FC0000,0x97FC0000,0xC6000000,0x97FC0000,0x1E00001,0xD5FC0000,0xEBF80000,0xF0000000,0xD5FC0000,0xEBF80000,0xF0000000,0xEBF80000,0xF0000000,0xF0000000,0xD5FC0000,0xEBF80000,0xF0000000,0xEBF80000,0xF0000000, -0xF0000000,0xEBF80000,0xF0000000,0xF0000000,0xF0000000,0xD5FC0000,0xEBF80000,0xF0000000,0xEBF80000,0xF0000000,0xF0000000,0xEBF80000,0xF0000000,0xF0000000,0xF0000000,0xEBF80000,0xF0000000,0xF0000000,0xF0000000,0xF0000000,0xBDFC0000,0x27FC0000,0x27FC0000,0xDDFC0000,0xE9F80000,0xEFF00000,0xF0000000,0xF0000000,0xCDFC0000,0xE3FC0000,0xF1D40000,0xF0000000, -0xE5FC0000,0x1681EAC,0xFF50140B,0xFF480D03,0xF1400BEB,0xFF440FB6,0xFF2C0663,0xF5280533,0xFF1C06EC,0xED1803AE,0xDB1C06E6,0xFF30124C,0xFF14061B,0xF90C0413,0xFEFC0406,0xF0F0000E,0xDCF803AE,0xEEF40BE9,0xE4E40411,0xD4E80531,0xC8F00BE9,0x1FFC1EA8,0xFEF00F87,0xF0F40BE8,0xFEC0096E,0xF2C0038E,0xDAD006E6,0xFE880CBB,0xF07C028A,0xDA8803A1,0xC8A40BE8,0x91FC1EA8, -0xF0000BEB,0xD8000716,0xC4000C91,0xB4001EA8,0xFF50163F,0xFB641BB6,0xFD681C63,0xFF340DE2,0xFF1C0636,0xFEFC00C2,0xF4FC0046,0xECEC0046,0xFF4415D9,0xFF240CCA,0xF6DC029D,0xDA8803A1,0x75FC1EA8,0x1900BEB,0xFF80068A,0xFF7002EB,0xF168028A,0xFF680655,0xFF5400F6,0xF3500041,0xF7480373,0xEB4000A6,0xDB440375,0x5BFC0BE8,0xFF2C042A,0xF1300288,0xFEFC03ED,0xEEF80009, -0xDB100374,0xAFFC0BE8,0xF0740288,0xDA600374,0xC8000BE8,0x5BFC0BE8,0xFF2C042A,0xF1300288,0xFEFC03ED,0xEEF80009,0xDB100374,0xAFFC0BE8,0xF0740288,0xDA600374,0xC8000BE8,0xAFFC0BE8,0xF0740288,0xDA600374,0xC8000BE8,0xC8000BE8,0xFF78093E,0xFF8C0AC3,0xFF8C0ABB,0xFF5C0671,0xFF380356,0xFF10007D,0xF10C0002,0xECDC0021,0xFF780956,0xFF540602,0xF8D00289,0xDA600374, -0x9BFC0BE8,0x1400BEB,0x1400BEB,0x1400BEB,0x1400BEB,0xFF2404B3,0xFF2404B3,0xFF2404B3,0xE31C0372,0xE31C0372,0xC71C0372,0xFF0C0393,0xFF0C0393,0xFF0C0393,0xECF4000A,0xECF4000A,0xCB0000A5,0xD2F00289,0xD2F00289,0xC2F00042,0xB4F00289,0x3DC0BE8,0x3DC0BE8,0x3DC0BE8,0xFCB00372,0xFCB00372,0xC6E40372,0xEC840289,0xEC840289,0xC6A40005,0xB4B80288,0x73FC0BE8, -0x73FC0BE8,0xC6300372,0xB4000289,0xA0000BE8,0xFF2C07F1,0xF73C0A3B,0x1400BEB,0xFF1804AA,0xFF0C01B9,0xFEF80032,0xFAF80022,0xE2F00001,0xFF20078D,0xFF10042D,0xF0E00289,0xC6A40005,0x4FFC0BE8,0x168028A,0x168028A,0x168028A,0x168028A,0xFD4C0019,0xFD4C0019,0xFD4C0019,0xD9440001,0xD9440001,0xC7440001,0x1FFC0288,0x1FFC0288,0x1FFC0288,0xE7040001,0xE7040001, -0xC7240000,0x91FC0288,0x91FC0288,0xC6B00000,0xB4000288,0x1FFC0288,0x1FFC0288,0x1FFC0288,0xE7040001,0xE7040001,0xC7240000,0x91FC0288,0x91FC0288,0xC6B00000,0xB4000288,0x91FC0288,0x91FC0288,0xC6B00000,0xB4000288,0xB4000288,0xFF5801A5,0xFB6401E1,0x168028A,0xFF440120,0xFD280064,0xFF080008,0xF30C0000,0xE2F00000,0xFD4C01C2,0xFF30010D,0x75FC0288,0xC6B00000, -0x75FC0288,0x1B80372,0xFFA40179,0xFF900011,0xF18C0001,0x95FC0372,0xFF640089,0xF1640000,0xCBFC0372,0xF0E00000,0xDA000374,0x95FC0372,0xFF640089,0xF1640000,0xCBFC0372,0xF0E00000,0xDA000374,0xCBFC0372,0xF0E00000,0xDA000374,0xDA000374,0x95FC0372,0xFF640089,0xF1640000,0xCBFC0372,0xF0E00000,0xDA000374,0xCBFC0372,0xF0E00000,0xDA000374,0xDA000374,0xCBFC0372, -0xF0E00000,0xDA000374,0xDA000374,0xDA000374,0xFBAC02F9,0x3D40372,0xFFAC0321,0xFF940242,0xFF680179,0xFF180055,0xF1140000,0xF0A40000,0xFF9802F2,0xFF880221,0xF7200000,0xDA000374,0xBFF80372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0x11C0372,0xF6F40001,0xF6F40001,0xF6F40001,0xF6F40001,0xF6F40001, -0xF6F40001,0xB6F40001,0xB6F40001,0xB6F40001,0xA2F00001,0x3A40372,0x3A40372,0x3A40372,0x3A40372,0x3A40372,0x3A40372,0xC8A40001,0xC8A40001,0xC8A40001,0xA2C80000,0x57FC0372,0x57FC0372,0x57FC0372,0xA2440000,0x8C000374,0xFF0C0262,0x11C0372,0x11C0372,0xFF040132,0xFF000082,0xFEF80019,0xFEF80019,0xDEF40001,0xF9040200,0xFEF80109,0xCAE80001,0xC8A40001, -0x2BFC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table151[] = { -0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x45F80000, -0x45F80000,0x45F80000,0x45F80000,0x80000000,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1000001,0x1140000,0x1140000,0x1140000,0x1800000,0x13FC0000,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000, -0x1FC0000,0x83F80000,0x83F80000,0x83F80000,0xAA000000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x83F80000,0x83F80000,0x83F80000,0xAA000000,0x83F80000,0x83F80000,0x83F80000,0xAA000000,0xAA000000,0x16C0000,0x1540001,0x1540001,0x38C0000,0x1AC0000,0x1D00000,0x1D00000,0x31FC0000,0x38C0000,0x1AC0000,0x61FC0000,0x83F80000, -0x61FC0000,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xCE000000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xCE000000,0xB9F80000,0xB9F80000,0xCE000000,0xCE000000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xCE000000,0xB9F80000,0xB9F80000,0xCE000000,0xCE000000,0xB9F80000, -0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0x3E00000,0x5B80000,0x19C0001,0x4DFC0000,0x8BFC0000,0xA5FC0000,0xAFFC0000,0xBFF40000,0x15FC0000,0x6FFC0000,0xA5FC0000,0xCE000000,0xA5FC0000,0x1F00001,0xEFFC0000,0xF7F80000,0xF8000000,0xEFFC0000,0xF7F80000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xEFFC0000,0xF7F80000,0xF8000000,0xF7F80000,0xF8000000, -0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xEFFC0000,0xF7F80000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xF8000000,0xE3FC0000,0xA7FC0000,0xA7FC0000,0xF1FC0000,0xF5FC0000,0xF9F00000,0xF8000000,0xF8000000,0xEBFC0000,0xF3FC0000,0xF9E40000,0xF8000000, -0xF5FC0000,0x1781EAC,0xFF6814E3,0xFF580DBC,0xF9500BEB,0xFF5010EE,0xFF4007AB,0xFD380533,0xFF2C0746,0xF52803AE,0xE32C06E6,0xFF441329,0xFF280782,0xFF180417,0xFF0804CE,0xF900000E,0xE50803AE,0xF7040BE9,0xECF40411,0xDCF80531,0xD1000BE9,0x37FC1EA8,0xFF14107B,0xF9040BE8,0xFEE40A7E,0xFAD0038E,0xE2E006E6,0xFEAC0D6B,0xF88C028A,0xE29803A1,0xD0B40BE8,0x9DFC1EA8, -0xF8080BE8,0xE00006F8,0xCE000C58,0xBC001EA8,0xFF641706,0xFF6C1C06,0xF5781CD4,0xFF440F42,0xFF2807C3,0xFF1001BE,0xFD0C0046,0xF4FC0046,0xFF54168D,0xFF400E29,0xFEEC029D,0xE29803A1,0x83FC1EA8,0x1A00BEB,0xFF8C072A,0xFF7C036B,0xF978028A,0xFF8006ED,0xFF6401AA,0xFB600041,0xFF580373,0xF35000A6,0xE3540375,0x75FC0BE8,0xFF4C04E1,0xF9400288,0xFF200465,0xF7080009, -0xE3200374,0xBBFC0BE8,0xF8840288,0xE2700374,0xD0000BE8,0x75FC0BE8,0xFF4C04E1,0xF9400288,0xFF200465,0xF7080009,0xE3200374,0xBBFC0BE8,0xF8840288,0xE2700374,0xD0000BE8,0xBBFC0BE8,0xF8840288,0xE2700374,0xD0000BE8,0xD0000BE8,0xFF940998,0xF79C0B0D,0xF9A00AFE,0xFF780729,0xFF50043A,0xFF2C0113,0xF91C0002,0xF4EC0021,0xFF88099B,0xFF6806C8,0xFEE40291,0xE2700374, -0xA9FC0BE8,0x1500BEB,0x1500BEB,0x1500BEB,0x1500BEB,0xFF3C0513,0xFF3C0513,0xFF3C0513,0xEB2C0372,0xEB2C0372,0xCF2C0372,0xFF1803F3,0xFF1803F3,0xFF1803F3,0xF504000A,0xF504000A,0xD31000A5,0xDB000289,0xDB000289,0xCB000042,0xBD000289,0x3F40BE8,0x3F40BE8,0x3F40BE8,0xFEC8037D,0xFEC8037D,0xCEF40372,0xF4940289,0xF4940289,0xCEB40005,0xBCC80288,0x7FFC0BE8, -0x7FFC0BE8,0xCE400372,0xBC0C0288,0xA8000BE8,0xFF3C0843,0xFF4C0A3B,0x1500BEB,0xFF34051E,0xFF200235,0xFF10007A,0xFF080031,0xEB000001,0xFF3407CA,0xFF2404BA,0xF8F00289,0xCEB40005,0x5FF80BE8,0x178028A,0x178028A,0x178028A,0x178028A,0xFF5C002D,0xFF5C002D,0xFF5C002D,0xE1540001,0xE1540001,0xCF540001,0x37FC0288,0x37FC0288,0x37FC0288,0xEF140001,0xEF140001, -0xCF340000,0x9DFC0288,0x9DFC0288,0xCEC00000,0xBC000288,0x37FC0288,0x37FC0288,0x37FC0288,0xEF140001,0xEF140001,0xCF340000,0x9DFC0288,0x9DFC0288,0xCEC00000,0xBC000288,0x9DFC0288,0x9DFC0288,0xCEC00000,0xBC000288,0xBC000288,0xFB6C01C2,0xF3740202,0x178028A,0xFD580139,0xFF400080,0xFF200014,0xFB1C0000,0xEB000000,0xF56401E1,0xFF480122,0x83FC0288,0xCEC00000, -0x83FC0288,0x1C80372,0xFFB001E1,0xFFA00052,0xF99C0001,0xAFFC0372,0xFF8800E9,0xF9740000,0xD7FC0372,0xF8F00000,0xE2000374,0xAFFC0372,0xFF8800E9,0xF9740000,0xD7FC0372,0xF8F00000,0xE2000374,0xD7FC0372,0xF8F00000,0xE2000374,0xE2000374,0xAFFC0372,0xFF8800E9,0xF9740000,0xD7FC0372,0xF8F00000,0xE2000374,0xD7FC0372,0xF8F00000,0xE2000374,0xE2000374,0xD7FC0372, -0xF8F00000,0xE2000374,0xE2000374,0xE2000374,0xFFB40311,0xBE40372,0xFBC40322,0xFFAC0288,0xFF7C01E1,0xFF4800A9,0xF9240000,0xF8B40000,0xF1C00320,0xFFA00269,0xFF300000,0xE2000374,0xCDFC0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0x12C0372,0xFF040001,0xFF040001,0xFF040001,0xFF040001,0xFF040001, -0xFF040001,0xBF040001,0xBF040001,0xBF040001,0xAB000001,0x3BC0372,0x3BC0372,0x3BC0372,0x3BC0372,0x3BC0372,0x3BC0372,0xD0B40001,0xD0B40001,0xD0B40001,0xAAD80000,0x63FC0372,0x63FC0372,0x63FC0372,0xAA540000,0x94000374,0xFB240265,0x12C0372,0x12C0372,0xFD1C0152,0xFF1400A2,0xFF08002D,0xFF08002D,0xE7040001,0xFF100208,0xFB0C0139,0xD2F80001,0xD0B40001, -0x3BFC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table152[] = { -0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x3980000,0x51FC0000, -0x51FC0000,0x51FC0000,0x51FC0000,0x88000001,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0xB240000,0xB240000,0xB240000,0x3980000,0x23FC0000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1680000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000, -0x1BFC0000,0x91F80000,0x91F80000,0x91F80000,0xB2000001,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x91F80000,0x91F80000,0x91F80000,0xB2000001,0x91F80000,0x91F80000,0x91F80000,0xB2000001,0xB2000001,0x1800000,0x1680000,0x1680000,0x7A00000,0x1C40000,0x3E80000,0x3E80000,0x47FC0000,0x7A00000,0x1C40000,0x73FC0000,0x91F80000, -0x73FC0000,0x1B00000,0x1B00000,0x1B00000,0x1B00000,0x89FC0000,0x89FC0000,0x89FC0000,0xC5FC0000,0xC5FC0000,0xD6000001,0x89FC0000,0x89FC0000,0x89FC0000,0xC5FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xC5FC0000,0xD6000001,0xD6000001,0x89FC0000,0x89FC0000,0x89FC0000,0xC5FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xC5FC0000,0xD6000001,0xD6000001,0xC5FC0000, -0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0x5F40000,0x1CC0000,0x1B00000,0x6FFC0000,0xA1FC0000,0xB7FC0000,0xBFF80000,0xCBF80000,0x41FC0000,0x89FC0000,0xB7FC0000,0xD6000001,0xB7FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x18C1D49,0xFF7414EC,0xFF6C0E58,0xFF640BE8,0xFF681121,0xFF5408B8,0xFF50057D,0xFF400775,0xFB3C0394,0xEB3C066D,0xFF5C130E,0xFF40088F,0xFF30046C,0xFF20053D,0xFF180009,0xEB1C033A,0xFF140AFE,0xF30C03CA,0xE5080492,0xD9140AFE,0x4FFC1D47,0xFF2C10D8,0xFF180BE8,0xFEFC0B01,0xFEE403A5,0xEAF4066B,0xFED80D6E,0xFEA00289,0xE8B00322,0xD8C80AFE,0xA9FC1D47, -0xFE280BE8,0xEA04066B,0xD6000B2E,0xC4001D47,0xFF7816B6,0xFD881AF5,0xFD881B85,0xFF5C0FB9,0xFF4808E2,0xFF2C02B1,0xFF240072,0xFB10002D,0xFF70165C,0xFF500E9A,0xFF0802CE,0xE8B00322,0x93FC1D47,0x1B00B01,0xFFA4072C,0xFF9403C9,0xFF8C0288,0xFF9806DD,0xFF7C0236,0xFF740055,0xFF700321,0xF7640084,0xEB6802F9,0x8DFC0AFE,0xFF64052B,0xFF540288,0xFF380471,0xFD200005, -0xEB3002FA,0xC7FC0AFE,0xFEA00288,0xEA8402F9,0xD8000AFE,0x8DFC0AFE,0xFF64052B,0xFF540288,0xFF380471,0xFD200005,0xEB3002FA,0xC7FC0AFE,0xFEA00288,0xEA8402F9,0xD8000AFE,0xC7FC0AFE,0xFEA00288,0xEA8402F9,0xD8000AFE,0xD8000AFE,0xFFA0091B,0xFFAC0A29,0xFFAC0A2C,0xFF94070A,0xFF680489,0xFF480194,0xFF340005,0xFCFC0011,0xFF98091E,0xFF8406A8,0xFF1002BB,0xEA8402F9, -0xB9FC0AFE,0x1640BE8,0x1640BE8,0x1640BE8,0x1640BE8,0xFF50057D,0xFF50057D,0xFF50057D,0xF3400374,0xF3400374,0xD73C0375,0xFF30046C,0xFF30046C,0xFF30046C,0xFF180009,0xFF180009,0xDB2000A6,0xE3140288,0xE3140288,0xD3100041,0xC514028A,0x15FC0BE8,0x15FC0BE8,0x15FC0BE8,0xFEE403A5,0xFEE403A5,0xD7080373,0xFCA80289,0xFCA80289,0xD8C80002,0xC6D8028A,0x8DFC0BE8, -0x8DFC0BE8,0xD6580373,0xC420028A,0xB0000BEB,0xFF58088D,0xF9600A7D,0x1640BE8,0xFF4405A2,0xFF3402D2,0xFF2400E9,0xFF240072,0xF3140002,0xFD4C0845,0xFF300545,0xFF08028E,0xD8C80002,0x6FFC0BE8,0x18C0288,0x18C0288,0x18C0288,0x18C0288,0xFF740055,0xFF740055,0xFF740055,0xE9680000,0xE9680000,0xD7680001,0x53FC0288,0x53FC0288,0x53FC0288,0xF7280001,0xF7280001, -0xD9440001,0xABF80288,0xABF80288,0xD6D80001,0xC400028A,0x53FC0288,0x53FC0288,0x53FC0288,0xF7280001,0xF7280001,0xD9440001,0xABF80288,0xABF80288,0xD6D80001,0xC400028A,0xABF80288,0xABF80288,0xD6D80001,0xC400028A,0xC400028A,0xF78001E1,0xFD880200,0x18C0288,0xFB70016D,0xFF5400AA,0xFF3C0034,0xFF340005,0xF5100000,0xFD7401E1,0xFF5C0145,0x95FC0288,0xD6D80001, -0x95FC0288,0x1D802F9,0xFFC401CD,0xFFB80089,0xFFB00000,0xC3FC02F9,0xFFA00112,0xFF880001,0xE1FC02F9,0xFF100000,0xEA0002F9,0xC3FC02F9,0xFFA00112,0xFF880001,0xE1FC02F9,0xFF100000,0xEA0002F9,0xE1FC02F9,0xFF100000,0xEA0002F9,0xEA0002F9,0xC3FC02F9,0xFFA00112,0xFF880001,0xE1FC02F9,0xFF100000,0xEA0002F9,0xE1FC02F9,0xFF100000,0xEA0002F9,0xEA0002F9,0xE1FC02F9, -0xFF100000,0xEA0002F9,0xEA0002F9,0xEA0002F9,0xFFD002AD,0x1F802F9,0xF3D402D4,0xFFC00244,0xFFA801CA,0xFF7000E8,0xFF3C0000,0xFED80000,0xF9D002AD,0xFDBC0244,0xFF580019,0xEA0002F9,0xDBFC02F9,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0x13C0374,0xFF180008,0xFF180008,0xFF180008,0xFF180008,0xFF180008, -0xFF180008,0xC9140000,0xC9140000,0xC9140000,0xB3140001,0x1D80372,0x1D80372,0x1D80372,0x1D80372,0x1D80372,0x1D80372,0xD8C80001,0xD8C80001,0xD8C80001,0xB2EC0001,0x71F80372,0x71F80372,0x71F80372,0xB26C0001,0x9E000372,0xF5380288,0x13C0374,0x13C0374,0xFF2C0171,0xFF2800C8,0xFF1C0050,0xFF1C0050,0xF1140000,0xFF200239,0xFF200152,0xDF080000,0xD8C80001, -0x4BFC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table153[] = { -0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x3B00000,0x5DFC0000, -0x5DFC0000,0x5DFC0000,0x5DFC0000,0x90000001,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1240000,0x1380000,0x1380000,0x1380000,0x3B00000,0x33FC0000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x1780000,0x35FC0000,0x35FC0000,0x35FC0000,0x35FC0000,0x35FC0000, -0x35FC0000,0x9DF40000,0x9DF40000,0x9DF40000,0xBA000001,0x35FC0000,0x35FC0000,0x35FC0000,0x35FC0000,0x35FC0000,0x35FC0000,0x9DF40000,0x9DF40000,0x9DF40000,0xBA000001,0x9DF40000,0x9DF40000,0x9DF40000,0xBA000001,0xBA000001,0x1900000,0x1780000,0x1780000,0x3B40000,0x1D80000,0x5FC0000,0x5FC0000,0x5BFC0000,0x3B40000,0x1D80000,0x81FC0000,0x9DF40000, -0x81FC0000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xD1FC0000,0xD1FC0000,0xDE000001,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xD1FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xD1FC0000,0xDE000001,0xDE000001,0xA3FC0000,0xA3FC0000,0xA3FC0000,0xD1FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xD1FC0000,0xDE000001,0xDE000001,0xD1FC0000, -0xD1FC0000,0xDE000001,0xDE000001,0xDE000001,0x27FC0000,0x7DC0000,0x1C00000,0x8DFC0000,0xB3FC0000,0xC5FC0000,0xCBFC0000,0xD5FC0000,0x69FC0000,0xA3FC0000,0xC5FC0000,0xDE000001,0xC5FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x19819DD,0xFF801338,0xFF7C0DD1,0xFF740BE8,0xFF740F69,0xFF64088E,0xFF5C05E5,0xFF58068D,0xFD4C0378,0xEF4C0555,0xFF6810D2,0xFF4C0853,0xFF4404DA,0xFF380465,0xFF2C0029,0xF12C0235,0xFF2808DD,0xF51C032E,0xEB1C0319,0xDF2408CA,0x63FC19DB,0xFF380FF4,0xFF300BE8,0xFF1409E9,0xFEFC03ED,0xEF080553,0xFEF00B8E,0xFEC0029E,0xECC401FA,0xDED808CA,0xB3F819DB, -0xFE580BE8,0xEE280553,0xDC0008D2,0xCA0019DB,0xFF801459,0xFF8C17D1,0xFF8C1891,0xFF700E65,0xFF50086A,0xFF3C02F4,0xFF3400D5,0xFD240006,0xFF7813DA,0xFF5C0D8F,0xFF1C02E2,0xECC401FA,0x9FF819DB,0x1BC08C9,0xFFB005F4,0xFFA00371,0xFF9C0288,0xFFA40569,0xFF9001F8,0xFF840088,0xFF7C0221,0xFB780034,0xEF7801E1,0x9BFC08C9,0xFF7C0473,0xFF6C0288,0xFF580362,0xFF380001, -0xEF4401E2,0xCFF808C9,0xFED40288,0xEEA801E1,0xDE0008CA,0x9BFC08C9,0xFF7C0473,0xFF6C0288,0xFF580362,0xFF380001,0xEF4401E2,0xCFF808C9,0xFED40288,0xEEA801E1,0xDE0008CA,0xCFF808C9,0xFED40288,0xEEA801E1,0xDE0008CA,0xDE0008CA,0xFFB4074A,0xF5B80845,0xF7BC0849,0xFF9805B9,0xFF7C03C5,0xFF5C0171,0xFF500019,0xFF180001,0xFFA4076A,0xFF90057E,0xFF3002AE,0xEEA801E1, -0xC1FC08C9,0x1740BE8,0x1740BE8,0x1740BE8,0x1740BE8,0xFF5C05E5,0xFF5C05E5,0xFF5C05E5,0xFB500374,0xFB500374,0xDF4C0375,0xFF4404DA,0xFF4404DA,0xFF4404DA,0xFF2C0029,0xFF2C0029,0xE33000A6,0xEB240288,0xEB240288,0xDB200041,0xCD24028A,0x2FFC0BE8,0x2FFC0BE8,0x2FFC0BE8,0xFEFC03ED,0xFEFC03ED,0xDF180373,0xFEC0029E,0xFEC0029E,0xE0D80002,0xCEE8028A,0x99FC0BE8, -0x99FC0BE8,0xDE680373,0xCC30028A,0xB8000BEB,0xFF6808CE,0xFF6C0A8D,0x1740BE8,0xFF540625,0xFF440371,0xFF3C0164,0xFF3400D5,0xFB240002,0xFF58089A,0xFF5005B6,0xFF1C02B1,0xE0D80002,0x7FF80BE8,0x19C0288,0x19C0288,0x19C0288,0x19C0288,0xFF840088,0xFF840088,0xFF840088,0xF1780000,0xF1780000,0xDF780001,0x6BFC0288,0x6BFC0288,0x6BFC0288,0xFF380001,0xFF380001, -0xE1540001,0xB7F80288,0xB7F80288,0xDEE80001,0xCC00028A,0x6BFC0288,0x6BFC0288,0x6BFC0288,0xFF380001,0xFF380001,0xE1540001,0xB7F80288,0xB7F80288,0xDEE80001,0xCC00028A,0xB7F80288,0xB7F80288,0xDEE80001,0xCC00028A,0xCC00028A,0xFF9001E1,0xF5980221,0x19C0288,0xFD840188,0xFF7000DD,0xFF580064,0xFF500019,0xFD200000,0xFD880200,0xFF740171,0xA3FC0288,0xDEE80001, -0xA3FC0288,0x1E001E1,0xFFD00121,0xFFC80061,0xFFC00000,0xCFFC01E1,0xFFB800AA,0xFFA00001,0xE7FC01E1,0xFF400000,0xEE0001E1,0xCFFC01E1,0xFFB800AA,0xFFA00001,0xE7FC01E1,0xFF400000,0xEE0001E1,0xE7FC01E1,0xFF400000,0xEE0001E1,0xEE0001E1,0xCFFC01E1,0xFFB800AA,0xFFA00001,0xE7FC01E1,0xFF400000,0xEE0001E1,0xE7FC01E1,0xFF400000,0xEE0001E1,0xEE0001E1,0xE7FC01E1, -0xFF400000,0xEE0001E1,0xEE0001E1,0xEE0001E1,0xFFD001BD,0x7FC01E1,0xF7DC01C4,0xFFC80179,0xFFBC0122,0xFF900092,0xFF640000,0xFF140000,0xFDD801A5,0xFFC00164,0xFF780010,0xEE0001E1,0xE1FC01E1,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0x14C0374,0xFF28001D,0xFF28001D,0xFF28001D,0xFF28001D,0xFF28001D, -0xFF28001D,0xD1240000,0xD1240000,0xD1240000,0xBB240001,0x1F00372,0x1F00372,0x1F00372,0x1F00372,0x1F00372,0x1F00372,0xE0D80001,0xE0D80001,0xE0D80001,0xBAFC0001,0x7DF80372,0x7DF80372,0x7DF80372,0xBA7C0001,0xA6000372,0xFD480288,0x14C0374,0x14C0374,0xFF3C0190,0xFF3400E9,0xFF340071,0xFF340071,0xF9240000,0xFD380244,0xFF34016D,0xE7180000,0xE0D80001, -0x5BFC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table154[] = { -0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0x69FC0000, -0x69FC0000,0x69FC0000,0x69FC0000,0x98000001,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1340000,0x1480000,0x1480000,0x1480000,0x1C80000,0x41FC0000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000, -0x4DFC0000,0xA7FC0000,0xA7FC0000,0xA7FC0000,0xC2000001,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0x4DFC0000,0xA7FC0000,0xA7FC0000,0xA7FC0000,0xC2000001,0xA7FC0000,0xA7FC0000,0xA7FC0000,0xC2000001,0xC2000001,0x9A00000,0x1880000,0x1880000,0x1C80000,0x1EC0000,0x23FC0000,0x23FC0000,0x6FFC0000,0x1C80000,0x1EC0000,0x91FC0000,0xA7FC0000, -0x91FC0000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xDDFC0000,0xDDFC0000,0xE6000001,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xDDFC0000,0xDDFC0000,0xE6000001,0xDDFC0000,0xDDFC0000,0xE6000001,0xE6000001,0xBBFC0000,0xBBFC0000,0xBBFC0000,0xDDFC0000,0xDDFC0000,0xE6000001,0xDDFC0000,0xDDFC0000,0xE6000001,0xE6000001,0xDDFC0000, -0xDDFC0000,0xE6000001,0xE6000001,0xE6000001,0x5FFC0000,0xFEC0000,0x1D00000,0xABFC0000,0xC7FC0000,0xD5FC0000,0xD9FC0000,0xE1F40000,0x91FC0000,0xBBFC0000,0xD5FC0000,0xE6000001,0xD5FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1A416D1,0xFF981198,0xFF880D5D,0xFF840BE8,0xFF800E01,0xFF740886,0xFF740675,0xFF6405F9,0xFF60037D,0xF35C047D,0xFF740EFE,0xFF64082B,0xFF5C056A,0xFF4C03FE,0xFF40007D,0xF53C016E,0xFF38072E,0xF93002C2,0xEF3001F9,0xE33406EA,0x75FC16CF,0xFF580F0C,0xFF480BE8,0xFF2C0911,0xFF20044D,0xF31C047B,0xFF080A0E,0xFEE402DE,0xF2DC0112,0xE2EC06EA,0xBBFC16CF, -0xFE880BE8,0xF248047B,0xE20806EA,0xD00016CF,0xFF8C1241,0xF9A0153D,0xF9A015C4,0xFF800D27,0xFF68080D,0xFF54034B,0xFF4C015D,0xFF38000D,0xFF9011EA,0xFF740C8A,0xFF340318,0xF2DC0112,0xA9FC16CF,0x1C806E9,0xFFBC04EC,0xFFAC0331,0xFFAC0288,0xFFB0043D,0xFFA001C2,0xFF9800B9,0xFF940169,0xFD880008,0xF3880109,0xAFFC06E9,0xFF9403DB,0xFF840288,0xFF70028A,0xFF580019, -0xF358010A,0xD7FC06E9,0xFF040288,0xF2C80109,0xE20006EA,0xAFFC06E9,0xFF9403DB,0xFF840288,0xFF70028A,0xFF580019,0xF358010A,0xD7FC06E9,0xFF040288,0xF2C80109,0xE20006EA,0xD7FC06E9,0xFF040288,0xF2C80109,0xE20006EA,0xE20006EA,0xFFBC05CD,0xFBC4066D,0xFBC40681,0xFFB004A6,0xFF900329,0xFF7C0153,0xFF680041,0xFF380009,0xFFB405D6,0xFFA40482,0xFF5002A1,0xF2C80109, -0xCDFC06E9,0x1840BE8,0x1840BE8,0x1840BE8,0x1840BE8,0xFF740675,0xFF740675,0xFF740675,0xFF60037D,0xFF60037D,0xE75C0375,0xFF5C056A,0xFF5C056A,0xFF5C056A,0xFF40007D,0xFF40007D,0xEB4000A6,0xF3340288,0xF3340288,0xE3300041,0xD534028A,0x47FC0BE8,0x47FC0BE8,0x47FC0BE8,0xFF20044D,0xFF20044D,0xE7280373,0xFEE402DE,0xFEE402DE,0xE8E80002,0xD6F8028A,0xA5F80BE8, -0xA5F80BE8,0xE6780373,0xD440028A,0xC0000BEB,0xFF740934,0xF9800AC4,0x1840BE8,0xFF6806BD,0xFF580419,0xFF4C021D,0xFF4C015D,0xFF38000D,0xFD70090C,0xFF5C0656,0xFF3402F4,0xE8E80002,0x8DFC0BE8,0x1AC0288,0x1AC0288,0x1AC0288,0x1AC0288,0xFF9800B9,0xFF9800B9,0xFF9800B9,0xF9880000,0xF9880000,0xE7880001,0x83FC0288,0x83FC0288,0x83FC0288,0xFF580019,0xFF580019, -0xE9640001,0xC3F80288,0xC3F80288,0xE6F80001,0xD400028A,0x83FC0288,0x83FC0288,0x83FC0288,0xFF580019,0xFF580019,0xE9640001,0xC3F80288,0xC3F80288,0xE6F80001,0xD400028A,0xC3F80288,0xC3F80288,0xE6F80001,0xD400028A,0xD400028A,0xFFA00202,0xFDA80221,0x1AC0288,0xFF9801A5,0xFF84010D,0xFF740091,0xFF680041,0xFF380009,0xFF940212,0xFB9001A5,0xB3FC0288,0xE6F80001, -0xB3FC0288,0x1E80109,0xFFDC009D,0xFFD80034,0xFFD00000,0xDBFC0109,0xFFCC0064,0xFFB80001,0xEDFC0109,0xFF700000,0xF2000109,0xDBFC0109,0xFFCC0064,0xFFB80001,0xEDFC0109,0xFF700000,0xF2000109,0xEDFC0109,0xFF700000,0xF2000109,0xF2000109,0xDBFC0109,0xFFCC0064,0xFFB80001,0xEDFC0109,0xFF700000,0xF2000109,0xEDFC0109,0xFF700000,0xF2000109,0xF2000109,0xEDFC0109, -0xFF700000,0xF2000109,0xF2000109,0xF2000109,0xFBE400F2,0x47FC0109,0xFBE400F4,0xFFD800CA,0xFFD000A2,0xFFAC0050,0xFF8C0000,0xFF500000,0xFFDC00E1,0xFFD800C8,0xFF9C0009,0xF2000109,0xE9FC0109,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0x15C0374,0xFF3C0034,0xFF3C0034,0xFF3C0034,0xFF3C0034,0xFF3C0034, -0xFF3C0034,0xD9340000,0xD9340000,0xD9340000,0xC3340001,0xDFC0372,0xDFC0372,0xDFC0372,0xDFC0372,0xDFC0372,0xDFC0372,0xE8E80001,0xE8E80001,0xE8E80001,0xC30C0001,0x89F80372,0x89F80372,0x89F80372,0xC28C0001,0xAE000372,0xF55802AD,0x15C0374,0x15C0374,0xFF4C01B1,0xFD4C0120,0xFF440091,0xFF440091,0xFF340001,0xF94C0265,0xFB4801A5,0xEF280000,0xE8E80001, -0x69FC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table155[] = { -0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0x75FC0000, -0x75FC0000,0x75FC0000,0x75FC0000,0xA0000001,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x5580000,0x5580000,0x5580000,0x1E00000,0x51FC0000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x1980000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000, -0x65FC0000,0xB3FC0000,0xB3FC0000,0xB3FC0000,0xCA000001,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0x65FC0000,0xB3FC0000,0xB3FC0000,0xB3FC0000,0xCA000001,0xB3FC0000,0xB3FC0000,0xB3FC0000,0xCA000001,0xCA000001,0x1B40000,0x1980000,0x1980000,0x5D80000,0x7FC0000,0x41FC0000,0x41FC0000,0x83FC0000,0x5D80000,0x7FC0000,0x9FFC0000,0xB3FC0000, -0x9FFC0000,0x1E00000,0x1E00000,0x1E00000,0x1E00000,0xD3FC0000,0xD3FC0000,0xD3FC0000,0xE9FC0000,0xE9FC0000,0xEE000001,0xD3FC0000,0xD3FC0000,0xD3FC0000,0xE9FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xE9FC0000,0xEE000001,0xEE000001,0xD3FC0000,0xD3FC0000,0xD3FC0000,0xE9FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xE9FC0000,0xEE000001,0xEE000001,0xE9FC0000, -0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0x97FC0000,0x17FC0000,0x1E00000,0xC9FC0000,0xDBFC0000,0xE3FC0000,0xE7F80000,0xEBF80000,0xB7FC0000,0xD3FC0000,0xE3FC0000,0xEE000001,0xE3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1B01425,0xFFA4102C,0xFF940D09,0xFF940BE8,0xFF980CD1,0xFF840898,0xFF8006ED,0xFF7C05B1,0xFF7003B4,0xF76C03E5,0xFF8C0D6E,0xFF74082B,0xFF68060E,0xFF6403D6,0xFF54010D,0xF95000EA,0xFF4C0601,0xFD44028E,0xF53C011D,0xE944055E,0x87FC1423,0xFF700E44,0xFF600BE8,0xFF4C089F,0xFF3804C5,0xF73003E3,0xFF2008EE,0xFEFC034E,0xF6F00072,0xE900055E,0xC5F81423, -0xFEB80BE8,0xF66803E3,0xE820055E,0xD6001423,0xFFA0103F,0xFFAC12A9,0xFFAC133C,0xFF940C5A,0xFF7807F2,0xFF6803C5,0xFF5C0214,0xFF48005E,0xFF981017,0xFF800B9F,0xFF50039A,0xF6F00072,0xB5FC1423,0x1D00561,0xFFC40411,0xFFC002F4,0xFFBC0288,0xFFBC0359,0xFFB001B2,0xFFA80104,0xFFA000F1,0xFF980004,0xF7980071,0xBDFC055E,0xFFAC0363,0xFF9C0288,0xFF8801F2,0xFF700049, -0xF76C0072,0xDFF8055E,0xFF340288,0xF6E80071,0xE800055E,0xBDFC055E,0xFFAC0363,0xFF9C0288,0xFF8801F2,0xFF700049,0xF76C0072,0xDFF8055E,0xFF340288,0xF6E80071,0xE800055E,0xDFF8055E,0xFF340288,0xF6E80071,0xE800055E,0xE800055E,0xFDCC049A,0xFFCC04F5,0xFFCC0519,0xFFC003C4,0xFFA402B5,0xFF90016B,0xFF7C0082,0xFF640032,0xFFC4049D,0xFFB003C2,0xFF700298,0xF6E80071, -0xD7FC055E,0x1940BE8,0x1940BE8,0x1940BE8,0x1940BE8,0xFF8006ED,0xFF8006ED,0xFF8006ED,0xFF7003B4,0xFF7003B4,0xEF6C0375,0xFF68060E,0xFF68060E,0xFF68060E,0xFF54010D,0xFF54010D,0xF35000A6,0xFB440288,0xFB440288,0xEB400041,0xDD44028A,0x5FFC0BE8,0x5FFC0BE8,0x5FFC0BE8,0xFF3804C5,0xFF3804C5,0xEF380373,0xFEFC034E,0xFEFC034E,0xF0F80002,0xDF08028A,0xB1F80BE8, -0xB1F80BE8,0xEE880373,0xDC50028A,0xC8000BEB,0xFF84097D,0xFF8C0AD8,0x1940BE8,0xFF80074E,0xFF6C04D1,0xFF6002D9,0xFF5C0214,0xFF48005E,0xFF800956,0xFF7406E1,0xFF500381,0xF0F80002,0x9DF80BE8,0x1BC0288,0x1BC0288,0x1BC0288,0x1BC0288,0xFFA80104,0xFFA80104,0xFFA80104,0xFF980004,0xFF980004,0xEF980001,0x9BFC0288,0x9BFC0288,0x9BFC0288,0xFF700049,0xFF700049, -0xF1740001,0xCFF80288,0xCFF80288,0xEF080001,0xDC00028A,0x9BFC0288,0x9BFC0288,0x9BFC0288,0xFF700049,0xFF700049,0xF1740001,0xCFF80288,0xCFF80288,0xEF080001,0xDC00028A,0xCFF80288,0xCFF80288,0xEF080001,0xDC00028A,0xDC00028A,0xFBB40221,0xF5B80244,0x1BC0288,0xFFA401D4,0xFF980145,0xFF8C00E1,0xFF7C0082,0xFF640032,0xFDB00221,0xFFA401C2,0xC1FC0288,0xEF080001, -0xC1FC0288,0x1F00071,0xFFE80041,0xFFE40014,0xFFE00000,0xE9FC0071,0xFFD80028,0xFFD00000,0xF3FC0071,0xFFA00000,0xF6000071,0xE9FC0071,0xFFD80028,0xFFD00000,0xF3FC0071,0xFFA00000,0xF6000071,0xF3FC0071,0xFFA00000,0xF6000071,0xF6000071,0xE9FC0071,0xFFD80028,0xFFD00000,0xF3FC0071,0xFFA00000,0xF6000071,0xF3FC0071,0xFFA00000,0xF6000071,0xF6000071,0xF3FC0071, -0xFFA00000,0xF6000071,0xF6000071,0xF6000071,0xFFEC0062,0x87FC0071,0xFFEC0064,0xFDE80055,0xFDE40048,0xFFC8001D,0xFFB40000,0xFF8C0000,0xFDEC0062,0xFFE40055,0xFFBC0004,0xF6000071,0xF1FC0071,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0x16C0374,0xFF500055,0xFF500055,0xFF500055,0xFF500055,0xFF500055, -0xFF500055,0xE1440000,0xE1440000,0xE1440000,0xCB440001,0x25FC0372,0x25FC0372,0x25FC0372,0x25FC0372,0x25FC0372,0x25FC0372,0xF0F80001,0xF0F80001,0xF0F80001,0xCB1C0001,0x95F80372,0x95F80372,0x95F80372,0xCA9C0001,0xB6000372,0xFD6802AD,0x16C0374,0x16C0374,0xFD6401E1,0xFF580145,0xFF5400B9,0xFF5400B9,0xFF48000D,0xFF580271,0xFF5001BD,0xF7380000,0xF0F80001, -0x79FC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table156[] = { -0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x1FC0000,0x83F80000, -0x83F80000,0x83F80000,0x83F80000,0xAA000000,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x1540001,0x16C0000,0x16C0000,0x16C0000,0x1FC0000,0x61FC0000,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000, -0x81FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0x81FC0000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0xD4000000,0xBC40000,0x1A80001,0x1A80001,0x1F00000,0x33FC0000,0x63FC0000,0x63FC0000,0x99FC0000,0x1F00000,0x33FC0000,0xB1FC0000,0xC1FC0000, -0xB1FC0000,0x1F00001,0x1F00001,0x1F00001,0x1F00001,0xEFFC0000,0xEFFC0000,0xEFFC0000,0xF7F80000,0xF7F80000,0xF8000000,0xEFFC0000,0xEFFC0000,0xEFFC0000,0xF7F80000,0xF7F80000,0xF8000000,0xF7F80000,0xF7F80000,0xF8000000,0xF8000000,0xEFFC0000,0xEFFC0000,0xEFFC0000,0xF7F80000,0xF7F80000,0xF8000000,0xF7F80000,0xF7F80000,0xF8000000,0xF8000000,0xF7F80000, -0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xD7FC0000,0xA7FC0000,0x1F00001,0xEBFC0000,0xF1FC0000,0xF5FC0000,0xF5FC0000,0xF7FC0000,0xE3FC0000,0xEFFC0000,0xF5FC0000,0xF8000000,0xF5FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1BC1194,0xFFB00ED7,0xFFAC0CAC,0xFFA40BEB,0xFFA40BBE,0xFF9808C3,0xFF9807A2,0xFF8805A4,0xFF88043B,0xFB800386,0xFF980C15,0xFF8C0852,0xFF8006C9,0xFF7003FF,0xFF6C01EE,0xFD6400A9,0xFF64052C,0xFF58029B,0xF950007E,0xEF540409,0x9BFC1194,0xFF880D81,0xFF7C0BE8,0xFF64083E,0xFF58058D,0xFD440386,0xFF380829,0xFF2003F9,0xFB080015,0xEF140408,0xCFF81194, -0xFEF00BE8,0xFA8C0386,0xEE400408,0xDC001198,0xFFB40EBF,0xF5B810AC,0xF7BC1114,0xFF980B8A,0xFF8C0803,0xFF7C0476,0xFF78030B,0xFF640105,0xFDB00E9B,0xFF980AFA,0xFF640451,0xFB080015,0xC1FC1194,0x1DC040B,0xFFD4035E,0xFFCC02CA,0xFFCC028A,0xFFD0029B,0xFFC401B6,0xFFBC0151,0xFFB800C3,0xFFAC0032,0xFBA80015,0xCFFC0408,0xFFC002FE,0xFFB80288,0xFFA00196,0xFF94009D, -0xFB840014,0xE7FC0408,0xFF6C0288,0xFB0C0014,0xEE000408,0xCFFC0408,0xFFC002FE,0xFFB80288,0xFFA00196,0xFF94009D,0xFB840014,0xE7FC0408,0xFF6C0288,0xFB0C0014,0xEE000408,0xE7FC0408,0xFF6C0288,0xFB0C0014,0xEE000408,0xEE000408,0xFFD80385,0xF7DC03D5,0xF7DC03EA,0xFDD00321,0xFFC0025A,0xFFA80179,0xFFA000DA,0xFF7C0082,0xFFD00395,0xFFC802E4,0xFF980291,0xFB0C0014, -0xE1FC0408,0x1A40BEB,0x1A40BEB,0x1A40BEB,0x1A40BEB,0xFF9807A2,0xFF9807A2,0xFF9807A2,0xFF88043B,0xFF88043B,0xF9800372,0xFF8006C9,0xFF8006C9,0xFF8006C9,0xFF6C01EE,0xFF6C01EE,0xFD6400A5,0xFF58029B,0xFF58029B,0xF5540042,0xE7540289,0x7BFC0BE8,0x7BFC0BE8,0x7BFC0BE8,0xFF58058D,0xFF58058D,0xF9480372,0xFF2003F9,0xFF2003F9,0xF9080005,0xE71C0288,0xBFF80BE8, -0xBFF80BE8,0xF8940372,0xE6600288,0xD2000BE8,0xFFA009E1,0xFBA40B0A,0x1A40BEB,0xFF9007E6,0xFF8005BA,0xFF7803C3,0xFF78030B,0xFF640105,0xFF9409A4,0xFF8007C1,0xFF640438,0xF9080005,0xADFC0BE8,0x1CC028A,0x1CC028A,0x1CC028A,0x1CC028A,0xFFBC0151,0xFFBC0151,0xFFBC0151,0xFFAC0032,0xFFAC0032,0xF9A80001,0xB7FC0288,0xB7FC0288,0xB7FC0288,0xFF94009D,0xFF94009D, -0xF9880000,0xDDF40288,0xDDF40288,0xF9140000,0xE6000288,0xB7FC0288,0xB7FC0288,0xB7FC0288,0xFF94009D,0xFF94009D,0xF9880000,0xDDF40288,0xDDF40288,0xF9140000,0xE6000288,0xDDF40288,0xDDF40288,0xF9140000,0xE6000288,0xE6000288,0xFDC80242,0xFFCC0242,0x1CC028A,0xFDC00202,0xFFAC0195,0xFFA80128,0xFFA000DA,0xFF7C0082,0xFFC40242,0xFBC00200,0xD3FC0288,0xF9140000, -0xD3FC0288,0x1F80012,0xFFF4000A,0xFFF00005,0xFFF00001,0xF7FC0012,0xFFF00005,0xFFEC0000,0xFBFC0012,0xFFD80000,0xFA000014,0xF7FC0012,0xFFF00005,0xFFEC0000,0xFBFC0012,0xFFD80000,0xFA000014,0xFBFC0012,0xFFD80000,0xFA000014,0xFA000014,0xF7FC0012,0xFFF00005,0xFFEC0000,0xFBFC0012,0xFFD80000,0xFA000014,0xFBFC0012,0xFFD80000,0xFA000014,0xFA000014,0xFBFC0012, -0xFFD80000,0xFA000014,0xFA000014,0xFA000014,0xFFF8000D,0xD7FC0012,0xF5F80012,0xFFF4000D,0xFDF4000D,0xFFE80005,0xFFE00000,0xFFD00000,0xF5FC0012,0xFFF00011,0xFFE40001,0xFA000014,0xFBFC0012,0x1800372,0x1800372,0x1800372,0x1800372,0x1800372,0x1800372,0x1800372,0x1800372,0x1800372,0x1800372,0xFF680091,0xFF680091,0xFF680091,0xFF680091,0xFF680091, -0xFF680091,0xE9580001,0xE9580001,0xE9580001,0xD5540001,0x41FC0372,0x41FC0372,0x41FC0372,0x41FC0372,0x41FC0372,0x41FC0372,0xFB080001,0xFB080001,0xFB080001,0xD52C0000,0xA1FC0372,0xA1FC0372,0xA1FC0372,0xD4A80000,0xBE000374,0xF77C02D2,0x1800372,0x1800372,0xFF740202,0xFF6C0179,0xFF6800FA,0xFF6800FA,0xFF5C002D,0xFD70028A,0xFF6401F9,0xFD4C0001,0xFB080001, -0x89FC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table157[] = { -0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x19FC0000,0x8FF80000, -0x8FF80000,0x8FF80000,0x8FF80000,0xB2000000,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x1640001,0x77C0000,0x77C0000,0x77C0000,0x19FC0000,0x71FC0000,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x1B80001,0x99FC0000,0x99FC0000,0x99FC0000,0x99FC0000,0x99FC0000, -0x99FC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xDC000000,0x99FC0000,0x99FC0000,0x99FC0000,0x99FC0000,0x99FC0000,0x99FC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xDC000000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xDC000000,0xDC000000,0x1D80000,0x1B80001,0x1B80001,0x11FC0000,0x5BFC0000,0x81FC0000,0x81FC0000,0xADFC0000,0x11FC0000,0x5BFC0000,0xBFFC0000,0xCDFC0000, -0xBFFC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1C80F44,0xFFBC0D5F,0xFFB80BD4,0xFFB40B53,0xFFB00ABA,0xFFA4089F,0xFFA407BE,0xFFA005B4,0xFF94049B,0xFF900372,0xFFA40AD9,0xFF980816,0xFF9806F5,0xFF880417,0xFF7C0296,0xFF7400C1,0xFF7C0494,0xFF7002A3,0xFD64003A,0xF3640305,0xABFC0F44,0xFF940C75,0xFF900B53,0xFF7C07D6,0xFF7005E5,0xFF580374,0xFF580773,0xFF380455,0xFF200005,0xF3280304,0xD7F80F44, -0xFF1C0B53,0xFEAC0372,0xF2600304,0xE2000F44,0xFFBC0CF6,0xFBC40E6C,0xFBC40EDC,0xFFAC0A8E,0xFF9C07CB,0xFF9004FA,0xFF8803B2,0xFF7401A6,0xFFB40CB7,0xFFA40A4C,0xFF780483,0xFF200005,0xCBFC0F44,0x1E80303,0xFFE002AE,0xFFE00263,0xFFDC0242,0xFFDC0213,0xFFD0019A,0xFFD0015A,0xFFC000D1,0xFFC0006D,0xFFB80001,0xDFFC0303,0xFFCC0282,0xFFCC0242,0xFFB80156,0xFFAC00CD, -0xFF980000,0xEFFC0303,0xFF940242,0xFF2C0000,0xF2000304,0xDFFC0303,0xFFCC0282,0xFFCC0242,0xFFB80156,0xFFAC00CD,0xFF980000,0xEFFC0303,0xFF940242,0xFF2C0000,0xF2000304,0xEFFC0303,0xFF940242,0xFF2C0000,0xF2000304,0xF2000304,0xFFE402C1,0xFBE402D9,0xFDE802EE,0xFFDC025D,0xFFD40202,0xFFB80171,0xFFB80104,0xFFA000B4,0xFFDC02BD,0xFFDC025A,0xFFB40246,0xFF2C0000, -0xEBFC0303,0x1B40B53,0x1B40B53,0x1B40B53,0x1B40B53,0xFFA407BE,0xFFA407BE,0xFFA407BE,0xFF94049B,0xFF94049B,0xFF900372,0xFF9806F5,0xFF9806F5,0xFF9806F5,0xFF7C0296,0xFF7C0296,0xFF7400C1,0xFF7002A3,0xFF7002A3,0xFB600032,0xED640245,0x8FFC0B53,0x8FFC0B53,0x8FFC0B53,0xFF7005E5,0xFF7005E5,0xFF580374,0xFF380455,0xFF380455,0xFF200005,0xED2C0244,0xC9F80B53, -0xC9F80B53,0xFEAC0372,0xEC780244,0xDA000B54,0xFFAC09A2,0xFFAC0A9E,0x1B40B53,0xFFA4081B,0xFF940612,0xFF900481,0xFF8803B2,0xFF7401A6,0xFFA0097D,0xFF9807C5,0xFF780473,0xFF200005,0xBBFC0B53,0x1DC0242,0x1DC0242,0x1DC0242,0x1DC0242,0xFFD0015A,0xFFD0015A,0xFFD0015A,0xFFC0006D,0xFFC0006D,0xFFB80001,0xCDFC0242,0xCDFC0242,0xCDFC0242,0xFFAC00CD,0xFFAC00CD, -0xFF980000,0xE7F80242,0xE7F80242,0xFF2C0000,0xEC000244,0xCDFC0242,0xCDFC0242,0xCDFC0242,0xFFAC00CD,0xFFAC00CD,0xFF980000,0xE7F80242,0xE7F80242,0xFF2C0000,0xEC000244,0xE7F80242,0xE7F80242,0xFF2C0000,0xEC000244,0xEC000244,0xFFD80200,0xF7DC0221,0x1DC0242,0xFBD401E1,0xFFC80190,0xFFB80140,0xFFB80104,0xFFA000B4,0xFFD00208,0xFFC801D4,0xDFFC0242,0xFF2C0000, -0xDFFC0242,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1900372,0x1900372,0x1900372,0x1900372,0x1900372,0x1900372,0x1900372,0x1900372,0x1900372,0x1900372,0xFF7400C1,0xFF7400C1,0xFF7400C1,0xFF7400C1,0xFF7400C1, -0xFF7400C1,0xF1680001,0xF1680001,0xF1680001,0xDD640001,0x59FC0372,0x59FC0372,0x59FC0372,0x59FC0372,0x59FC0372,0x59FC0372,0xFF200005,0xFF200005,0xFF200005,0xDD3C0000,0xADFC0372,0xADFC0372,0xADFC0372,0xDCB80000,0xC6000374,0xFF8C02D2,0x1900372,0x1900372,0xFF840225,0xFF8001A9,0xFF780132,0xFF780132,0xFF700059,0xF98402AD,0xFF780212,0xFF60000D,0xFF200005, -0x99FC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table158[] = { -0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x31FC0000,0x9BF80000, -0x9BF80000,0x9BF80000,0x9BF80000,0xBA000000,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0x1740001,0xF8C0000,0xF8C0000,0xF8C0000,0x31FC0000,0x7FFC0000,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0x1C80001,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000, -0xB1FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xE4000000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xB1FC0000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xE4000000,0xD9FC0000,0xD9FC0000,0xD9FC0000,0xE4000000,0xE4000000,0x1E80000,0x1C80001,0x1C80001,0x49FC0000,0x81FC0000,0x9FFC0000,0x9FFC0000,0xC1FC0000,0x49FC0000,0x81FC0000,0xCFFC0000,0xD9FC0000, -0xCFFC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1D00C14,0xFFC40AAC,0xFFC00997,0xFFC00933,0xFFBC08C2,0xFFB4073B,0xFFB00696,0xFFAC050C,0xFFA00453,0xFFA00372,0xFFB00891,0xFFA40686,0xFFA405A5,0xFF940387,0xFF900266,0xFF8C0109,0xFF880354,0xFF7C01DB,0xFF74000A,0xF77401C5,0xB7FC0C14,0xFFAC09FD,0xFFA00934,0xFF8806A6,0xFF7C053D,0xFF700374,0xFF7005BB,0xFF58036A,0xFF380025,0xF73C01C4,0xDDF40C14, -0xFF400933,0xFEE00372,0xF68001C4,0xE6000C14,0xFFC40A74,0xFFCC0B5C,0xFFCC0BBC,0xFFBC089D,0xFFAC066B,0xFF98043E,0xFF900351,0xFF840195,0xFFBC0A41,0xFFB00837,0xFF9003AD,0xFF380025,0xD3FC0C14,0x1EC01C3,0xFFE8018B,0xFFE40162,0xFFE40152,0xFFE00141,0xFFDC00EE,0xFFDC00CA,0xFFD80079,0xFFCC0041,0xFFC80001,0xE5FC01C3,0xFFD80176,0xFFD80152,0xFFCC00D1,0xFFB8007D, -0xFFB00000,0xF3F801C3,0xFFAC0152,0xFF600000,0xF60001C4,0xE5FC01C3,0xFFD80176,0xFFD80152,0xFFCC00D1,0xFFB8007D,0xFFB00000,0xF3F801C3,0xFFAC0152,0xFF600000,0xF60001C4,0xF3F801C3,0xFFAC0152,0xFF600000,0xF60001C4,0xF60001C4,0xFBEC01A1,0xFFEC01A1,0xFFEC01B2,0xFFDC016D,0xFFDC0125,0xFFD000E3,0xFFC8009D,0xFFB8006A,0xF9EC01A1,0xFFDC015A,0xFFC80156,0xFF600000, -0xEFFC01C3,0x1C00933,0x1C00933,0x1C00933,0x1C00933,0xFFB00696,0xFFB00696,0xFFB00696,0xFFA00453,0xFFA00453,0xFFA00372,0xFFA405A5,0xFFA405A5,0xFFA405A5,0xFF900266,0xFF900266,0xFF8C0109,0xFF7C01DB,0xFF7C01DB,0xFD740009,0xF1740155,0xA3FC0933,0xA3FC0933,0xA3FC0933,0xFF7C053D,0xFF7C053D,0xFF700374,0xFF58036A,0xFF58036A,0xFF380025,0xF1400154,0xD1FC0933, -0xD1FC0933,0xFEE00372,0xF0980154,0xDE000934,0xFFBC07F2,0xF7BC08BB,0x1C00933,0xFFAC06AE,0xFFA00542,0xFF9803DA,0xFF900351,0xFF840195,0xFFB407AE,0xFFB0067E,0xFF9003A4,0xFF380025,0xC5FC0933,0x1E40152,0x1E40152,0x1E40152,0x1E40152,0xFFDC00CA,0xFFDC00CA,0xFFDC00CA,0xFFCC0041,0xFFCC0041,0xFFC80001,0xD9FC0152,0xD9FC0152,0xD9FC0152,0xFFB8007D,0xFFB8007D, -0xFFB00000,0xEDF80152,0xEDF80152,0xFF600000,0xF0000154,0xD9FC0152,0xD9FC0152,0xD9FC0152,0xFFB8007D,0xFFB8007D,0xFFB00000,0xEDF80152,0xEDF80152,0xFF600000,0xF0000154,0xEDF80152,0xEDF80152,0xFF600000,0xF0000154,0xF0000154,0xF7E40139,0xFBE40139,0x1E40152,0xFFDC0109,0xFFD400E1,0xFFD000CA,0xFFC8009D,0xFFB8006A,0xF5E40139,0xFFDC0109,0xE7FC0152,0xFF600000, -0xE7FC0152,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0x1A00372,0xFF8C0109,0xFF8C0109,0xFF8C0109,0xFF8C0109,0xFF8C0109, -0xFF8C0109,0xF9780001,0xF9780001,0xF9780001,0xE5740001,0x71FC0372,0x71FC0372,0x71FC0372,0x71FC0372,0x71FC0372,0x71FC0372,0xFF380025,0xFF380025,0xFF380025,0xE54C0000,0xB9FC0372,0xB9FC0372,0xB9FC0372,0xE4C80000,0xCE000374,0xF79C02F9,0x1A00372,0x1A00372,0xFF900262,0xFF9401E1,0xFF90016D,0xFF90016D,0xFF800092,0xFF9002B9,0xFD900244,0xFF74002D,0xFF380025, -0xA7FC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table159[] = { -0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0x49FC0000,0xA7F80000, -0xA7F80000,0xA7F80000,0xA7F80000,0xC2000000,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1840001,0x1A00000,0x1A00000,0x1A00000,0x49FC0000,0x8FFC0000,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0x1D80001,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000, -0xC9FC0000,0xE5F80000,0xE5F80000,0xE5F80000,0xEC000000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xC9FC0000,0xE5F80000,0xE5F80000,0xE5F80000,0xEC000000,0xE5F80000,0xE5F80000,0xE5F80000,0xEC000000,0xEC000000,0x5F80000,0x1D80001,0x1D80001,0x83FC0000,0xA9FC0000,0xBDFC0000,0xBDFC0000,0xD3FC0000,0x83FC0000,0xA9FC0000,0xDFF80000,0xE5F80000, -0xDFF80000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1D80964,0xFFD00864,0xFFCC07AB,0xFFC8076B,0xFFC4070C,0xFFC40607,0xFFBC059E,0xFFB80484,0xFFB4040A,0xFFB00372,0xFFC40694,0xFFB40543,0xFFB0049D,0xFFAC031F,0xFFA0024E,0xFF980151,0xFF940274,0xFF940153,0xFF880002,0xF98400D9,0xC3FC0964,0xFFB807ED,0xFFB00768,0xFFA00596,0xFF9404A5,0xFF880374,0xFF880463,0xFF7002BA,0xFF580061,0xF95000D8,0xE1FC0964, -0xFF600768,0xFF100372,0xF8A800D8,0xEA000964,0xFFCC082B,0xF3D4091A,0xF5D8093F,0xFFC406CC,0xFFB4054F,0xFFAC03A2,0xFFA40306,0xFF9C019B,0xFFD0080D,0xFFC406A4,0xFF9C02F7,0xFF580061,0xDBFC0964,0x1F000DB,0xFDF000C3,0xFFEC00AB,0xFFEC00A2,0xFFE80093,0xFFE80072,0xFFE80062,0xFFE40039,0xFFE00022,0xFFD80001,0xEFFC00D8,0xFFE400B2,0xFFE400A2,0xFFD80061,0xFFD8003D, -0xFFC80000,0xF7F800D8,0xFFC800A2,0xFF900000,0xF80000D8,0xEFFC00D8,0xFFE400B2,0xFFE400A2,0xFFD80061,0xFFD8003D,0xFFC80000,0xF7F800D8,0xFFC800A2,0xFF900000,0xF80000D8,0xF7F800D8,0xFFC800A2,0xFF900000,0xF80000D8,0xF80000D8,0xFDF000C1,0xFFEC00D1,0xF1F000DB,0xFFEC00AE,0xFFE80092,0xFFE0006B,0xFFD80050,0xFFD00034,0xFBF000C1,0xFFE800A6,0xFFD800A3,0xFF900000, -0xF5FC00D8,0x1C8076B,0x1C8076B,0x1C8076B,0x1C8076B,0xFFBC059E,0xFFBC059E,0xFFBC059E,0xFFB4040A,0xFFB4040A,0xFFB00372,0xFFB0049D,0xFFB0049D,0xFFB0049D,0xFFA0024E,0xFFA0024E,0xFF980151,0xFF940153,0xFF940153,0xFF880002,0xF58400A5,0xB1FC0768,0xB1FC0768,0xB1FC0768,0xFF9404A5,0xFF9404A5,0xFF880374,0xFF7002BA,0xFF7002BA,0xFF580061,0xF55400A4,0xD9FC0768, -0xD9FC0768,0xFF100372,0xF4B800A4,0xE4000768,0xFBC40683,0xFDC80703,0x1C8076B,0xFFBC057D,0xFFB4046E,0xFFAC0362,0xFFA40306,0xFF9C019B,0xFFBC0651,0xFFB4055B,0xFF9C02EE,0xFF580061,0xCFFC0768,0x1EC00A2,0x1EC00A2,0x1EC00A2,0x1EC00A2,0xFFE80062,0xFFE80062,0xFFE80062,0xFFE00022,0xFFE00022,0xFFD80001,0xE5FC00A2,0xE5FC00A2,0xE5FC00A2,0xFFD8003D,0xFFD8003D, -0xFFC80000,0xF3F800A2,0xF3F800A2,0xFF900000,0xF40000A4,0xE5FC00A2,0xE5FC00A2,0xE5FC00A2,0xFFD8003D,0xFFD8003D,0xFFC80000,0xF3F800A2,0xF3F800A2,0xFF900000,0xF40000A4,0xF3F800A2,0xF3F800A2,0xFF900000,0xF40000A4,0xF40000A4,0xFBEC0091,0xFFEC0091,0x1EC00A2,0xF9EC0091,0xFFDC0074,0xFFD80061,0xFFD80050,0xFFD00034,0xF9EC0091,0xFDE40082,0xEFFC00A2,0xFF900000, -0xEFFC00A2,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0x1B00372,0xFF980151,0xFF980151,0xFF980151,0xFF980151,0xFF980151, -0xFF980151,0xFF880002,0xFF880002,0xFF880002,0xED840001,0x89FC0372,0x89FC0372,0x89FC0372,0x89FC0372,0x89FC0372,0x89FC0372,0xFF580061,0xFF580061,0xFF580061,0xED5C0000,0xC5FC0372,0xC5FC0372,0xC5FC0372,0xECD80000,0xD6000374,0xFFAC02F9,0x1B00372,0x1B00372,0xFDA8028A,0xFFA00212,0xFF9801BA,0xFF9801BA,0xFF9400DA,0xFFA002E4,0xFFA00269,0xFF88007D,0xFF580061, -0xB7FC0372,}; -static const uint32_t g_etc1_to_bc7_m6_table160[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x300000,0x300000,0x300000,0x300000,0x2440000,0x2440000,0x2440000,0x8C0000,0x8C0000,0x16000001,0x2440000,0x2440000,0x2440000,0x8C0000,0x8C0000,0x16000001,0x8C0000,0x8C0000,0x16000001,0x16000001,0x2440000,0x2440000,0x2440000,0x8C0000,0x8C0000,0x16000001,0x8C0000,0x8C0000,0x16000001,0x16000001,0x8C0000, -0x8C0000,0x16000001,0x16000001,0x16000001,0x380000,0x340000,0x300000,0x400000,0x500000,0x640000,0x740000,0xAC0000,0x3C0000,0x2440000,0x640000,0x16000001,0x640000,0xA00000,0xEC0000,0x1E40000,0x4E000001,0xEC0000,0x1E40000,0x4E000001,0x1E40000,0x4E000001,0x4E000001,0xEC0000,0x1E40000,0x4E000001,0x1E40000,0x4E000001, -0x4E000001,0x1E40000,0x4E000001,0x4E000001,0x4E000001,0xEC0000,0x1E40000,0x4E000001,0x1E40000,0x4E000001,0x4E000001,0x1E40000,0x4E000001,0x4E000001,0x4E000001,0x1E40000,0x4E000001,0x4E000001,0x4E000001,0x4E000001,0xC80000,0xCA80000,0xCA80000,0x10C0000,0x1880000,0x25F00000,0x4E000001,0x4E000001,0xD80000,0x12C0000,0x45DC0000,0x4E000001, -0x1540000,0x3410B0,0xE2100180,0x72100180,0x4E100181,0x9C000620,0x6E0000A9,0x4E000005,0x4C000620,0x4400025D,0x32000622,0x6A000D2B,0x620004E9,0x46000263,0x46000851,0x3C00041A,0x30000732,0x32000D2C,0x32000831,0x2C000A06,0x22000D2B,0x4C10B0,0x4C0007F3,0x4000045C,0x3A000A12,0x3A00058B,0x2E000814,0x2E000E21,0x2C000952,0x26000ABF,0x22000DA4,0x9810B0, -0x26000BDD,0x20000CBA,0x1C000EF4,0x180010B4,0xFE0004A0,0xFA240B7A,0xFE2C0B05,0x9E000409,0x62000449,0x4C00041A,0x440002DE,0x36000555,0xD0000776,0x7C00058B,0x3E00065B,0x26000ABF,0x6C10B0,0x440D2C,0xDA180120,0x70180120,0x4E180121,0x9C000620,0x6E0000A9,0x4E000005,0x4C000620,0x4400025D,0x32000622,0x680D2B,0x620004E9,0x46000263,0x46000851,0x3C00041A, -0x30000732,0xD00D2B,0x32000831,0x2C000A06,0x22000D2B,0x680D2B,0x620004E9,0x46000263,0x46000851,0x3C00041A,0x30000732,0xD00D2B,0x32000831,0x2C000A06,0x22000D2B,0xD00D2B,0x32000831,0x2C000A06,0x22000D2B,0x22000D2B,0xFE0004A0,0xFE2C09C6,0xF63C08B8,0x9E000409,0x62000449,0x4C00041A,0x440002DE,0x36000555,0xD00006CD,0x7C00054B,0x3E00064B,0x2C000A06, -0x940D2B,0x100180,0x100180,0x100180,0x100180,0x48000000,0x48000000,0x48000000,0x22000000,0x22000000,0x16000001,0x24000120,0x24000120,0x24000120,0x1E00006D,0x1E00006D,0x1400003A,0x10000122,0x10000122,0x100000AA,0xA000122,0x180180,0x180180,0x180180,0x180000B1,0x180000B1,0x12000061,0xE000141,0xE000141,0xE0000CE,0xA000132,0x2C0180, -0x2C0180,0xA00010B,0xA000153,0x6000183,0x84000059,0xFA04002C,0x100180,0x42000075,0x3200006D,0x2200006A,0x2200005A,0x16000082,0x420000C5,0x3200009E,0x16000121,0xE0000CE,0x200180,0x180120,0x180120,0x180120,0x180120,0x48000000,0x48000000,0x48000000,0x22000000,0x22000000,0x16000001,0x240120,0x240120,0x240120,0x1E00006D,0x1E00006D, -0x1400003A,0x440120,0x440120,0x100000AA,0xA000122,0x240120,0x240120,0x240120,0x1E00006D,0x1E00006D,0x1400003A,0x440120,0x440120,0x100000AA,0xA000122,0x440120,0x440120,0x100000AA,0xA000122,0xA000122,0x84000059,0xFC080020,0x180120,0x42000075,0x3200006D,0x2200006A,0x2200005A,0x16000082,0x560000B4,0x32000095,0x300120,0x100000AA, -0x300120,0x680620,0xC2300000,0x6A300000,0x4E300001,0x2980620,0x6E0000A9,0x4E000005,0x1380620,0x4400025D,0x32000622,0x2980620,0x6E0000A9,0x4E000005,0x1380620,0x4400025D,0x32000622,0x1380620,0x4400025D,0x32000622,0x32000622,0x2980620,0x6E0000A9,0x4E000005,0x1380620,0x4400025D,0x32000622,0x1380620,0x4400025D,0x32000622,0x32000622,0x1380620, -0x4400025D,0x32000622,0x32000622,0x32000622,0xFE140320,0xE6C0620,0xF65C039D,0xA6000220,0x6A000269,0x54000249,0x46000195,0x3E000305,0xF2000352,0x8C0002A8,0x4C000161,0x32000622,0xDC0620,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table161[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x400000,0x400000,0x400000,0x400000,0x25C0000,0x25C0000,0x25C0000,0xBC0000,0xBC0000,0x1E000001,0x25C0000,0x25C0000,0x25C0000,0xBC0000,0xBC0000,0x1E000001,0xBC0000,0xBC0000,0x1E000001,0x1E000001,0x25C0000,0x25C0000,0x25C0000,0xBC0000,0xBC0000,0x1E000001,0xBC0000,0xBC0000,0x1E000001,0x1E000001,0xBC0000, -0xBC0000,0x1E000001,0x1E000001,0x1E000001,0x4480000,0x440000,0x400000,0x2540000,0x6C0000,0x880000,0x9C0000,0xE80000,0x500000,0x25C0000,0x880000,0x1E000001,0x880000,0xB00000,0x1040000,0x7FC0000,0x56000001,0x1040000,0x7FC0000,0x56000001,0x7FC0000,0x56000001,0x56000001,0x1040000,0x7FC0000,0x56000001,0x7FC0000,0x56000001, -0x56000001,0x7FC0000,0x56000001,0x56000001,0x56000001,0x1040000,0x7FC0000,0x56000001,0x7FC0000,0x56000001,0x56000001,0x7FC0000,0x56000001,0x56000001,0x56000001,0x7FC0000,0x56000001,0x56000001,0x56000001,0x56000001,0xDC0000,0xBC0000,0xBC0000,0x1280000,0x1AC0000,0x2FF00000,0x56000001,0x56000001,0xF00000,0x1480000,0x4DEC0000,0x56000001, -0x1740000,0x3C1430,0xF61402AC,0x7C1402AD,0x561402AD,0xB6000620,0x7A000059,0x5600000A,0x58000620,0x4A0001ED,0x3A000622,0x78000F80,0x680005A9,0x5200030B,0x4C000911,0x46000432,0x3A00078B,0x3A000F80,0x38000989,0x32000B46,0x26000F83,0x581430,0x580009AB,0x4C00058C,0x46000B62,0x40000613,0x340008C4,0x340010D1,0x38000AF2,0x2C000C47,0x2400102B,0xB01430, -0x2C000E45,0x26000EDA,0x200011F7,0x1C001434,0xFC080612,0xFE2C0E0A,0xFE2C0E55,0xA6000431,0x76000461,0x5A000421,0x480002C3,0x420005B3,0xD0000856,0x8600061A,0x46000791,0x2C000C47,0x7C1430,0x500F80,0xEA200200,0x7A200200,0x56200201,0xB6000620,0x7A000059,0x58040006,0x58000620,0x4A0001ED,0x3A000622,0x2740F80,0x680005A9,0x5200030B,0x4C000911,0x46000432, -0x3A00078B,0xF00F80,0x38000989,0x32000B46,0x26000F83,0x2740F80,0x680005A9,0x5200030B,0x4C000911,0x46000432,0x3A00078B,0xF00F80,0x38000989,0x32000B46,0x26000F83,0xF00F80,0x38000989,0x32000B46,0x26000F83,0x26000F83,0xFE0C05F6,0xF63C0BA4,0xFC480AC4,0xA6000431,0x76000461,0x5A000421,0x480002C3,0x420005B3,0xD00007AD,0x900005D9,0x46000781,0x32000B46, -0xA80F80,0x1402AC,0x1402AC,0x1402AC,0x1402AC,0x60000000,0x60000000,0x60000000,0x2E000000,0x2E000000,0x1E000001,0x30000200,0x30000200,0x30000200,0x220000B9,0x220000B9,0x1E000065,0x16000202,0x16000202,0x16000132,0xE000202,0x2002AB,0x2002AB,0x2002AB,0x22000132,0x22000132,0x180000B1,0x12000236,0x12000236,0x1400016E,0xE00021B,0x3C02AB, -0x3C02AB,0x100001D3,0xA000263,0xA0002AB,0xB600009D,0xFE0C00AC,0x1402AC,0x5C0000DA,0x3C0000C1,0x2E0000C1,0x2A00009D,0x220000E8,0x64000161,0x44000125,0x1E000204,0x1400016E,0x2C02AB,0x200200,0x200200,0x200200,0x200200,0x60000000,0x60000000,0x60000000,0x2E000000,0x2E000000,0x1E000001,0x300200,0x300200,0x300200,0x220000B9,0x220000B9, -0x1E000065,0x5C0200,0x5C0200,0x16000132,0xE000202,0x300200,0x300200,0x300200,0x220000B9,0x220000B9,0x1E000065,0x5C0200,0x5C0200,0x16000132,0xE000202,0x5C0200,0x5C0200,0x16000132,0xE000202,0xE000202,0xB600009D,0xFE0C0088,0x200200,0x5C0000DA,0x3C0000C1,0x2E0000C1,0x2A00009D,0x220000E8,0x6400013D,0x4A000112,0x400200,0x16000132, -0x400200,0x780620,0xCA400000,0x72400000,0x56400001,0x2B00620,0x7A000059,0x58080001,0x1680620,0x4A0001ED,0x3A000622,0x2B00620,0x7A000059,0x58080001,0x1680620,0x4A0001ED,0x3A000622,0x1680620,0x4A0001ED,0x3A000622,0x3A000622,0x2B00620,0x7A000059,0x58080001,0x1680620,0x4A0001ED,0x3A000622,0x1680620,0x4A0001ED,0x3A000622,0x3A000622,0x1680620, -0x4A0001ED,0x3A000622,0x3A000622,0x3A000622,0xF8280349,0x800620,0xFE6C039D,0xBC0001BD,0x80000209,0x5E0001D4,0x50000131,0x460002B1,0xF8040320,0x9E000239,0x560000E8,0x3A000622,0xFC0620,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table162[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x500000,0x500000,0x500000,0x500000,0x2740000,0x2740000,0x2740000,0xF00000,0xF00000,0x26000001,0x2740000,0x2740000,0x2740000,0xF00000,0xF00000,0x26000001,0xF00000,0xF00000,0x26000001,0x26000001,0x2740000,0x2740000,0x2740000,0xF00000,0xF00000,0x26000001,0xF00000,0xF00000,0x26000001,0x26000001,0xF00000, -0xF00000,0x26000001,0x26000001,0x26000001,0x5C0000,0x2540000,0x500000,0x6C0000,0x840000,0xA80000,0xC00000,0x1240000,0x640000,0x2740000,0xA80000,0x26000001,0xA80000,0xC00000,0x11C0000,0x13FC0000,0x5E000001,0x11C0000,0x13FC0000,0x5E000001,0x13FC0000,0x5E000001,0x5E000001,0x11C0000,0x13FC0000,0x5E000001,0x13FC0000,0x5E000001, -0x5E000001,0x13FC0000,0x5E000001,0x5E000001,0x5E000001,0x11C0000,0x13FC0000,0x5E000001,0x13FC0000,0x5E000001,0x5E000001,0x13FC0000,0x5E000001,0x5E000001,0x5E000001,0x13FC0000,0x5E000001,0x5E000001,0x5E000001,0x5E000001,0xF00000,0xCC0000,0xCC0000,0x3400000,0x1D40000,0x39F00000,0x5E000001,0x5E000001,0x1040000,0x1680000,0x55FC0000,0x5E000001, -0x1980000,0x441830,0xFE1C0435,0x861C042D,0x5E1C042D,0xCE000620,0x8C000025,0x62040035,0x64000620,0x56000195,0x42000622,0x8800122B,0x740006C9,0x580003F3,0x580009E9,0x4C00046A,0x400007EB,0x4200122B,0x3E000B29,0x38000CB6,0x2C00122B,0x641830,0x62000B89,0x52000704,0x52000CF2,0x460006D3,0x3A000994,0x3A0013E9,0x3E000CE2,0x32000E07,0x2A0012FF,0xCC1830, -0x32001115,0x2C00114A,0x26001547,0x20001834,0xFE1407F5,0xFE2C116A,0xF63C11FC,0xC800047D,0x8000049D,0x5E000465,0x520002DE,0x4A00061E,0xF2000977,0x9E0006AD,0x4E0008E5,0x32000E07,0x901830,0x5C122C,0xFA280320,0x84280320,0x5E280321,0xCE000620,0x8C000025,0x6008002A,0x64000620,0x56000195,0x42000622,0x84122B,0x740006C9,0x580003F3,0x580009E9,0x4C00046A, -0x400007EB,0x10C122B,0x3E000B29,0x38000CB6,0x2C00122B,0x84122B,0x740006C9,0x580003F3,0x580009E9,0x4C00046A,0x400007EB,0x10C122B,0x3E000B29,0x38000CB6,0x2C00122B,0x10C122B,0x3E000B29,0x38000CB6,0x2C00122B,0x2C00122B,0xFE140791,0xFC480DB8,0xFE4C0D38,0xC800047D,0x8000049D,0x5E000465,0x520002DE,0x4A00061E,0xFA00088B,0x9E000649,0x4E0008CC,0x38000CB6, -0xBC122B,0x1C042C,0x1C042C,0x1C042C,0x1C042C,0x78000000,0x78000000,0x78000000,0x3A000000,0x3A000000,0x26000001,0x3C000320,0x3C000320,0x3C000320,0x2E000121,0x2E000121,0x22000092,0x1C000322,0x1C000322,0x1C0001E2,0x12000322,0x224042B,0x224042B,0x224042B,0x280001E2,0x280001E2,0x2200010B,0x18000372,0x18000372,0x1A00023E,0x12000346,0x4C042B, -0x4C042B,0x160002E3,0x100003AB,0xC00042B,0xF60000FA,0xFE0C018C,0x1C042C,0x72000151,0x50000131,0x4000013A,0x38000105,0x2A000161,0x8200022D,0x500001C2,0x24000324,0x1A00023E,0x34042B,0x280320,0x280320,0x280320,0x280320,0x78000000,0x78000000,0x78000000,0x3A000000,0x3A000000,0x26000001,0x3C0320,0x3C0320,0x3C0320,0x2E000121,0x2E000121, -0x22000092,0x740320,0x740320,0x1C0001E2,0x12000322,0x3C0320,0x3C0320,0x3C0320,0x2E000121,0x2E000121,0x22000092,0x740320,0x740320,0x1C0001E2,0x12000322,0x740320,0x740320,0x1C0001E2,0x12000322,0x12000322,0xF60000FA,0xF4180139,0x280320,0x72000151,0x50000131,0x4000013A,0x38000105,0x2A000161,0x820001ED,0x5A0001A8,0x540320,0x1C0001E2, -0x540320,0x880620,0xD2500000,0x7A500000,0x5E500001,0xC80620,0x8C000025,0x60180001,0x1980620,0x56000195,0x42000622,0xC80620,0x8C000025,0x60180001,0x1980620,0x56000195,0x42000622,0x1980620,0x56000195,0x42000622,0x42000622,0xC80620,0x8C000025,0x60180001,0x1980620,0x56000195,0x42000622,0x1980620,0x56000195,0x42000622,0x42000622,0x1980620, -0x56000195,0x42000622,0x42000622,0x42000622,0xFE340355,0x900620,0xF67C03C8,0xD2000164,0x8A0001A9,0x68000190,0x580000DA,0x5000024A,0xFC140322,0xB40001E2,0x5E000095,0x42000622,0x1200620,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table163[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x600000,0x600000,0x600000,0x600000,0x28C0000,0x28C0000,0x28C0000,0x1200000,0x1200000,0x2E000001,0x28C0000,0x28C0000,0x28C0000,0x1200000,0x1200000,0x2E000001,0x1200000,0x1200000,0x2E000001,0x2E000001,0x28C0000,0x28C0000,0x28C0000,0x1200000,0x1200000,0x2E000001,0x1200000,0x1200000,0x2E000001,0x2E000001,0x1200000, -0x1200000,0x2E000001,0x2E000001,0x2E000001,0x700000,0xA640000,0x600000,0x2800000,0xA00000,0xCC0000,0xE80000,0x1640000,0x780000,0x28C0000,0xCC0000,0x2E000001,0xCC0000,0xD00000,0x1340000,0x1FF80000,0x66000001,0x1340000,0x1FF80000,0x66000001,0x1FF80000,0x66000001,0x66000001,0x1340000,0x1FF80000,0x66000001,0x1FF80000,0x66000001, -0x66000001,0x1FF80000,0x66000001,0x66000001,0x66000001,0x1340000,0x1FF80000,0x66000001,0x1FF80000,0x66000001,0x66000001,0x1FF80000,0x66000001,0x66000001,0x66000001,0x1FF80000,0x66000001,0x66000001,0x66000001,0x66000001,0x1040000,0x6DC0000,0x6DC0000,0x15C0000,0x1FC0000,0x43F00000,0x66000001,0x66000001,0x3180000,0x1840000,0x5FD00000,0x66000001, -0x1B80000,0x4C1CB0,0xFE24064C,0x92200600,0x66200601,0xE6000620,0x98000005,0x6A08008D,0x70000620,0x5C00013D,0x4A000622,0x9A00152B,0x80000829,0x62000519,0x62000AC2,0x580004BA,0x46000863,0x4A00152C,0x44000D11,0x3E000E56,0x3200152B,0x701CB0,0x6E000DF9,0x580008DC,0x58000EB2,0x520007AB,0x46000A74,0x46001751,0x44000F22,0x38000FFF,0x30001633,0xE41CB0, -0x3800144D,0x3200140A,0x2C00190F,0x24001CB4,0xFE140A75,0xF63C1528,0xFA44160C,0xDE0004E3,0x90000506,0x6E0004C2,0x5E000302,0x500006CB,0xFA000B2B,0xB2000792,0x56000A45,0x38000FFF,0xA01CB0,0x64152C,0xFE300490,0x8E300480,0x66300481,0xE6000620,0x98000005,0x6A08007D,0x70000620,0x5C00013D,0x4A000622,0x98152B,0x80000829,0x62000519,0x62000AC2,0x580004BA, -0x46000863,0x130152B,0x44000D11,0x3E000E56,0x3200152B,0x98152B,0x80000829,0x62000519,0x62000AC2,0x580004BA,0x46000863,0x130152B,0x44000D11,0x3E000E56,0x3200152B,0x130152B,0x44000D11,0x3E000E56,0x3200152B,0x3200152B,0xFE2009B6,0xFE4C1044,0xF65C1031,0xDE0004E3,0x90000506,0x6E0004C2,0x5E000302,0x500006CB,0xFA000A2B,0xB2000719,0x56000A2C,0x3E000E56, -0xD8152B,0x200600,0x200600,0x200600,0x200600,0x90000000,0x90000000,0x90000000,0x46000000,0x46000000,0x2E000001,0x48000480,0x48000480,0x48000480,0x3A0001A9,0x3A0001A9,0x280000DA,0x22000480,0x22000480,0x200002C5,0x16000482,0x300600,0x300600,0x300600,0x2E0002C2,0x2E0002C2,0x28000183,0x1E0004F6,0x1E0004F6,0x1C000336,0x160004C2,0x5C0600, -0x5C0600,0x16000433,0x14000556,0xE000603,0xF600018A,0xF21402D9,0x200600,0x900001E1,0x5E0001BA,0x480001BA,0x40000172,0x2E000212,0x9600032B,0x660002A1,0x2C000489,0x1C000336,0x400600,0x300480,0x300480,0x300480,0x300480,0x90000000,0x90000000,0x90000000,0x46000000,0x46000000,0x2E000001,0x2440480,0x2440480,0x2440480,0x3A0001A9,0x3A0001A9, -0x280000DA,0x8C0480,0x8C0480,0x200002C5,0x16000482,0x2440480,0x2440480,0x2440480,0x3A0001A9,0x3A0001A9,0x280000DA,0x8C0480,0x8C0480,0x200002C5,0x16000482,0x8C0480,0x8C0480,0x200002C5,0x16000482,0x16000482,0xF600018A,0xF8200221,0x300480,0x900001E1,0x5E0001BA,0x480001BA,0x40000172,0x2E000212,0xA40002D5,0x6C000274,0x640480,0x200002C5, -0x640480,0x980620,0xDA600000,0x82600000,0x66600001,0xE00620,0x98000005,0x68280001,0x1CC0620,0x5C00013D,0x4A000622,0xE00620,0x98000005,0x68280001,0x1CC0620,0x5C00013D,0x4A000622,0x1CC0620,0x5C00013D,0x4A000622,0x4A000622,0xE00620,0x98000005,0x68280001,0x1CC0620,0x5C00013D,0x4A000622,0x1CC0620,0x5C00013D,0x4A000622,0x4A000622,0x1CC0620, -0x5C00013D,0x4A000622,0x4A000622,0x4A000622,0xFC4C0374,0x8A00620,0xFE8C03C8,0xE600010D,0xA0000161,0x76000132,0x5E000091,0x58000202,0xFE2C0349,0xC600019A,0x6A000061,0x4A000622,0x1400620,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table164[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000, -0x2180000,0x300000,0x300000,0x300000,0x8000000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x300000,0x300000,0x300000,0x8000000,0x300000,0x300000,0x300000,0x8000000,0x8000000,0x140000,0x100001,0x100001,0x140000,0x2140000,0x180000,0x180000,0x1C0000,0x140000,0x2140000,0x240000,0x300000, -0x240000,0x700001,0x700001,0x700001,0x700001,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x38000000,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x38000000,0x1580000,0x1580000,0x38000000,0x38000000,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x38000000,0x1580000,0x1580000,0x38000000,0x38000000,0x1580000, -0x1580000,0x38000000,0x38000000,0x38000000,0x840000,0x4780000,0x700001,0x2980000,0xC00000,0xF00000,0x1140000,0x1A40000,0x48C0000,0xA80000,0xF00000,0x38000000,0xF00000,0xE00001,0x1500000,0x2DF80000,0x70000000,0x1500000,0x2DF80000,0x70000000,0x2DF80000,0x70000000,0x70000000,0x1500000,0x2DF80000,0x70000000,0x2DF80000,0x70000000, -0x70000000,0x2DF80000,0x70000000,0x70000000,0x70000000,0x1500000,0x2DF80000,0x70000000,0x2DF80000,0x70000000,0x70000000,0x2DF80000,0x70000000,0x70000000,0x70000000,0x2DF80000,0x70000000,0x70000000,0x70000000,0x70000000,0x11C0000,0xF00000,0xF00000,0x17C0000,0x11FC0000,0x4DF80000,0x70000000,0x70000000,0x1340000,0x1A80000,0x69C40000,0x70000000, -0x1E00000,0x581F9B,0xFE30080B,0x9C2C0756,0x702C0756,0xFA080649,0xA408002A,0x741000FA,0x7C080649,0x68040139,0x54080649,0xB600152B,0x92000706,0x6E000496,0x740009B1,0x62000339,0x5200076C,0x5800152B,0x50000BF2,0x44000D79,0x3A00152C,0x841F99,0x7A000EA6,0x62000946,0x68000E9D,0x5E00071A,0x4C000A11,0x5200182C,0x4E000EC8,0x44000F8A,0x3A001695,0x10C1F99, -0x440015A6,0x3E00150D,0x32001A90,0x2C001F99,0xFE200C15,0xFE4C178F,0xFE4C18DF,0xF400035A,0xA2000391,0x7C00033A,0x660001B8,0x60000548,0xFE000BDB,0xC6000621,0x5E000926,0x44000F8A,0xBC1F99,0x78152B,0xFE4404A6,0x96440482,0x70400482,0xEE140621,0xA0100006,0x741C007A,0x78140621,0x660C0131,0x54100621,0x2B0152B,0x92000706,0x6E000496,0x740009B1,0x62000339, -0x5200076C,0x168152B,0x50000BF2,0x44000D79,0x3A00152C,0x2B0152B,0x92000706,0x6E000496,0x740009B1,0x62000339,0x5200076C,0x168152B,0x50000BF2,0x44000D79,0x3A00152C,0x168152B,0x50000BF2,0x44000D79,0x3A00152C,0x3A00152C,0xFE340A02,0xFA641079,0xFE6C1036,0xF400035A,0xA2000391,0x7C00033A,0x660001B8,0x60000548,0xFE080A6C,0xC6000591,0x5E000902,0x44000D79, -0xFC152B,0x2C0756,0x2C0756,0x2C0756,0x2C0756,0xA4080029,0xA4080029,0xA4080029,0x52080029,0x52080029,0x38080029,0x64000480,0x64000480,0x64000480,0x46000115,0x46000115,0x34000050,0x30000480,0x30000480,0x2C000249,0x20000480,0x400756,0x400756,0x400756,0x3A0002EE,0x3A0002EE,0x2E000169,0x2800055B,0x2800055B,0x2600030E,0x1E0004EC,0x800756, -0x800756,0x200004CD,0x1C000609,0x14000759,0xFC0C01B4,0xF82003D5,0x2C0756,0xB2000151,0x7C000124,0x5E000131,0x560000DD,0x40000172,0xC20002F5,0x8400022D,0x3E000490,0x2600030E,0x5C0756,0x400482,0x400482,0x400482,0x400482,0x98140001,0x98140001,0x98140001,0x50100001,0x50100001,0x38100001,0x600480,0x600480,0x600480,0x46000115,0x46000115, -0x34000050,0xC40480,0xC40480,0x2C000249,0x20000480,0x600480,0x600480,0x600480,0x46000115,0x46000115,0x34000050,0xC40480,0xC40480,0x2C000249,0x20000480,0xC40480,0xC40480,0x2C000249,0x20000480,0x20000480,0xFE100188,0xFE2C0239,0x400482,0xB2000151,0x7C000124,0x5E000131,0x560000DD,0x40000172,0xD6000262,0x900001E1,0x8C0480,0x2C000249, -0x8C0480,0xA80622,0xE0740001,0x8C700001,0x70700001,0xFC0620,0xA4080001,0x703C0000,0x3F80620,0x660000F4,0x54000620,0xFC0620,0xA4080001,0x703C0000,0x3F80620,0x660000F4,0x54000620,0x3F80620,0x660000F4,0x54000620,0x54000620,0xFC0620,0xA4080001,0x703C0000,0x3F80620,0x660000F4,0x54000620,0x3F80620,0x660000F4,0x54000620,0x54000620,0x3F80620, -0x660000F4,0x54000620,0x54000620,0x54000620,0xF668039D,0x2B40620,0xF8A003F5,0xFC0000C8,0xAA000109,0x7E0400F2,0x6C000050,0x640001B1,0xF4480372,0xD800013D,0x74000022,0x54000620,0x1680620,0x80029,0x80029,0x80029,0x80029,0x80029,0x80029,0x80029,0x80029,0x80029,0x80029,0x1A000000,0x1A000000,0x1A000000,0x1A000000,0x1A000000, -0x1A000000,0xC000000,0xC000000,0xC000000,0x8000000,0xC0029,0xC0029,0xC0029,0xC0029,0xC0029,0xC0029,0xC000010,0xC000010,0xC000010,0x6000008,0x140029,0x140029,0x140029,0x4000019,0x4000029,0x88000000,0x80029,0x80029,0x3C000000,0x2A000000,0x20000000,0x20000000,0x14000000,0x3600000A,0x24000005,0x10000001,0xC000010, -0x100029,}; -static const uint32_t g_etc1_to_bc7_m6_table165[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000, -0x2300000,0x640000,0x640000,0x640000,0x10000000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x640000,0x640000,0x640000,0x10000000,0x640000,0x640000,0x640000,0x10000000,0x10000000,0x240000,0x200001,0x200001,0x280000,0x2280000,0x22C0000,0x22C0000,0x380000,0x280000,0x2280000,0x480000,0x640000, -0x480000,0x800001,0x800001,0x800001,0x800001,0xC00000,0xC00000,0xC00000,0x1880000,0x1880000,0x40000000,0xC00000,0xC00000,0xC00000,0x1880000,0x1880000,0x40000000,0x1880000,0x1880000,0x40000000,0x40000000,0xC00000,0xC00000,0xC00000,0x1880000,0x1880000,0x40000000,0x1880000,0x1880000,0x40000000,0x40000000,0x1880000, -0x1880000,0x40000000,0x40000000,0x40000000,0x980000,0xC880000,0x800001,0xB00000,0xD80000,0x1140000,0x13C0000,0x1E40000,0x4A00000,0xC00000,0x1140000,0x40000000,0x1140000,0xF00001,0x1680000,0x39F80000,0x78000000,0x1680000,0x39F80000,0x78000000,0x39F80000,0x78000000,0x78000000,0x1680000,0x39F80000,0x78000000,0x39F80000,0x78000000, -0x78000000,0x39F80000,0x78000000,0x78000000,0x78000000,0x1680000,0x39F80000,0x78000000,0x39F80000,0x78000000,0x78000000,0x39F80000,0x78000000,0x78000000,0x78000000,0x39F80000,0x78000000,0x78000000,0x78000000,0x78000000,0x1300000,0x9000000,0x9000000,0x3940000,0x1FF80000,0x57F80000,0x78000000,0x78000000,0x1480000,0x1C40000,0x71D40000,0x78000000, -0x3FC0000,0x642297,0xFE3C0A17,0xA63808E2,0x783808E2,0xFC1406C7,0xAE100096,0x7E1801AE,0x861006B1,0x6E100185,0x5C1006B1,0xCE00152B,0xA2000619,0x78000482,0x800008D9,0x6E000231,0x580006D0,0x6400152B,0x5C000B02,0x50000C99,0x4200152C,0x982295,0x8C000F9A,0x6E000A26,0x74000F05,0x620006C9,0x58000A19,0x5E001914,0x56000E8F,0x4A000F5A,0x400016F9,0x1302295, -0x4A00172A,0x4400162D,0x38001C24,0x32002295,0xFE340DE6,0xFE4C1A9F,0xF8601BF2,0xFE000293,0xB6000271,0x88000239,0x740000E4,0x64000421,0xFE100D87,0xDA000512,0x6E00085A,0x4A000F5A,0xD82295,0x88152B,0xFC5804D2,0x9E540482,0x78500482,0xF6240621,0xA8200006,0x7C2C007A,0x80240621,0x6E1C0131,0x5C200621,0xC8152B,0xA2000619,0x78040480,0x800008D9,0x6E000231, -0x580006D0,0x198152B,0x5C000B02,0x50000C99,0x4200152C,0xC8152B,0xA2000619,0x78040480,0x800008D9,0x6E000231,0x580006D0,0x198152B,0x5C000B02,0x50000C99,0x4200152C,0x198152B,0x5C000B02,0x50000C99,0x4200152C,0x4200152C,0xFC4C0A7E,0xFE6C10C1,0xF880108B,0xFE000293,0xB6000271,0x88000239,0x740000E4,0x64000421,0xFE240AC5,0xDA00044E,0x6E000829,0x50000C99, -0x120152B,0x3808E2,0x3808E2,0x3808E2,0x3808E2,0xB4100091,0xB4100091,0xB4100091,0x5C100091,0x5C100091,0x40100091,0x7C000480,0x7C000480,0x7C000480,0x580000A9,0x580000A9,0x40000010,0x3C000480,0x3C000480,0x320001E5,0x28000480,0x5008E1,0x5008E1,0x5008E1,0x46000356,0x46000356,0x3A000191,0x340005D3,0x340005D3,0x30000300,0x28000529,0xA008E1, -0xA008E1,0x26000599,0x200006E4,0x1A0008E1,0xFE100248,0xFE2C050D,0x3808E2,0xD00000E1,0x900000B4,0x6E0000B4,0x66000074,0x4C000104,0xF40002E3,0xA60001E9,0x4C000499,0x30000300,0x7008E1,0x500482,0x500482,0x500482,0x500482,0xA0240001,0xA0240001,0xA0240001,0x58200001,0x58200001,0x40200001,0x780480,0x780480,0x780480,0x580000A9,0x580000A9, -0x40000010,0xF40480,0xF40480,0x320001E5,0x28000480,0x780480,0x780480,0x780480,0x580000A9,0x580000A9,0x40000010,0xF40480,0xF40480,0x320001E5,0x28000480,0xF40480,0xF40480,0x320001E5,0x28000480,0x28000480,0xFE2001A5,0xFA440242,0x500482,0xD00000E1,0x900000B4,0x6E0000B4,0x66000074,0x4C000104,0xF4000202,0xAC000184,0xAC0480,0x320001E5, -0xAC0480,0xB80622,0xE8840001,0x94800001,0x78800001,0x1140620,0xAC180001,0x784C0000,0xFF80620,0x720000B4,0x5C000620,0x1140620,0xAC180001,0x784C0000,0xFF80620,0x720000B4,0x5C000620,0xFF80620,0x720000B4,0x5C000620,0x5C000620,0x1140620,0xAC180001,0x784C0000,0xFF80620,0x720000B4,0x5C000620,0xFF80620,0x720000B4,0x5C000620,0x5C000620,0xFF80620, -0x720000B4,0x5C000620,0x5C000620,0x5C000620,0xFE78039D,0xAC40620,0xFEAC03F9,0xFE1400DD,0xC00000CD,0x900000A9,0x74000020,0x6C000171,0xFC580372,0xEE0000FA,0x7E000008,0x5C000620,0x18C0620,0x100091,0x100091,0x100091,0x100091,0x100091,0x100091,0x100091,0x100091,0x100091,0x100091,0x32000000,0x32000000,0x32000000,0x32000000,0x32000000, -0x32000000,0x18000000,0x18000000,0x18000000,0x10000000,0x180091,0x180091,0x180091,0x180091,0x180091,0x180091,0x12000034,0x12000034,0x12000034,0xE00001D,0x2C0091,0x2C0091,0x2C0091,0xA000055,0x8000091,0xF8000001,0x100091,0x100091,0x76000000,0x52000000,0x3E000000,0x3E000000,0x28000000,0x6000002D,0x42000019,0x1E000005,0x12000034, -0x200091,}; -static const uint32_t g_etc1_to_bc7_m6_table166[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x300001,0x480000,0x480000,0x480000,0x480000,0x480000, -0x480000,0x940000,0x940000,0x940000,0x18000000,0x480000,0x480000,0x480000,0x480000,0x480000,0x480000,0x940000,0x940000,0x940000,0x18000000,0x940000,0x940000,0x940000,0x18000000,0x18000000,0x2340000,0x300001,0x300001,0x4380000,0x23C0000,0x440000,0x440000,0x540000,0x4380000,0x23C0000,0x680000,0x940000, -0x680000,0x900001,0x900001,0x900001,0x900001,0xD80000,0xD80000,0xD80000,0x1B80000,0x1B80000,0x48000000,0xD80000,0xD80000,0xD80000,0x1B80000,0x1B80000,0x48000000,0x1B80000,0x1B80000,0x48000000,0x48000000,0xD80000,0xD80000,0xD80000,0x1B80000,0x1B80000,0x48000000,0x1B80000,0x1B80000,0x48000000,0x48000000,0x1B80000, -0x1B80000,0x48000000,0x48000000,0x48000000,0x2A80000,0x9C0000,0x900001,0x2C40000,0xF40000,0x1340000,0x1640000,0x7FC0000,0x4B40000,0xD80000,0x1340000,0x48000000,0x1340000,0x1000001,0x1800000,0x45F80000,0x80000000,0x1800000,0x45F80000,0x80000000,0x45F80000,0x80000000,0x80000000,0x1800000,0x45F80000,0x80000000,0x45F80000,0x80000000, -0x80000000,0x45F80000,0x80000000,0x80000000,0x80000000,0x1800000,0x45F80000,0x80000000,0x45F80000,0x80000000,0x80000000,0x45F80000,0x80000000,0x80000000,0x80000000,0x45F80000,0x80000000,0x80000000,0x80000000,0x80000000,0x1440000,0x1140000,0x1140000,0x1B00000,0x2BFC0000,0x61FC0000,0x80000000,0x80000000,0x35C0000,0x1E40000,0x79E40000,0x80000000, -0x13FC0000,0x7025F3,0xFE480C9B,0xB0400AC2,0x80400AC2,0xFE2407C7,0xBA1C0142,0x842402A6,0x90180759,0x78180209,0x64180759,0xE600152B,0xAE000579,0x820804A6,0x8C000821,0x76000159,0x62000659,0x7000152B,0x66000A44,0x56000BE9,0x4A00152C,0xA825F1,0x980010EA,0x7A000B86,0x80000FAD,0x6E0006D1,0x5E000A4D,0x680019F0,0x60000E80,0x54000F30,0x4600177D,0x15825F1, -0x50001916,0x4A00179D,0x3E001DF0,0x380025F1,0xFE3C105F,0xFA641D9D,0xFE6C1F1A,0xFE0802E9,0xCA000191,0x96000163,0x7C000051,0x7000031D,0xFE180FB2,0xF2000433,0x7600075A,0x54000F30,0xF025F1,0x98152B,0xFE6804F6,0xA6640482,0x80600482,0xFE340621,0xB0300006,0x843C007A,0x88340621,0x762C0131,0x64300621,0xE0152B,0xAE000579,0x80140480,0x8C000821,0x76000159, -0x62000659,0x1CC152B,0x66000A44,0x56000BE9,0x4A00152C,0xE0152B,0xAE000579,0x80140480,0x8C000821,0x76000159,0x62000659,0x1CC152B,0x66000A44,0x56000BE9,0x4A00152C,0x1CC152B,0x66000A44,0x56000BE9,0x4A00152C,0x4A00152C,0xFE580AE5,0xFC8810CB,0xFE8C1093,0xFE1402D6,0xCA000191,0x96000163,0x7C000051,0x7000031D,0xFE340B25,0xF2000352,0x76000729,0x56000BE9, -0x140152B,0x400AC2,0x400AC2,0x400AC2,0x400AC2,0xC4180139,0xC4180139,0xC4180139,0x66180139,0x66180139,0x48180139,0x94000480,0x94000480,0x94000480,0x62000055,0x62000055,0x48000001,0x48000480,0x48000480,0x3E000185,0x30000480,0x600AC1,0x600AC1,0x600AC1,0x520003FE,0x520003FE,0x40000209,0x4000066B,0x4000066B,0x3A00030E,0x2E000569,0xC40AC1, -0xC40AC1,0x2C0006AD,0x260007EC,0x20000AC1,0xFE200335,0xF43806DA,0x400AC2,0xFA000088,0xA8000061,0x80000061,0x78000032,0x5A0000B4,0xFE000332,0xC40001B1,0x5C0004A4,0x3A00030E,0x8C0AC1,0x600482,0x600482,0x600482,0x600482,0xA8340001,0xA8340001,0xA8340001,0x60300001,0x60300001,0x48300001,0x900480,0x900480,0x900480,0x62000055,0x62000055, -0x48040000,0x1240480,0x1240480,0x3E000185,0x30000480,0x900480,0x900480,0x900480,0x62000055,0x62000055,0x48040000,0x1240480,0x1240480,0x3E000185,0x30000480,0x1240480,0x1240480,0x3E000185,0x30000480,0x30000480,0xFA3401C2,0xFE4C0262,0x600482,0xFA000088,0xA8000061,0x80000061,0x78000032,0x5A0000B4,0xFE0C0200,0xC4000121,0xD00480,0x3E000185, -0xD00480,0xC80622,0xF0940001,0x9C900001,0x80900001,0x12C0620,0xB4280001,0x805C0000,0x1BF80620,0x78000080,0x64000620,0x12C0620,0xB4280001,0x805C0000,0x1BF80620,0x78000080,0x64000620,0x1BF80620,0x78000080,0x64000620,0x64000620,0x12C0620,0xB4280001,0x805C0000,0x1BF80620,0x78000080,0x64000620,0x1BF80620,0x78000080,0x64000620,0x64000620,0x1BF80620, -0x78000080,0x64000620,0x64000620,0x64000620,0xFA8C03C8,0xD80620,0xF8C00422,0xFE2C0109,0xCA000091,0x9A00006A,0x7E000008,0x76000128,0xFC6C039D,0xFC0000C8,0x88000001,0x64000620,0x1AC0620,0x180139,0x180139,0x180139,0x180139,0x180139,0x180139,0x180139,0x180139,0x180139,0x180139,0x4A000000,0x4A000000,0x4A000000,0x4A000000,0x4A000000, -0x4A000000,0x24000000,0x24000000,0x24000000,0x18000000,0x240139,0x240139,0x240139,0x240139,0x240139,0x240139,0x1E000074,0x1E000074,0x1E000074,0x18000040,0x440139,0x440139,0x440139,0x100000B9,0xC000139,0xFC080029,0x180139,0x180139,0xAE000000,0x78000000,0x5C000000,0x5C000000,0x3C000000,0x92000061,0x74000032,0x2E000009,0x1E000074, -0x300139,}; -static const uint32_t g_etc1_to_bc7_m6_table167[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x600000,0x600000,0x600000,0x600000,0x600000, -0x600000,0xC40000,0xC40000,0xC40000,0x20000000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0xC40000,0xC40000,0xC40000,0x20000000,0xC40000,0xC40000,0xC40000,0x20000000,0x20000000,0xA440000,0x400001,0x400001,0x4C0000,0x4500000,0x580000,0x580000,0x26C0000,0x4C0000,0x4500000,0x8C0000,0xC40000, -0x8C0000,0xA00001,0xA00001,0xA00001,0xA00001,0xF00000,0xF00000,0xF00000,0x1E80000,0x1E80000,0x50000000,0xF00000,0xF00000,0xF00000,0x1E80000,0x1E80000,0x50000000,0x1E80000,0x1E80000,0x50000000,0x50000000,0xF00000,0xF00000,0xF00000,0x1E80000,0x1E80000,0x50000000,0x1E80000,0x1E80000,0x50000000,0x50000000,0x1E80000, -0x1E80000,0x50000000,0x50000000,0x50000000,0xBC0000,0xAC0000,0xA00001,0xDC0000,0x1100000,0x1580000,0x18C0000,0x13F80000,0xCC0000,0xF00000,0x1580000,0x50000000,0x1580000,0x1100001,0x1980000,0x51F80000,0x88000000,0x1980000,0x51F80000,0x88000000,0x51F80000,0x88000000,0x88000000,0x1980000,0x51F80000,0x88000000,0x51F80000,0x88000000, -0x88000000,0x51F80000,0x88000000,0x88000000,0x88000000,0x1980000,0x51F80000,0x88000000,0x51F80000,0x88000000,0x88000000,0x51F80000,0x88000000,0x88000000,0x88000000,0x51F80000,0x88000000,0x88000000,0x88000000,0x88000000,0x1580000,0x1240000,0x1240000,0x1CC0000,0x39FC0000,0x6BFC0000,0x88000000,0x88000000,0x1740000,0x5FC0000,0x81F40000,0x88000000, -0x21FC0000,0x7C29AF,0xFE540F97,0xB84C0CF6,0x884C0CF6,0xFE300953,0xC4240236,0x8E2C03EA,0x9A200841,0x801C02D1,0x6C200841,0xFE00152B,0xC00004F9,0x8C100502,0x98000789,0x800000B5,0x6C000629,0x7C00152B,0x72000984,0x60000B40,0x5200152C,0xBC29AD,0xA800126A,0x82000D65,0x8C001095,0x7A000739,0x68000AC5,0x74001B10,0x68000E8C,0x5C000F2A,0x52001805,0x17C29AD, -0x5C001B46,0x5000195D,0x4A001FD0,0x3E0029AD,0xFE501342,0xFE6C2105,0xFE6C231A,0xFE1403DA,0xD80000EA,0xA20000B9,0x88000009,0x7A000236,0xFE2412A6,0xFC00038C,0x7E0006CC,0x5C000F2A,0x10C29AD,0xA8152B,0xFE78053B,0xAE740482,0x88700482,0xFE440629,0xB8400006,0x8C4C007A,0x90440621,0x7E3C0131,0x6C400621,0xF8152B,0xC00004F9,0x88240480,0x98000789,0x800000B5, -0x6C000629,0x1FC152B,0x72000984,0x60000B40,0x5200152C,0xF8152B,0xC00004F9,0x88240480,0x98000789,0x800000B5,0x6C000629,0x1FC152B,0x72000984,0x60000B40,0x5200152C,0x1FC152B,0x72000984,0x60000B40,0x5200152C,0x5200152C,0xFC740B5A,0xF4981121,0xF8A010E6,0xFE240355,0xD80000EA,0xA20000B9,0x88000009,0x7A000236,0xFE440B94,0xFC00028C,0x7E00068C,0x60000B40, -0x164152B,0x4C0CF6,0x4C0CF6,0x4C0CF6,0x4C0CF6,0xD4200221,0xD4200221,0xD4200221,0x70200221,0x70200221,0x50200221,0xAC000480,0xAC000480,0xAC000480,0x7400001D,0x7400001D,0x52040018,0x54000480,0x54000480,0x44000139,0x38000480,0x700CF6,0x700CF6,0x700CF6,0x620004C5,0x620004C5,0x4C0002C1,0x4C000723,0x4C000723,0x40000332,0x340005C1,0xE40CF6, -0xE40CF6,0x380007FD,0x2C000924,0x24000CF9,0xFE2C0498,0xFA4408C6,0x4C0CF6,0xFE080098,0xBC000029,0x9200002D,0x84000008,0x6A000068,0xFE000452,0xD4000186,0x660004AC,0x40000332,0xA00CF6,0x700482,0x700482,0x700482,0x700482,0xB0440001,0xB0440001,0xB0440001,0x68400001,0x68400001,0x50400001,0xA80480,0xA80480,0xA80480,0x7400001D,0x7400001D, -0x50140000,0x1580480,0x1580480,0x44000139,0x38000480,0xA80480,0xA80480,0xA80480,0x7400001D,0x7400001D,0x50140000,0x1580480,0x1580480,0x44000139,0x38000480,0x1580480,0x1580480,0x44000139,0x38000480,0x38000480,0xF64801E1,0xFA640265,0x700482,0xFE080088,0xBC000029,0x9200002D,0x84000008,0x6A000068,0xF6240221,0xDA0000CA,0xF00480,0x44000139, -0xF00480,0xD80622,0xF8A40001,0xA4A00001,0x88A00001,0x1440620,0xBC380001,0x886C0000,0x27F80620,0x84000050,0x6C000620,0x1440620,0xBC380001,0x886C0000,0x27F80620,0x84000050,0x6C000620,0x27F80620,0x84000050,0x6C000620,0x6C000620,0x1440620,0xBC380001,0x886C0000,0x27F80620,0x84000050,0x6C000620,0x27F80620,0x84000050,0x6C000620,0x6C000620,0x27F80620, -0x84000050,0x6C000620,0x6C000620,0x6C000620,0xFE9403E8,0xE80620,0xFECC042A,0xFE400128,0xE000006D,0xA6000050,0x88000000,0x7E0000F4,0xF88403C8,0xFE1400E1,0x90100001,0x6C000620,0x1D00620,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x64000000,0x64000000,0x64000000,0x64000000,0x64000000, -0x64000000,0x30000000,0x30000000,0x30000000,0x20000000,0x300221,0x300221,0x300221,0x300221,0x300221,0x300221,0x280000C2,0x280000C2,0x280000C2,0x1E000068,0x5C0221,0x5C0221,0x5C0221,0x16000145,0x10000221,0xFE0C009D,0x200221,0x200221,0xE8000000,0xA0000000,0x7A000000,0x7A000000,0x50000000,0xC40000A9,0x96000055,0x3E000010,0x280000C2, -0x400221,}; -static const uint32_t g_etc1_to_bc7_m6_table168[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000, -0x7C0000,0xFC0000,0xFC0000,0xFC0000,0x28000001,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0xFC0000,0xFC0000,0xFC0000,0x28000001,0xFC0000,0xFC0000,0xFC0000,0x28000001,0x28000001,0x4580000,0x540000,0x540000,0x2600000,0x680000,0x2700000,0x2700000,0x8C0000,0x2600000,0x680000,0xB00000,0xFC0000, -0xB00000,0xB40000,0xB40000,0xB40000,0xB40000,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0x58000001,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0x58000001,0xBF80000,0xBF80000,0x58000001,0x58000001,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0x58000001,0xBF80000,0xBF80000,0x58000001,0x58000001,0xBF80000, -0xBF80000,0x58000001,0x58000001,0x58000001,0xD00000,0xC00000,0xB40000,0xF40000,0x32C0000,0x17C0000,0x1B80000,0x1FF80000,0x2E00000,0x10C0000,0x17C0000,0x58000001,0x17C0000,0x1240000,0x3B00000,0x5DFC0000,0x90000001,0x3B00000,0x5DFC0000,0x90000001,0x5DFC0000,0x90000001,0x90000001,0x3B00000,0x5DFC0000,0x90000001,0x5DFC0000,0x90000001, -0x90000001,0x5DFC0000,0x90000001,0x90000001,0x90000001,0x3B00000,0x5DFC0000,0x90000001,0x5DFC0000,0x90000001,0x90000001,0x5DFC0000,0x90000001,0x90000001,0x90000001,0x5DFC0000,0x90000001,0x90000001,0x90000001,0x90000001,0x36C0000,0x1380000,0x1380000,0x1EC0000,0x49F80000,0x77F40000,0x90000001,0x90000001,0x18C0000,0x17FC0000,0x8BE80000,0x90000001, -0x33FC0000,0x8C2E54,0xFE681371,0xC2580FD9,0x90580FD9,0xFE3C0BBC,0xD42C039D,0x9A3805B5,0xA42C0994,0x8A240406,0x74280996,0xFE0C1590,0xD20004A2,0x961805A9,0xA80006E9,0x8C000042,0x7404062A,0x8A00152B,0x780008B3,0x6C000A83,0x5C00152B,0xCC2E54,0xB40014BB,0x8E000FFC,0x980011E6,0x8000080E,0x6E000BB6,0x80001C6F,0x74000EBF,0x66000F3F,0x5800189C,0x1A02E54, -0x60001E64,0x5C001B96,0x50002227,0x44002E54,0xFE58170A,0xF88025CC,0xFA842794,0xFE2405E6,0xEA000061,0xB0000042,0x9204000A,0x86000173,0xFE3415FA,0xFE00042C,0x90000627,0x66000F3F,0x1242E54,0xB8152C,0xFE8C0581,0xB8840480,0x90840481,0xFE5C0642,0xC2540005,0x945C007D,0x9A540620,0x864C0132,0x74540622,0x114152B,0xD20004A2,0x90380481,0xA80006E9,0x8C000042, -0x740C0622,0xFF8152B,0x780008B3,0x6C000A83,0x5C00152B,0x114152B,0xD20004A2,0x90380481,0xA80006E9,0x8C000042,0x740C0622,0xFF8152B,0x780008B3,0x6C000A83,0x5C00152B,0xFF8152B,0x780008B3,0x6C000A83,0x5C00152B,0x5C00152B,0xFE800BBE,0xFEAC1122,0xFEAC1101,0xFE4003CB,0xEA000061,0xB0000042,0x92080002,0x86000173,0xFE5C0BD5,0xFE1002E5,0x900005D6,0x6C000A83, -0x18C152B,0x580FD8,0x580FD8,0x580FD8,0x580FD8,0xE42C0374,0xE42C0374,0xE42C0374,0x7A2C0374,0x7A2C0374,0x58280375,0xC8000480,0xC8000480,0xC8000480,0x82000005,0x82000005,0x5C0C005E,0x60000482,0x60000482,0x500000E1,0x40000482,0x2800FD8,0x2800FD8,0x2800FD8,0x6E000615,0x6E000615,0x520003EB,0x58000811,0x58000811,0x4C00037E,0x4000063B,0x1080FD8, -0x1080FD8,0x3E0009BD,0x32000AC6,0x2A000FDB,0xFC380694,0xFE4C0B58,0x580FD8,0xFE100141,0xDA000005,0xA6000005,0x98000002,0x7600002D,0xFE1405F9,0xF6000172,0x7C0004C0,0x4C00037E,0xB80FD8,0x840480,0x840480,0x840480,0x840480,0xBA540000,0xBA540000,0xBA540000,0x70540000,0x70540000,0x58540001,0xC40480,0xC40480,0xC40480,0x82000005,0x82000005, -0x5A240001,0x18C0480,0x18C0480,0x500000E1,0x40000482,0xC40480,0xC40480,0xC40480,0x82000005,0x82000005,0x5A240001,0x18C0480,0x18C0480,0x500000E1,0x40000482,0x18C0480,0x18C0480,0x500000E1,0x40000482,0x40000482,0xFE5801E1,0xF4780288,0x840480,0xFE2400A2,0xDA000005,0xA6000005,0x96040000,0x7600002D,0xFE340221,0xFC000082,0x1180480,0x500000E1, -0x1180480,0xEC0620,0xFEB40004,0xACB40000,0x90B40001,0x35C0620,0xC44C0001,0x927C0001,0x33FC0620,0x8E00002D,0x74000622,0x35C0620,0xC44C0001,0x927C0001,0x33FC0620,0x8E00002D,0x74000622,0x33FC0620,0x8E00002D,0x74000622,0x74000622,0x35C0620,0xC44C0001,0x927C0001,0x33FC0620,0x8E00002D,0x74000622,0x33FC0620,0x8E00002D,0x74000622,0x74000622,0x33FC0620, -0x8E00002D,0x74000622,0x74000622,0x74000622,0xFEB403F5,0xFC0620,0xFAE40451,0xFE580164,0xEA00003D,0xB2000022,0x90140001,0x8A0000C1,0xFE9803C8,0xFE380120,0x9A200000,0x74000622,0x1F40620,0x280374,0x280374,0x280374,0x280374,0x280374,0x280374,0x280374,0x280374,0x280374,0x280374,0x7E000000,0x7E000000,0x7E000000,0x7E000000,0x7E000000, -0x7E000000,0x3E000000,0x3E000000,0x3E000000,0x28000001,0x23C0372,0x23C0372,0x23C0372,0x23C0372,0x23C0372,0x23C0372,0x2E000145,0x2E000145,0x2E000145,0x240000A9,0x7C0372,0x7C0372,0x7C0372,0x1C000212,0x14000372,0xF61C016D,0x280374,0x280374,0xFE040014,0xCC000000,0x9C000000,0x9C000000,0x66000000,0xF6000112,0xB4000092,0x4E000019,0x2E000145, -0x580372,}; -static const uint32_t g_etc1_to_bc7_m6_table169[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x940000,0x940000,0x940000,0x940000,0x940000, -0x940000,0x12C0000,0x12C0000,0x12C0000,0x30000001,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x12C0000,0x12C0000,0x12C0000,0x30000001,0x12C0000,0x12C0000,0x12C0000,0x30000001,0x30000001,0xC680000,0x640000,0x640000,0x740000,0x7C0000,0x880000,0x880000,0xA80000,0x740000,0x7C0000,0xD40000,0x12C0000, -0xD40000,0xC40000,0xC40000,0xC40000,0xC40000,0x1240000,0x1240000,0x1240000,0x17F80000,0x17F80000,0x60000001,0x1240000,0x1240000,0x1240000,0x17F80000,0x17F80000,0x60000001,0x17F80000,0x17F80000,0x60000001,0x60000001,0x1240000,0x1240000,0x1240000,0x17F80000,0x17F80000,0x60000001,0x17F80000,0x17F80000,0x60000001,0x60000001,0x17F80000, -0x17F80000,0x60000001,0x60000001,0x60000001,0xE40000,0xD00000,0xC40000,0x3080000,0x1480000,0x1A00000,0x1E00000,0x29FC0000,0x2F40000,0x1240000,0x1A00000,0x60000001,0x1A00000,0x1340000,0x1C80000,0x69FC0000,0x98000001,0x1C80000,0x69FC0000,0x98000001,0x69FC0000,0x98000001,0x98000001,0x1C80000,0x69FC0000,0x98000001,0x69FC0000,0x98000001, -0x98000001,0x69FC0000,0x98000001,0x98000001,0x98000001,0x1C80000,0x69FC0000,0x98000001,0x69FC0000,0x98000001,0x98000001,0x69FC0000,0x98000001,0x98000001,0x98000001,0x69FC0000,0x98000001,0x98000001,0x98000001,0x98000001,0x3800000,0x1480000,0x1480000,0x7FC0000,0x55FC0000,0x81F40000,0x98000001,0x98000001,0x1A40000,0x29FC0000,0x93F80000,0x98000001, -0x41FC0000,0x9832DC,0xFE741765,0xCC6412C4,0x986412C5,0xFE440E76,0xDE340525,0xA0440799,0xAE340B04,0x942C055E,0x7C300B06,0xFE18169C,0xE2000481,0xA0200679,0xB4000691,0x96000012,0x7C080656,0x9600152B,0x840007FB,0x720009EB,0x6400152B,0xE032DC,0xC0001733,0x980012C5,0xA2001322,0x8C000906,0x7A000CD6,0x8C001DCF,0x7C000F22,0x6E000F63,0x62001933,0x1C432DC, -0x6C00213C,0x60001E14,0x5600247F,0x4A0032DC,0xFE641ADA,0xFE8C29F4,0xFE8C2C04,0xFE2C081A,0xFA00002A,0xBC00000D,0x9C08003B,0x900000E2,0xFE4419D3,0xFE100626,0x960005A9,0x6E000F63,0x13C32DC,0xC8152C,0xFEA405E1,0xC0940480,0x98940481,0xFE740672,0xCA640005,0x9C6C007D,0xA2640620,0x8E5C0132,0x7C640622,0x12C152B,0xE2000481,0x98480481,0xB4000691,0x96000012, -0x7C1C0622,0x1BF8152B,0x840007FB,0x720009EB,0x6400152B,0x12C152B,0xE2000481,0x98480481,0xB4000691,0x96000012,0x7C1C0622,0x1BF8152B,0x840007FB,0x720009EB,0x6400152B,0x1BF8152B,0x840007FB,0x720009EB,0x6400152B,0x6400152B,0xFE940C03,0xF6BC1178,0xFAC41144,0xFE540456,0xFA00002A,0xBC00000D,0x9A180002,0x900000E2,0xFE780C41,0xFE240375,0x9A000551,0x720009EB, -0x1AC152B,0x6412C4,0x6412C4,0x6412C4,0x6412C4,0xF43404E4,0xF43404E4,0xF43404E4,0x843404E4,0x843404E4,0x603004E5,0xE0000480,0xE0000480,0xE0000480,0x90000005,0x90000005,0x641000C2,0x6C000482,0x6C000482,0x5C0000A9,0x48000482,0x9012C3,0x9012C3,0x9012C3,0x7A000785,0x7A000785,0x5E000533,0x620008E2,0x620008E2,0x520003DE,0x460006A3,0x12412C3, -0x12412C3,0x44000B9D,0x3E000C56,0x300012C3,0xFE3C08B8,0xF4580E45,0x6412C4,0xFE1C0258,0xF0000001,0xB8000001,0xA6040018,0x8000000D,0xFE200802,0xFE0001C8,0x860004C8,0x520003DE,0xD012C3,0x940480,0x940480,0x940480,0x940480,0xC2640000,0xC2640000,0xC2640000,0x78640000,0x78640000,0x60640001,0xDC0480,0xDC0480,0xDC0480,0x8C0C0001,0x8C0C0001, -0x62340001,0x1BC0480,0x1BC0480,0x5C0000A9,0x48000482,0xDC0480,0xDC0480,0xDC0480,0x8C0C0001,0x8C0C0001,0x62340001,0x1BC0480,0x1BC0480,0x5C0000A9,0x48000482,0x1BC0480,0x1BC0480,0x5C0000A9,0x48000482,0x48000482,0xFA6C0200,0xFC880288,0x940480,0xFE3400B9,0xEE040000,0xB6040000,0x9E140000,0x8000000D,0xFA480244,0xFC140091,0x1380480,0x5C0000A9, -0x1380480,0xFC0620,0xFEC8000D,0xB4C40000,0x98C40001,0x3740620,0xCC5C0001,0x9A8C0001,0x3FFC0620,0x96000012,0x7C000622,0x3740620,0xCC5C0001,0x9A8C0001,0x3FFC0620,0x96000012,0x7C000622,0x3FFC0620,0x96000012,0x7C000622,0x7C000622,0x3740620,0xCC5C0001,0x9A8C0001,0x3FFC0620,0x96000012,0x7C000622,0x3FFC0620,0x96000012,0x7C000622,0x7C000622,0x3FFC0620, -0x96000012,0x7C000622,0x7C000622,0x7C000622,0xFAC80422,0x10C0620,0xFEEC0469,0xFE740190,0xF8040029,0xBC00000D,0x98240001,0x90000091,0xFEAC03F5,0xFE4C013D,0xA2300000,0x7C000622,0xDFC0620,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x3004E4,0x96000000,0x96000000,0x96000000,0x96000000,0x96000000, -0x96000000,0x4A000000,0x4A000000,0x4A000000,0x30000001,0x4804E2,0x4804E2,0x4804E2,0x4804E2,0x4804E2,0x4804E2,0x3A0001CD,0x3A0001CD,0x3A0001CD,0x2E0000EA,0x9404E2,0x9404E2,0x9404E2,0x20000305,0x180004E2,0xFA240265,0x3004E4,0x3004E4,0xFE100071,0xF4000000,0xBA000000,0xBA000000,0x7A000000,0xF60001C2,0xD60000CD,0x5E000024,0x3A0001CD, -0x6804E2,}; -static const uint32_t g_etc1_to_bc7_m6_table170[] = { -0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x80000, -0x80000,0x80000,0x80000,0x1,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x80000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000, -0xAC0000,0x15C0000,0x15C0000,0x15C0000,0x38000001,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0xAC0000,0x15C0000,0x15C0000,0x15C0000,0x38000001,0x15C0000,0x15C0000,0x15C0000,0x38000001,0x38000001,0x7C0000,0x740000,0x740000,0x6840000,0x900000,0x9C0000,0x9C0000,0x2C00000,0x6840000,0x900000,0xF40000,0x15C0000, -0xF40000,0xD40000,0xD40000,0xD40000,0xD40000,0x13C0000,0x13C0000,0x13C0000,0x21FC0000,0x21FC0000,0x68000001,0x13C0000,0x13C0000,0x13C0000,0x21FC0000,0x21FC0000,0x68000001,0x21FC0000,0x21FC0000,0x68000001,0x68000001,0x13C0000,0x13C0000,0x13C0000,0x21FC0000,0x21FC0000,0x68000001,0x21FC0000,0x21FC0000,0x68000001,0x68000001,0x21FC0000, -0x21FC0000,0x68000001,0x68000001,0x68000001,0x4F40000,0x8E00000,0xD40000,0x1200000,0x1640000,0x1C00000,0x5FC0000,0x35F80000,0x3080000,0x13C0000,0x1C00000,0x68000001,0x1C00000,0x1440000,0x1E00000,0x75FC0000,0xA0000001,0x1E00000,0x75FC0000,0xA0000001,0x75FC0000,0xA0000001,0xA0000001,0x1E00000,0x75FC0000,0xA0000001,0x75FC0000,0xA0000001, -0xA0000001,0x75FC0000,0xA0000001,0xA0000001,0xA0000001,0x1E00000,0x75FC0000,0xA0000001,0x75FC0000,0xA0000001,0xA0000001,0x75FC0000,0xA0000001,0xA0000001,0xA0000001,0x75FC0000,0xA0000001,0xA0000001,0xA0000001,0xA0000001,0x3940000,0x5580000,0x5580000,0x1BFC0000,0x63FC0000,0x8BF40000,0xA0000001,0xA0000001,0x1B80000,0x39FC0000,0x9DCC0000,0xA0000001, -0x51FC0000,0xA43680,0xFE801AD5,0xD470152D,0xA070152D,0xFE5C1112,0xE440067D,0xAA4C0931,0xB83C0C40,0x9A38068A,0x843C0C42,0xFE3017F0,0xF0080485,0xAA280735,0xC6000655,0xA0080012,0x86100686,0xA004152B,0x90000767,0x7E00095F,0x6C04152B,0xF43680,0xD20018EB,0xA204152C,0xAE0013F2,0x9800099A,0x80000D92,0x92001E87,0x86000EDF,0x76000F1F,0x68001953,0x1F03680, -0x7200234C,0x6C001F94,0x5C00261B,0x50003684,0xFE781E55,0xFE8C2DE0,0xF8A02FFD,0xFE400A6F,0xFE0C0052,0xC804000E,0xA4100086,0x98000089,0xFE541CF7,0xFE18084E,0xA4000539,0x76000F1F,0x15C3680,0xD8152C,0xFEB00631,0xC8A40480,0xA0A40481,0xFE8006A6,0xD2740005,0xA47C007D,0xAA740620,0x966C0132,0x84740622,0x144152B,0xEA100481,0xA0580481,0xC6000651,0xA008000E, -0x842C0622,0x27F8152B,0x90000763,0x7E00095B,0x6C00152B,0x144152B,0xEA100481,0xA0580481,0xC6000651,0xA008000E,0x842C0622,0x27F8152B,0x90000763,0x7E00095B,0x6C00152B,0x27F8152B,0x90000763,0x7E00095B,0x6C00152B,0x6C00152B,0xFEA00C86,0xFECC1178,0xFECC1164,0xFE6C04E5,0xFE100035,0xC410000D,0xA2280002,0x98000085,0xFE880CA0,0xFE4003DB,0xA40004F9,0x7E00095B, -0x1D0152B,0x70152C,0x70152C,0x70152C,0x70152C,0xFE3C0624,0xFE3C0624,0xFE3C0624,0x8E3C0620,0x8E3C0620,0x683C0621,0xF4040480,0xF4040480,0xF4040480,0x9E040011,0x9E040011,0x6C180131,0x78040480,0x78040480,0x64000081,0x50040482,0xA4152B,0xA4152B,0xA4152B,0x86000889,0x86000889,0x68000641,0x6E000966,0x6E000966,0x5E0003E6,0x4C0006C3,0x14C152B, -0x14C152B,0x4A000D01,0x44000D62,0x3600152B,0xFE4C0A8D,0xFA641079,0x70152C,0xFE2C037D,0xFE08000D,0xC608000C,0xB6080038,0x8E040001,0xFC3009DD,0xFE0002CC,0x960004B9,0x5E0003E6,0xE8152B,0xA40480,0xA40480,0xA40480,0xA40480,0xCA740000,0xCA740000,0xCA740000,0x80740000,0x80740000,0x68740001,0xF40480,0xF40480,0xF40480,0x941C0001,0x941C0001, -0x6A440001,0x1F00480,0x1F00480,0x6400007D,0x50000482,0xF40480,0xF40480,0xF40480,0x941C0001,0x941C0001,0x6A440001,0x1F00480,0x1F00480,0x6400007D,0x50000482,0x1F00480,0x1F00480,0x6400007D,0x50000482,0x50000482,0xFE740220,0xF49802AD,0xA40480,0xFE4400DA,0xF6140000,0xBE140000,0xA6240000,0x8E000001,0xF2600265,0xFE2400A4,0x15C0480,0x6400007D, -0x15C0480,0x10C0620,0xFEDC0020,0xBCD40000,0xA0D40001,0x38C0620,0xD46C0001,0xA29C0001,0x4BFC0620,0xA0000005,0x84000622,0x38C0620,0xD46C0001,0xA29C0001,0x4BFC0620,0xA0000005,0x84000622,0x4BFC0620,0xA0000005,0x84000622,0x84000622,0x38C0620,0xD46C0001,0xA29C0001,0x4BFC0620,0xA0000005,0x84000622,0x4BFC0620,0xA0000005,0x84000622,0x84000622,0x4BFC0620, -0xA0000005,0x84000622,0x84000622,0x84000622,0xFED00442,0x71C0620,0xFB040480,0xFC9001C4,0xFE100035,0xCA000004,0xA0340001,0x9C00006A,0xF4C80422,0xFE640179,0xAA400000,0x84000622,0x1DF80620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0x3C0620,0xAC040000,0xAC040000,0xAC040000,0xAC040000,0xAC040000, -0xAC040000,0x54040000,0x54040000,0x54040000,0x38040001,0x580620,0x580620,0x580620,0x580620,0x580620,0x580620,0x4600021D,0x4600021D,0x4600021D,0x340000FA,0xB00620,0xB00620,0xB00620,0x260003A9,0x1C000622,0xFE2C0349,0x3C0620,0x3C0620,0xFA1800F4,0xFC08000D,0xD2040000,0xD2040000,0x8A040000,0xFA08028A,0xFE0000E8,0x66000019,0x4600021D, -0x7C0620,}; -static const uint32_t g_etc1_to_bc7_m6_table171[] = { -0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x380000, -0x380000,0x380000,0x380000,0x8000001,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x2140000,0x2140000,0x2140000,0x1C0000,0x280000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000, -0xC40000,0x18C0000,0x18C0000,0x18C0000,0x40000001,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x18C0000,0x40000001,0x18C0000,0x18C0000,0x18C0000,0x40000001,0x40000001,0x8C0000,0x840000,0x840000,0x2980000,0xA40000,0xB40000,0xB40000,0xDC0000,0x2980000,0xA40000,0x1180000,0x18C0000, -0x1180000,0xE40000,0xE40000,0xE40000,0xE40000,0x3500000,0x3500000,0x3500000,0x2DFC0000,0x2DFC0000,0x70000001,0x3500000,0x3500000,0x3500000,0x2DFC0000,0x2DFC0000,0x70000001,0x2DFC0000,0x2DFC0000,0x70000001,0x70000001,0x3500000,0x3500000,0x3500000,0x2DFC0000,0x2DFC0000,0x70000001,0x2DFC0000,0x2DFC0000,0x70000001,0x70000001,0x2DFC0000, -0x2DFC0000,0x70000001,0x70000001,0x70000001,0x1080000,0xF40000,0xE40000,0x3340000,0x1800000,0x1E40000,0x13F80000,0x3FFC0000,0x31C0000,0x3500000,0x1E40000,0x70000001,0x1E40000,0x1540000,0x1F80000,0x81FC0000,0xA8000001,0x1F80000,0x81FC0000,0xA8000001,0x81FC0000,0xA8000001,0xA8000001,0x1F80000,0x81FC0000,0xA8000001,0x81FC0000,0xA8000001, -0xA8000001,0x81FC0000,0xA8000001,0xA8000001,0xA8000001,0x1F80000,0x81FC0000,0xA8000001,0x81FC0000,0xA8000001,0xA8000001,0x81FC0000,0xA8000001,0xA8000001,0xA8000001,0x81FC0000,0xA8000001,0xA8000001,0xA8000001,0xA8000001,0x3A80000,0xD680000,0xD680000,0x2FFC0000,0x71FC0000,0x95F40000,0xA8000001,0xA8000001,0x3CC0000,0x4BFC0000,0xA5DC0000,0xA8000001, -0x5FFC0000,0xB43680,0xFE8C1BA1,0xDC80152D,0xA880152D,0xFE6811EA,0xEC50067D,0xB25C0931,0xC04C0C40,0xA248068A,0x8C4C0C42,0xFE441883,0xF8180485,0xB2380735,0xCC0C0651,0xA8180012,0x8E200686,0xA814152B,0x98040733,0x86040933,0x7414152B,0x10C3680,0xE20017A1,0xAA14152C,0xC0001242,0xA2000823,0x8C000CC2,0xA2001CE9,0x92000CD7,0x80000D2B,0x7000182C,0xBF83680, -0x7E0021E4,0x72001DD4,0x660024F4,0x58003684,0xFE8C1F58,0xFCA82E0A,0xFEAC3001,0xFE540B96,0xFE2000B6,0xD014000E,0xAC200086,0xA20C0082,0xFE701E35,0xFE300991,0xB00004B1,0x80000D2B,0x17C3680,0xE8152C,0xFEC40690,0xD0B40480,0xA8B40481,0xFE9806E6,0xDA840005,0xAC8C007D,0xB2840620,0x9E7C0132,0x8C840622,0x15C152B,0xF2200481,0xA8680481,0xD2000629,0xA818000E, -0x8C3C0622,0x33F8152B,0x960006DB,0x840008D3,0x7400152B,0x15C152B,0xF2200481,0xA8680481,0xD2000629,0xA818000E,0x8C3C0622,0x33F8152B,0x960006DB,0x840008D3,0x7400152B,0x33F8152B,0x960006DB,0x840008D3,0x7400152B,0x7400152B,0xFEB40CC9,0xF6DC11D2,0xFAE411A1,0xFE78057E,0xFE280069,0xCC20000D,0xAA380002,0xA400004B,0xFE980D11,0xFE54047D,0xB00004B0,0x840008D3, -0x1F0152B,0x80152C,0x80152C,0x80152C,0x80152C,0xFE500631,0xFE500631,0xFE500631,0x964C0620,0x964C0620,0x704C0621,0xFC140480,0xFC140480,0xFC140480,0xA6140011,0xA6140011,0x74280131,0x80140480,0x80140480,0x6C0C007A,0x58140482,0xBC152B,0xBC152B,0xBC152B,0x980007B9,0x980007B9,0x70040621,0x7A000866,0x7A000866,0x680002AA,0x580005EB,0x17C152B, -0x17C152B,0x56000C19,0x4A000C82,0x3E00152B,0xFC640B12,0xFE6C10C1,0x80152C,0xFE3C03E0,0xFE1C001D,0xCE18000C,0xBE180038,0x96140001,0xFE3C0A38,0xFE18031E,0xA8000481,0x680002AA,0x10C152B,0xB40480,0xB40480,0xB40480,0xB40480,0xD2840000,0xD2840000,0xD2840000,0x88840000,0x88840000,0x70840001,0x10C0480,0x10C0480,0x10C0480,0x9C2C0001,0x9C2C0001, -0x72540001,0xBF80480,0xBF80480,0x6C00004A,0x58000482,0x10C0480,0x10C0480,0x10C0480,0x9C2C0001,0x9C2C0001,0x72540001,0xBF80480,0xBF80480,0x6C00004A,0x58000482,0xBF80480,0xBF80480,0x6C00004A,0x58000482,0x58000482,0xFE900221,0xFCA802AD,0xB40480,0xFE5C00E1,0xFE240000,0xC6240000,0xAE340000,0x980C0000,0xFA700265,0xFA4000C8,0x17C0480,0x6C00004A, -0x17C0480,0x11C0620,0xFEEC0041,0xC4E40000,0xA8E40001,0x3A40620,0xDC7C0001,0xAAAC0001,0x57FC0620,0xA8000001,0x8C000622,0x3A40620,0xDC7C0001,0xAAAC0001,0x57FC0620,0xA8000001,0x8C000622,0x57FC0620,0xA8000001,0x8C000622,0x8C000622,0x3A40620,0xDC7C0001,0xAAAC0001,0x57FC0620,0xA8000001,0x8C000622,0x57FC0620,0xA8000001,0x8C000622,0x8C000622,0x57FC0620, -0xA8000001,0x8C000622,0x8C000622,0x8C000622,0xFAF00451,0xF2C0620,0xFF0C04A0,0xFEA001F9,0xFE380055,0xD4040000,0xA8440001,0xA400004A,0xFCD80422,0xFE8401A5,0xB2500000,0x8C000622,0x2BFC0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0x4C0620,0xB4140000,0xB4140000,0xB4140000,0xB4140000,0xB4140000, -0xB4140000,0x5C140000,0x5C140000,0x5C140000,0x40140001,0x700620,0x700620,0x700620,0x700620,0x700620,0x700620,0x52000185,0x52000185,0x52000185,0x4000007A,0xE40620,0xE40620,0xE40620,0x32000321,0x24000622,0xF63C0374,0x4C0620,0x4C0620,0xFE2C0109,0xFC1C0019,0xDA140000,0xDA140000,0x92140000,0xFC1C02AD,0xFA0C00DD,0x7A040000,0x52000185, -0xA00620,}; -static const uint32_t g_etc1_to_bc7_m6_table172[] = { -0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x700000, -0x700000,0x700000,0x700000,0x12000000,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x280000,0x280000,0x280000,0x380000,0x500000,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000, -0x2DC0000,0x1C40000,0x1C40000,0x1C40000,0x4A000000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000,0x1C40000,0x1C40000,0x4A000000,0x1C40000,0x1C40000,0x1C40000,0x4A000000,0x4A000000,0xA00000,0x940001,0x940001,0x4AC0000,0xBC0000,0xCC0000,0xCC0000,0xFC0000,0x4AC0000,0xBC0000,0x13C0000,0x1C40000, -0x13C0000,0xF40001,0xF40001,0xF40001,0xF40001,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x3BFC0000,0x7A000000,0x7A000000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x3BFC0000,0x7A000000,0x7A000000,0x3BFC0000, -0x3BFC0000,0x7A000000,0x7A000000,0x7A000000,0x51C0000,0xB040000,0xF40001,0x34C0000,0x19C0000,0x7FC0000,0x21FC0000,0x4BFC0000,0x1340000,0x16C0000,0x7FC0000,0x7A000000,0x7FC0000,0x1640001,0x19FC0000,0x8FF80000,0xB2000000,0x19FC0000,0x8FF80000,0xB2000000,0x8FF80000,0xB2000000,0xB2000000,0x19FC0000,0x8FF80000,0xB2000000,0x8FF80000,0xB2000000, -0xB2000000,0x8FF80000,0xB2000000,0xB2000000,0xB2000000,0x19FC0000,0x8FF80000,0xB2000000,0x8FF80000,0xB2000000,0xB2000000,0x8FF80000,0xB2000000,0xB2000000,0xB2000000,0x8FF80000,0xB2000000,0xB2000000,0xB2000000,0xB2000000,0x1C00000,0x77C0000,0x77C0000,0x45FC0000,0x7FFC0000,0xA1F00000,0xB2000000,0xB2000000,0x1E80000,0x5DFC0000,0xAFD00000,0xB2000000, -0x71FC0000,0xC43684,0xFEA41C83,0xE690152B,0xB290152B,0xFE801302,0xF664067B,0xBA6C0933,0xC8600C42,0xAC5C0686,0x965C0C42,0xFE5C1959,0xFE28048A,0xBA480733,0xD620064F,0xB0280012,0x9630068A,0xB224152C,0xA0140735,0x8E140931,0x7C24152D,0x3243680,0xF400169F,0xB228152B,0xD20010AA,0xAE000711,0x94000C50,0xAE001B7F,0x9E000AD1,0x8C000B51,0x7A0016F9,0x17FC3680, -0x8A00206C,0x7E001C08,0x720023A4,0x62003680,0xFEA0204E,0xF6BC2EC4,0xF8C0308C,0xFE6C0D33,0xFE38015B,0xDA28000E,0xB6340082,0xAA1C0086,0xFE781F23,0xFE440AEA,0xBA0C049F,0x8C000B51,0x1A43680,0xFC152B,0xFED40722,0xD8C80482,0xB2C40482,0xFEB00749,0xE2940006,0xB6A0007A,0xBA980621,0xA8900131,0x96940621,0x374152B,0xFC300481,0xB2780480,0xDE080621,0xB0280011, -0x964C0620,0x3FFC152B,0xA6000662,0x9000084C,0x7C00152C,0x374152B,0xFC300481,0xB2780480,0xDE080621,0xB0280011,0x964C0620,0x3FFC152B,0xA6000662,0x9000084C,0x7C00152C,0x3FFC152B,0xA6000662,0x9000084C,0x7C00152C,0x7C00152C,0xFED00D38,0xFEEC11D9,0xFEEC11E2,0xFE940602,0xFE4800C6,0xD630000D,0xB24C0001,0xAE04003D,0xFEB40D8E,0xFE700541,0xBC000486,0x9000084C, -0xDFC152B,0x90152B,0x90152B,0x90152B,0x90152B,0xFE600653,0xFE600653,0xFE600653,0x9E600622,0x9E600622,0x7A5C0622,0xFE280489,0xFE280489,0xFE280489,0xB028000E,0xB028000E,0x7E3C0132,0x88280481,0x88280481,0x741C007D,0x62240481,0xD4152B,0xD4152B,0xD4152B,0xA80006F1,0xA80006F1,0x7A140620,0x86000771,0x86000771,0x7400019D,0x62000529,0x1B0152B, -0x1B0152B,0x60000B4C,0x56000B91,0x4800152C,0xFE740B63,0xFC8810CB,0x90152B,0xFE54045A,0xFE300042,0xD628000B,0xC628003B,0x9E240002,0xFE500A8E,0xFE30038A,0xB2100480,0x7400019D,0x130152B,0xC40482,0xC40482,0xC40482,0xC40482,0xDA980001,0xDA980001,0xDA980001,0x92940001,0x92940001,0x7A940001,0x3240480,0x3240480,0x3240480,0xA63C0001,0xA63C0001, -0x7A680000,0x17FC0480,0x17FC0480,0x78000028,0x62000480,0x3240480,0x3240480,0x3240480,0xA63C0001,0xA63C0001,0x7A680000,0x17FC0480,0x17FC0480,0x78000028,0x62000480,0x17FC0480,0x17FC0480,0x78000028,0x62000480,0x62000480,0xFEA00244,0xF6BC02D2,0xC40482,0xFE780109,0xFC3C0004,0xD0340000,0xB6480000,0xA21C0000,0xF8880288,0xFC5800DD,0x1A40480,0x78000028, -0x1A40480,0x12C0622,0xFF00006A,0xCEF40001,0xB2F40001,0x1C00620,0xE68C0001,0xB2C00000,0x65F80620,0xB2100000,0x96000620,0x1C00620,0xE68C0001,0xB2C00000,0x65F80620,0xB2100000,0x96000620,0x65F80620,0xB2100000,0x96000620,0x96000620,0x1C00620,0xE68C0001,0xB2C00000,0x65F80620,0xB2100000,0x96000620,0x65F80620,0xB2100000,0x96000620,0x96000620,0x65F80620, -0xB2100000,0x96000620,0x96000620,0x96000620,0xFEF80479,0x9400620,0xFD2804B1,0xFEC00225,0xFE500089,0xDC180000,0xB2500000,0xAC000034,0xFEF00451,0xFEA001E5,0xBA640001,0x96000620,0x3DF80620,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0x5C0622,0xBA280001,0xBA280001,0xBA280001,0xBA280001,0xBA280001, -0xBA280001,0x66240001,0x66240001,0x66240001,0x4A240001,0x8C0620,0x8C0620,0x8C0620,0x8C0620,0x8C0620,0x8C0620,0x620000E9,0x620000E9,0x620000E9,0x4800001D,0x1180620,0x1180620,0x1180620,0x3E0002A1,0x2E000620,0xFE4C037A,0x5C0622,0x5C0622,0xFE3C0122,0xFC300029,0xE2280001,0xE2280001,0x9A280001,0xF83002D2,0xFC1C00F4,0x82180000,0x620000E9, -0xC80620,}; -static const uint32_t g_etc1_to_bc7_m6_table173[] = { -0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0xA00000, -0xA00000,0xA00000,0xA00000,0x1A000000,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x4380000,0x4380000,0x4380000,0x500000,0x700000,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000, -0x2F40000,0x1F40000,0x1F40000,0x1F40000,0x52000000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x1F40000,0x1F40000,0x1F40000,0x52000000,0x1F40000,0x1F40000,0x1F40000,0x52000000,0x52000000,0xB00000,0xA40001,0xA40001,0xC00000,0xD00000,0xE00000,0xE00000,0x3140000,0xC00000,0xD00000,0x1600000,0x1F40000, -0x1600000,0x1040001,0x1040001,0x1040001,0x1040001,0x1840000,0x1840000,0x1840000,0x47FC0000,0x47FC0000,0x82000000,0x1840000,0x1840000,0x1840000,0x47FC0000,0x47FC0000,0x82000000,0x47FC0000,0x47FC0000,0x82000000,0x82000000,0x1840000,0x1840000,0x1840000,0x47FC0000,0x47FC0000,0x82000000,0x47FC0000,0x47FC0000,0x82000000,0x82000000,0x47FC0000, -0x47FC0000,0x82000000,0x82000000,0x82000000,0x1300000,0x1180000,0x1040001,0x1640000,0x1B80000,0x17FC0000,0x2FFC0000,0x57F80000,0x1480000,0x1840000,0x17FC0000,0x82000000,0x17FC0000,0x1740001,0x31FC0000,0x9BF80000,0xBA000000,0x31FC0000,0x9BF80000,0xBA000000,0x9BF80000,0xBA000000,0xBA000000,0x31FC0000,0x9BF80000,0xBA000000,0x9BF80000,0xBA000000, -0xBA000000,0x9BF80000,0xBA000000,0xBA000000,0xBA000000,0x31FC0000,0x9BF80000,0xBA000000,0x9BF80000,0xBA000000,0xBA000000,0x9BF80000,0xBA000000,0xBA000000,0xBA000000,0x9BF80000,0xBA000000,0xBA000000,0xBA000000,0xBA000000,0x1D40000,0xF8C0000,0xF8C0000,0x59FC0000,0x8DFC0000,0xABF00000,0xBA000000,0xBA000000,0x1FC0000,0x6FFC0000,0xB7E00000,0xBA000000, -0x7FFC0000,0xD43684,0xFEB01D6B,0xEEA0152B,0xBAA0152B,0xFE8C1422,0xFE74067B,0xC27C0933,0xD0700C42,0xB46C0686,0x9E6C0C42,0xFE681A21,0xFE4004BA,0xC2580733,0xDE30064F,0xB8380012,0x9E40068A,0xBA34152C,0xA8240735,0x96240931,0x8434152D,0x33C3680,0xFE04164D,0xBA38152B,0xE2000F5A,0xBA000681,0x9E080C40,0xBA001A5F,0xA8000936,0x940009ED,0x82001640,0x23FC3680, -0x96001F3C,0x84001A90,0x78002270,0x6A003680,0xFEA82162,0xFECC2EC4,0xFECC30A4,0xFE800E82,0xFE4C0223,0xE238000E,0xBE440082,0xB22C0086,0xFE902009,0xFE5C0C52,0xC21C049F,0x940009ED,0x1C83680,0x10C152B,0xFEE80793,0xE0D80482,0xBAD40482,0xFEC407A3,0xEAA40006,0xBEB0007A,0xC2A80621,0xB0A00131,0x9EA40621,0x38C152B,0xFE4C0491,0xBA880480,0xE6180621,0xB8380011, -0x9E5C0620,0x4BFC152B,0xAC0005E6,0x960007E4,0x8400152C,0x38C152B,0xFE4C0491,0xBA880480,0xE6180621,0xB8380011,0x9E5C0620,0x4BFC152B,0xAC0005E6,0x960007E4,0x8400152C,0x4BFC152B,0xAC0005E6,0x960007E4,0x8400152C,0x8400152C,0xFEE40DD9,0xF900122B,0xFB041203,0xFEB006D2,0xFE680131,0xDE40000D,0xBA5C0001,0xB80C0038,0xFEC40DFB,0xFE8405C2,0xC6040480,0x960007E4, -0x1DF8152B,0xA0152B,0xA0152B,0xA0152B,0xA0152B,0xFE740672,0xFE740672,0xFE740672,0xA6700622,0xA6700622,0x826C0622,0xFE3C049B,0xFE3C049B,0xFE3C049B,0xB838000E,0xB838000E,0x864C0132,0x90380481,0x90380481,0x7C2C007D,0x6A340481,0xEC152B,0xEC152B,0xEC152B,0xBA000681,0xBA000681,0x82240620,0x980006B1,0x980006B1,0x7A0000E9,0x680004B9,0x1E4152B, -0x1E4152B,0x6C000A8C,0x5C000AD1,0x5000152C,0xFE840BAE,0xF4981122,0xA0152B,0xFE6804E6,0xFE440072,0xDE38000B,0xCE38003B,0xA6340002,0xFE640B11,0xFE440409,0xBA200480,0x7A0000E9,0x154152B,0xD40482,0xD40482,0xD40482,0xD40482,0xE2A80001,0xE2A80001,0xE2A80001,0x9AA40001,0x9AA40001,0x82A40001,0x33C0480,0x33C0480,0x33C0480,0xAE4C0001,0xAE4C0001, -0x82780000,0x23FC0480,0x23FC0480,0x7E000014,0x6A000480,0x33C0480,0x33C0480,0x33C0480,0xAE4C0001,0xAE4C0001,0x82780000,0x23FC0480,0x23FC0480,0x7E000014,0x6A000480,0x23FC0480,0x23FC0480,0x7E000014,0x6A000480,0x6A000480,0xFAB40265,0xFECC02D2,0xD40482,0xFE880122,0xFE540008,0xD8440000,0xBE580000,0xAA2C0000,0xFE940290,0xFE6800F4,0x1C80480,0x7E000014, -0x1C80480,0x13C0622,0xFF1800A2,0xD7040001,0xBB040001,0x1D80620,0xEE9C0001,0xBAD00000,0x71F80620,0xBA200000,0x9E000620,0x1D80620,0xEE9C0001,0xBAD00000,0x71F80620,0xBA200000,0x9E000620,0x71F80620,0xBA200000,0x9E000620,0x9E000620,0x1D80620,0xEE9C0001,0xBAD00000,0x71F80620,0xBA200000,0x9E000620,0x71F80620,0xBA200000,0x9E000620,0x9E000620,0x71F80620, -0xBA200000,0x9E000620,0x9E000620,0x9E000620,0xFF140482,0x1540620,0xF53804E2,0xFED80269,0xFE7C00B5,0xE4280000,0xBA600000,0xB6000019,0xF7080480,0xFEC00221,0xC2740001,0x9E000620,0x4BFC0620,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0x6C0622,0xC2380001,0xC2380001,0xC2380001,0xC2380001,0xC2380001, -0xC2380001,0x6E340001,0x6E340001,0x6E340001,0x52340001,0xA40620,0xA40620,0xA40620,0xA40620,0xA40620,0xA40620,0x74000089,0x74000089,0x74000089,0x52000000,0x14C0620,0x14C0620,0x14C0620,0x4400022D,0x36000620,0xF860039D,0x6C0622,0x6C0622,0xFE4C013D,0xFE3C003A,0xEA380001,0xEA380001,0xA2380001,0xFE3C02DA,0xFC300109,0x8A280000,0x74000089, -0xE80620,}; -static const uint32_t g_etc1_to_bc7_m6_table174[] = { -0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0xD00000, -0xD00000,0xD00000,0xD00000,0x22000000,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0xC480000,0xC480000,0xC480000,0x680000,0x940000,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0xB40001,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000, -0x30C0000,0xBFC0000,0xBFC0000,0xBFC0000,0x5A000000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0x30C0000,0xBFC0000,0xBFC0000,0xBFC0000,0x5A000000,0xBFC0000,0xBFC0000,0xBFC0000,0x5A000000,0x5A000000,0x8C00000,0xB40001,0xB40001,0xD40000,0xE40000,0xF80000,0xF80000,0x1300000,0xD40000,0xE40000,0x1800000,0xBFC0000, -0x1800000,0x1140001,0x1140001,0x1140001,0x1140001,0x19C0000,0x19C0000,0x19C0000,0x53FC0000,0x53FC0000,0x8A000000,0x19C0000,0x19C0000,0x19C0000,0x53FC0000,0x53FC0000,0x8A000000,0x53FC0000,0x53FC0000,0x8A000000,0x8A000000,0x19C0000,0x19C0000,0x19C0000,0x53FC0000,0x53FC0000,0x8A000000,0x53FC0000,0x53FC0000,0x8A000000,0x8A000000,0x53FC0000, -0x53FC0000,0x8A000000,0x8A000000,0x8A000000,0x1440000,0x1280000,0x1140001,0x3780000,0x1D40000,0x25FC0000,0x3DF80000,0x61FC0000,0x15C0000,0x19C0000,0x25FC0000,0x8A000000,0x25FC0000,0x1840001,0x49FC0000,0xA7F80000,0xC2000000,0x49FC0000,0xA7F80000,0xC2000000,0xA7F80000,0xC2000000,0xC2000000,0x49FC0000,0xA7F80000,0xC2000000,0xA7F80000,0xC2000000, -0xC2000000,0xA7F80000,0xC2000000,0xC2000000,0xC2000000,0x49FC0000,0xA7F80000,0xC2000000,0xA7F80000,0xC2000000,0xC2000000,0xA7F80000,0xC2000000,0xC2000000,0xC2000000,0xA7F80000,0xC2000000,0xC2000000,0xC2000000,0xC2000000,0x1E80000,0x1A00000,0x1A00000,0x6DFC0000,0x9BFC0000,0xB5F00000,0xC2000000,0xC2000000,0x1FFC0000,0x81FC0000,0xBFF00000,0xC2000000, -0x8FFC0000,0xE43684,0xFEC41E2C,0xF6B0152B,0xC2B0152B,0xFEA41532,0xFE8406AF,0xCA8C0933,0xD8800C42,0xBC7C0686,0xA67C0C42,0xFE801AF9,0xFE540527,0xCA680733,0xE640064F,0xC0480012,0xA650068A,0xC244152C,0xB0340735,0x9E340931,0x8C44152D,0x1543680,0xFE1416C3,0xC248152B,0xEE000E4A,0xC6000651,0xA6180C40,0xCC001953,0xB40007E6,0x9E0008C1,0x8C0015A5,0x2FFC3680, -0x9C001E10,0x90001910,0x84002170,0x72003680,0xFEBC2236,0xF6DC2F82,0xF8E0311F,0xFE940FF3,0xFE640317,0xEA48000E,0xC6540082,0xBA3C0086,0xFEA42172,0xFE700DE2,0xCA2C049F,0x9E0008C1,0x1E83680,0x11C152B,0xFEF4082B,0xE8E80482,0xC2E40482,0xFED0081B,0xF2B40006,0xC6C0007A,0xCAB80621,0xB8B00131,0xA6B40621,0x3A4152B,0xFE6404B9,0xC2980480,0xEE280621,0xC0480011, -0xA66C0620,0x57FC152B,0xB8000586,0xA0000789,0x8C00152C,0x3A4152B,0xFE6404B9,0xC2980480,0xEE280621,0xC0480011,0xA66C0620,0x57FC152B,0xB8000586,0xA0000789,0x8C00152C,0x57FC152B,0xB8000586,0xA0000789,0x8C00152C,0x8C00152C,0xFEF80E2A,0xFF0C123B,0xFF0C124B,0xFEC0075E,0xFE7C01A9,0xE650000D,0xC26C0001,0xC01C0038,0xFEDC0E35,0xFE980692,0xCE140480,0xA0000789, -0x2BFC152B,0xB0152B,0xB0152B,0xB0152B,0xB0152B,0xFE8406AB,0xFE8406AB,0xFE8406AB,0xAE800622,0xAE800622,0x8A7C0622,0xFE5004B9,0xFE5004B9,0xFE5004B9,0xC048000E,0xC048000E,0x8E5C0132,0x98480481,0x98480481,0x843C007D,0x72440481,0x104152B,0x104152B,0x104152B,0xC6000641,0xC6000641,0x8A340620,0xA20005F6,0xA20005F6,0x86000061,0x72000489,0x7FC152B, -0x7FC152B,0x720009E8,0x66000A24,0x5800152C,0xFE900C2A,0xFCA81122,0xB0152B,0xFE78055A,0xFE5800B2,0xE648000B,0xD648003B,0xAE440002,0xFE780B5A,0xFE500485,0xC2300480,0x86000061,0x174152B,0xE40482,0xE40482,0xE40482,0xE40482,0xEAB80001,0xEAB80001,0xEAB80001,0xA2B40001,0xA2B40001,0x8AB40001,0x1540480,0x1540480,0x1540480,0xB65C0001,0xB65C0001, -0x8A880000,0x2FFC0480,0x2FFC0480,0x8A000004,0x72000480,0x1540480,0x1540480,0x1540480,0xB65C0001,0xB65C0001,0x8A880000,0x2FFC0480,0x2FFC0480,0x8A000004,0x72000480,0x2FFC0480,0x2FFC0480,0x8A000004,0x72000480,0x72000480,0xF6C80288,0xF6DC02F9,0xE40482,0xFE980145,0xFC6C0019,0xE0540000,0xC6680000,0xB23C0000,0xFAAC02AD,0xFE800120,0x1E80480,0x8A000004, -0x1E80480,0x14C0622,0xFF2400DA,0xDF140001,0xC3140001,0x1F00620,0xF6AC0001,0xC2E00000,0x7DF80620,0xC2300000,0xA6000620,0x1F00620,0xF6AC0001,0xC2E00000,0x7DF80620,0xC2300000,0xA6000620,0x7DF80620,0xC2300000,0xA6000620,0xA6000620,0x1F00620,0xF6AC0001,0xC2E00000,0x7DF80620,0xC2300000,0xA6000620,0x7DF80620,0xC2300000,0xA6000620,0xA6000620,0x7DF80620, -0xC2300000,0xA6000620,0xA6000620,0xA6000620,0xFB2C04B1,0x1640620,0xFD4804E2,0xFEEC029A,0xFE9000F5,0xEC380000,0xC2700000,0xC000000D,0xFF180480,0xFECC0262,0xCA840001,0xA6000620,0x5BFC0620,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0x7C0622,0xCA480001,0xCA480001,0xCA480001,0xCA480001,0xCA480001, -0xCA480001,0x76440001,0x76440001,0x76440001,0x5A440001,0xBC0620,0xBC0620,0xBC0620,0xBC0620,0xBC0620,0xBC0620,0x80000041,0x80000041,0x80000041,0x5A100000,0x17C0620,0x17C0620,0x17C0620,0x500001CD,0x3E000620,0xFE6C03A9,0x7C0622,0x7C0622,0xFA60016D,0xFE50004A,0xF2480001,0xF2480001,0xAA480001,0xFC5402F9,0xFE3C0132,0x92380000,0x80000041, -0x10C0620,}; -static const uint32_t g_etc1_to_bc7_m6_table175[] = { -0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x1000000, -0x1000000,0x1000000,0x1000000,0x2A000000,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x5C0000,0x5C0000,0x5C0000,0x800000,0xB40000,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000, -0x3240000,0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0x62000000,0xD40000,0xC40001,0xC40001,0x4E40000,0xF80000,0x10C0000,0x10C0000,0x14C0000,0x4E40000,0xF80000,0x1A40000,0x17FC0000, -0x1A40000,0x1240001,0x1240001,0x1240001,0x1240001,0x1B40000,0x1B40000,0x1B40000,0x5FF80000,0x5FF80000,0x92000000,0x1B40000,0x1B40000,0x1B40000,0x5FF80000,0x5FF80000,0x92000000,0x5FF80000,0x5FF80000,0x92000000,0x92000000,0x1B40000,0x1B40000,0x1B40000,0x5FF80000,0x5FF80000,0x92000000,0x5FF80000,0x5FF80000,0x92000000,0x92000000,0x5FF80000, -0x5FF80000,0x92000000,0x92000000,0x92000000,0x3540000,0x5380000,0x1240001,0x1900000,0x3EC0000,0x35FC0000,0x49FC0000,0x6DF80000,0x1700000,0x1B40000,0x35FC0000,0x92000000,0x35FC0000,0x1940001,0x63FC0000,0xB3F80000,0xCA000000,0x63FC0000,0xB3F80000,0xCA000000,0xB3F80000,0xCA000000,0xCA000000,0x63FC0000,0xB3F80000,0xCA000000,0xB3F80000,0xCA000000, -0xCA000000,0xB3F80000,0xCA000000,0xCA000000,0xCA000000,0x63FC0000,0xB3F80000,0xCA000000,0xB3F80000,0xCA000000,0xCA000000,0xB3F80000,0xCA000000,0xCA000000,0xCA000000,0xB3F80000,0xCA000000,0xCA000000,0xCA000000,0xCA000000,0x1FC0000,0x1B00000,0x1B00000,0x81FC0000,0xA9F80000,0xBFF00000,0xCA000000,0xCA000000,0x3DFC0000,0x91FC0000,0xC9C40000,0xCA000000, -0x9FF80000,0xF43684,0xFED01F3C,0xFEC0152B,0xCAC0152B,0xFEB01682,0xFE980713,0xD29C0933,0xE0900C42,0xC48C0686,0xAE8C0C42,0xFE981BF1,0xFE6C05CF,0xD2780733,0xEE50064F,0xC8580012,0xAE60068A,0xCA54152C,0xB8440735,0xA6440931,0x9454152D,0x16C3680,0xFE38176F,0xCA58152B,0xFA000D7A,0xCE100651,0xAE280C40,0xD8001863,0xBC0006CD,0xA60007D9,0x94001550,0x3BFC3680, -0xA6001D1B,0x960017C8,0x8A002044,0x7A003680,0xFED02341,0xFEEC2F82,0xFEEC313B,0xFEAC11B2,0xFE78043B,0xF258000E,0xCE640082,0xC24C0086,0xFEB4221F,0xFE800F42,0xD23C049F,0xA60007D9,0x7FC3680,0x12C152B,0xFF0C08BB,0xF0F80482,0xCAF40482,0xFEE80893,0xFAC40006,0xCED0007A,0xD2C80621,0xC0C00131,0xAEC40621,0x3BC152B,0xFE7C0501,0xCAA80480,0xF6380621,0xC8580011, -0xAE7C0620,0x63FC152B,0xC2000540,0xAA000740,0x9400152C,0x3BC152B,0xFE7C0501,0xCAA80480,0xF6380621,0xC8580011,0xAE7C0620,0x63FC152B,0xC2000540,0xAA000740,0x9400152C,0x63FC152B,0xC2000540,0xAA000740,0x9400152C,0x9400152C,0xFF000EBB,0xF9201289,0xFD281262,0xFED40821,0xFE900249,0xEE60000D,0xCA7C0001,0xC82C0038,0xFCF40ED9,0xFEC0074A,0xD6240480,0xAA000740, -0x3BFC152B,0xC0152B,0xC0152B,0xC0152B,0xC0152B,0xFE9806E2,0xFE9806E2,0xFE9806E2,0xB6900622,0xB6900622,0x928C0622,0xFE6804F1,0xFE6804F1,0xFE6804F1,0xC858000E,0xC858000E,0x966C0132,0xA0580481,0xA0580481,0x8C4C007D,0x7A540481,0x11C152B,0x11C152B,0x11C152B,0xD8000621,0xD8000621,0x92440620,0xAE000576,0xAE000576,0x92000019,0x7A080480,0x13FC152B, -0x13FC152B,0x7E000938,0x72000984,0x6000152C,0xFEA00C7D,0xF4B8117B,0xC0152B,0xFE8805D3,0xFE6C0102,0xEE58000B,0xDE58003B,0xB6540002,0xFE900BE5,0xFE680518,0xCA400480,0x92000019,0x198152B,0xF40482,0xF40482,0xF40482,0xF40482,0xF2C80001,0xF2C80001,0xF2C80001,0xAAC40001,0xAAC40001,0x92C40001,0x16C0480,0x16C0480,0x16C0480,0xBE6C0001,0xBE6C0001, -0x92980000,0x3BFC0480,0x3BFC0480,0x92000000,0x7A000480,0x16C0480,0x16C0480,0x16C0480,0xBE6C0001,0xBE6C0001,0x92980000,0x3BFC0480,0x3BFC0480,0x92000000,0x7A000480,0x3BFC0480,0x3BFC0480,0x92000000,0x7A000480,0x7A000480,0xFED80288,0xFEEC02F9,0xF40482,0xFEB4016D,0xFE800029,0xE8640000,0xCE780000,0xBA4C0000,0xFCC002D2,0xFE980139,0x7FC0480,0x92000000, -0x7FC0480,0x15C0622,0xFF3C0122,0xE7240001,0xCB240001,0xDFC0620,0xFEBC0001,0xCAF00000,0x89F80620,0xCA400000,0xAE000620,0xDFC0620,0xFEBC0001,0xCAF00000,0x89F80620,0xCA400000,0xAE000620,0x89F80620,0xCA400000,0xAE000620,0xAE000620,0xDFC0620,0xFEBC0001,0xCAF00000,0x89F80620,0xCA400000,0xAE000620,0x89F80620,0xCA400000,0xAE000620,0xAE000620,0x89F80620, -0xCA400000,0xAE000620,0xAE000620,0xAE000620,0xFF3404C9,0x3740620,0xF5580515,0xFF0402E4,0xFEBC0139,0xF4480000,0xCA800000,0xCA000004,0xFF2C04B1,0xFEF002B1,0xD2940001,0xAE000620,0x69FC0620,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0x8C0622,0xD2580001,0xD2580001,0xD2580001,0xD2580001,0xD2580001, -0xD2580001,0x7E540001,0x7E540001,0x7E540001,0x62540001,0x2D00620,0x2D00620,0x2D00620,0x2D00620,0x2D00620,0x2D00620,0x8C000019,0x8C000019,0x8C000019,0x62200000,0x1AC0620,0x1AC0620,0x1AC0620,0x56000171,0x46000620,0xF88003CA,0x8C0622,0x8C0622,0xFC70018A,0xFE640062,0xFA580001,0xFA580001,0xB2580001,0xFC640320,0xFE500145,0x9A480000,0x8C000019, -0x12C0620,}; -static const uint32_t g_etc1_to_bc7_m6_table176[] = { -0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x1380000, -0x1380000,0x1380000,0x1380000,0x32000001,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0xE6C0000,0xE6C0000,0xE6C0000,0x2980000,0xDC0000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000, -0x1400000,0x25F80000,0x25F80000,0x25F80000,0x6A000001,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x25F80000,0x25F80000,0x25F80000,0x6A000001,0x25F80000,0x25F80000,0x25F80000,0x6A000001,0x6A000001,0xAE40000,0xD80000,0xD80000,0xFC0000,0x50C0000,0x3240000,0x3240000,0x16C0000,0xFC0000,0x50C0000,0x1CC0000,0x25F80000, -0x1CC0000,0x1380000,0x1380000,0x1380000,0x1380000,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x9A000001,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x9A000001,0x6DF80000,0x6DF80000,0x9A000001,0x9A000001,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x9A000001,0x6DF80000,0x6DF80000,0x9A000001,0x9A000001,0x6DF80000, -0x6DF80000,0x9A000001,0x9A000001,0x9A000001,0x7680000,0x14C0000,0x1380000,0x1A80000,0xDFC0000,0x45FC0000,0x59FC0000,0x79F80000,0x1880000,0x1D00000,0x45FC0000,0x9A000001,0x45FC0000,0x1A80000,0x7DFC0000,0xBFFC0000,0xD2000001,0x7DFC0000,0xBFFC0000,0xD2000001,0xBFFC0000,0xD2000001,0xD2000001,0x7DFC0000,0xBFFC0000,0xD2000001,0xBFFC0000,0xD2000001, -0xD2000001,0xBFFC0000,0xD2000001,0xD2000001,0xD2000001,0x7DFC0000,0xBFFC0000,0xD2000001,0xBFFC0000,0xD2000001,0xD2000001,0xBFFC0000,0xD2000001,0xD2000001,0xD2000001,0xBFFC0000,0xD2000001,0xD2000001,0xD2000001,0xD2000001,0x2DFC0000,0x1C40000,0x1C40000,0x97FC0000,0xB7FC0000,0xC9FC0000,0xD2000001,0xD2000001,0x5FFC0000,0xA5FC0000,0xD1F40000,0xD2000001, -0xAFFC0000,0x1083680,0xFEE82044,0xFED41550,0xD2D4152D,0xFED017C8,0xFEAC07D9,0xDCB00931,0xEAA00C40,0xCC9C068A,0xB6A00C42,0xFEB01D1B,0xFE8406CD,0xDC8C0735,0xF6600651,0xD26C0012,0xB8740686,0xD268152B,0xC2580733,0xB0580933,0x9E68152B,0x1883680,0xFE4C1863,0xD468152C,0xFE080D7A,0xD620064F,0xB63C0C42,0xE200176F,0xC80005CF,0xB2000713,0x9E00152B,0x49F83680, -0xB2001BF1,0xA0001682,0x96001F3C,0x82003684,0xFEE42462,0xF9003040,0xFB0431B0,0xFEC01361,0xFE9005B6,0xFA68000E,0xD6740086,0xCC600082,0xFED02373,0xFE981122,0xDC4C049F,0xB2000713,0x19FC3680,0x13C152C,0xFF180984,0xFB080480,0xD3080481,0xFF000938,0xFED80019,0xD6E0007D,0xDCD80620,0xC8D00132,0xB6D80622,0x1D8152B,0xFEA00576,0xD2BC0481,0xFE4C0621,0xD26C000E, -0xB6900622,0x71F8152B,0xCA0004F1,0xB20006E2,0x9E00152B,0x1D8152B,0xFEA00576,0xD2BC0481,0xFE4C0621,0xD26C000E,0xB6900622,0x71F8152B,0xCA0004F1,0xB20006E2,0x9E00152B,0x71F8152B,0xCA0004F1,0xB20006E2,0x9E00152B,0x9E00152B,0xFF140F21,0xFF2C12B6,0xF53812C4,0xFEEC08F1,0xFEA80326,0xF674000D,0xD48C0002,0xD03C003B,0xFF080F33,0xFEC8081B,0xDE380481,0xB20006E2, -0x4BFC152B,0xD4152C,0xD4152C,0xD4152C,0xD4152C,0xFEA80740,0xFEA80740,0xFEA80740,0xC0A00620,0xC0A00620,0x9AA00621,0xFE780540,0xFE780540,0xFE780540,0xD0680011,0xD0680011,0x9E7C0131,0xAA680480,0xAA680480,0x9660007A,0x82680482,0x138152B,0x138152B,0x138152B,0xE2100621,0xE2100621,0x9A580621,0xC0000501,0xC0000501,0x9C040006,0x821C0482,0x21F8152B, -0x21F8152B,0x8A000893,0x780008BB,0x6800152B,0xFEBC0CE5,0xFECC1178,0xD4152C,0xFE980671,0xFE80016D,0xF86C000C,0xE86C0038,0xC0680001,0xFEA00C3A,0xFE8005A2,0xD2540481,0x9C040006,0x1BC152B,0x1080480,0x1080480,0x1080480,0x1080480,0xFCD80000,0xFCD80000,0xFCD80000,0xB2D80000,0xB2D80000,0x9AD80001,0x1880480,0x1880480,0x1880480,0xC6800001,0xC6800001, -0x9CA80001,0x49F80480,0x49F80480,0x9A180001,0x82000482,0x1880480,0x1880480,0x1880480,0xC6800001,0xC6800001,0x9CA80001,0x49F80480,0x49F80480,0x9A180001,0x82000482,0x49F80480,0x49F80480,0x9A180001,0x82000482,0x82000482,0xFAEC02AD,0xF9000320,0x1080480,0xFEC40190,0xFE980041,0xF0780000,0xD8880000,0xC2600000,0xFED002DA,0xFEB0016D,0x19FC0480,0x9A180001, -0x19FC0480,0x1700620,0xFF500171,0xEF380000,0xD3380001,0x29FC0620,0xFED80019,0xD5000001,0x97F80620,0xD2540001,0xB6000622,0x29FC0620,0xFED80019,0xD5000001,0x97F80620,0xD2540001,0xB6000622,0x97F80620,0xD2540001,0xB6000622,0xB6000622,0x29FC0620,0xFED80019,0xD5000001,0x97F80620,0xD2540001,0xB6000622,0x97F80620,0xD2540001,0xB6000622,0xB6000622,0x97F80620, -0xD2540001,0xB6000622,0xB6000622,0xB6000622,0xFF5004E4,0x1880620,0xFF6C0515,0xFF200332,0xFED00195,0xFE580000,0xD2980001,0xD2080001,0xF94804E2,0xFF0802E4,0xDCA40000,0xB6000622,0x7BFC0620,0xA00620,0xA00620,0xA00620,0xA00620,0xA00620,0xA00620,0xA00620,0xA00620,0xA00620,0xA00620,0xDE680000,0xDE680000,0xDE680000,0xDE680000,0xDE680000, -0xDE680000,0x86680000,0x86680000,0x86680000,0x6A680001,0xEC0620,0xEC0620,0xEC0620,0xEC0620,0xEC0620,0xEC0620,0x9E000001,0x9E000001,0x9E000001,0x6C300001,0x1E40620,0x1E40620,0x1E40620,0x60000122,0x4E000622,0xFE8C03E8,0xA00620,0xA00620,0xFE8401A5,0xFE780080,0xFE680004,0xFE680004,0xBC680000,0xFE740328,0xFE640179,0xA4580000,0x9E000001, -0x1540620,}; -static const uint32_t g_etc1_to_bc7_m6_table177[] = { -0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x1680000, -0x1680000,0x1680000,0x1680000,0x3A000001,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x800000,0x800000,0x800000,0x2B00000,0xFC0000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000, -0x1580000,0x31F80000,0x31F80000,0x31F80000,0x72000001,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x31F80000,0x31F80000,0x31F80000,0x72000001,0x31F80000,0x31F80000,0x31F80000,0x72000001,0x72000001,0xF80000,0xE80000,0xE80000,0x30C0000,0x5200000,0x13C0000,0x13C0000,0x1840000,0x30C0000,0x5200000,0x1EC0000,0x31F80000, -0x1EC0000,0x1480000,0x1480000,0x1480000,0x1480000,0x1E80000,0x1E80000,0x1E80000,0x79F80000,0x79F80000,0xA2000001,0x1E80000,0x1E80000,0x1E80000,0x79F80000,0x79F80000,0xA2000001,0x79F80000,0x79F80000,0xA2000001,0xA2000001,0x1E80000,0x1E80000,0x1E80000,0x79F80000,0x79F80000,0xA2000001,0x79F80000,0x79F80000,0xA2000001,0xA2000001,0x79F80000, -0x79F80000,0xA2000001,0xA2000001,0xA2000001,0x37C0000,0x75C0000,0x1480000,0x3BC0000,0x21FC0000,0x55FC0000,0x67F80000,0x83FC0000,0x19C0000,0x1E80000,0x55FC0000,0xA2000001,0x55FC0000,0x1B80000,0x95FC0000,0xCBFC0000,0xDA000001,0x95FC0000,0xCBFC0000,0xDA000001,0xCBFC0000,0xDA000001,0xDA000001,0x95FC0000,0xCBFC0000,0xDA000001,0xCBFC0000,0xDA000001, -0xDA000001,0xCBFC0000,0xDA000001,0xDA000001,0xDA000001,0x95FC0000,0xCBFC0000,0xDA000001,0xCBFC0000,0xDA000001,0xDA000001,0xCBFC0000,0xDA000001,0xDA000001,0xDA000001,0xCBFC0000,0xDA000001,0xDA000001,0xDA000001,0xDA000001,0x55FC0000,0x3D40000,0x3D40000,0xABFC0000,0xC5FC0000,0xD3FC0000,0xDA000001,0xDA000001,0x7DFC0000,0xB5FC0000,0xDBC80000,0xDA000001, -0xBFF80000,0x1183680,0xFEF42170,0xFEE415A5,0xDAE4152D,0xFEDC1910,0xFEC008C1,0xE4C00931,0xF2B00C40,0xD4AC068A,0xBEB00C42,0xFEC41E10,0xFE9407E6,0xE49C0735,0xFE700651,0xDA7C0012,0xC0840686,0xDA78152B,0xCA680733,0xB8680933,0xA678152B,0x1A03680,0xFE641953,0xDC78152C,0xFE200E4A,0xDE30064F,0xBE4C0C42,0xF40016C3,0xD4000527,0xBC0006AF,0xA610152B,0x55F83680, -0xBE001AF9,0xAC001532,0x9C001E2C,0x8A003684,0xFEF8257D,0xFF0C3050,0xFF0C31F0,0xFED414FA,0xFEA4073E,0xFE7C001E,0xDE840086,0xD4700082,0xFEDC247F,0xFEB012E5,0xE45C049F,0xBC0006AF,0x27FC3680,0x14C152C,0xFF300A24,0xFF180489,0xDB180481,0xFF1809E8,0xFEF00061,0xDEF0007D,0xE4E80620,0xD0E00132,0xBEE80622,0x1F0152B,0xFEB805F6,0xDACC0481,0xFE640641,0xDA7C000E, -0xBEA00622,0x7DF8152B,0xD60004B9,0xBC0006AB,0xA600152B,0x1F0152B,0xFEB805F6,0xDACC0481,0xFE640641,0xDA7C000E,0xBEA00622,0x7DF8152B,0xD60004B9,0xBC0006AB,0xA600152B,0x7DF8152B,0xD60004B9,0xBC0006AB,0xA600152B,0xA600152B,0xFD300F89,0xFB4412E4,0xFD4812C4,0xFF0009C8,0xFEC803F2,0xFE84000D,0xDC9C0002,0xD84C003B,0xFF180FA2,0xFEE80905,0xE6480481,0xBC0006AB, -0x5BFC152B,0xE4152C,0xE4152C,0xE4152C,0xE4152C,0xFEBC0789,0xFEBC0789,0xFEBC0789,0xC8B00620,0xC8B00620,0xA2B00621,0xFE8C0586,0xFE8C0586,0xFE8C0586,0xD8780011,0xD8780011,0xA68C0131,0xB2780480,0xB2780480,0x9E70007A,0x8A780482,0x150152B,0x150152B,0x150152B,0xEA200621,0xEA200621,0xA2680621,0xCC0004B9,0xCC0004B9,0xA4140006,0x8A2C0482,0x2DF8152B, -0x2DF8152B,0x9600081B,0x8400082B,0x7000152B,0xFECC0D34,0xF6DC11D1,0xE4152C,0xFEAC0709,0xFE9401DD,0xFC7C000E,0xF07C0038,0xC8780001,0xFEB40C89,0xFE980632,0xDA640481,0xA4140006,0x1E0152B,0x1180480,0x1180480,0x1180480,0x1180480,0xFEE80004,0xFEE80004,0xFEE80004,0xBAE80000,0xBAE80000,0xA2E80001,0x1A00480,0x1A00480,0x1A00480,0xCE900001,0xCE900001, -0xA4B80001,0x55F80480,0x55F80480,0xA2280001,0x8A000482,0x1A00480,0x1A00480,0x1A00480,0xCE900001,0xCE900001,0xA4B80001,0x55F80480,0x55F80480,0xA2280001,0x8A000482,0x55F80480,0x55F80480,0xA2280001,0x8A000482,0x8A000482,0xF70002D2,0xFF0C0328,0x1180480,0xFEDC01A5,0xFEAC0061,0xF8880000,0xE0980000,0xCA700000,0xF8EC02F9,0xFEC40188,0x27FC0480,0xA2280001, -0x27FC0480,0x1800620,0xFF5C01CD,0xF7480000,0xDB480001,0x41FC0620,0xFEFC0041,0xDD100001,0xA1FC0620,0xDA640001,0xBE000622,0x41FC0620,0xFEFC0041,0xDD100001,0xA1FC0620,0xDA640001,0xBE000622,0xA1FC0620,0xDA640001,0xBE000622,0xBE000622,0x41FC0620,0xFEFC0041,0xDD100001,0xA1FC0620,0xDA640001,0xBE000622,0xA1FC0620,0xDA640001,0xBE000622,0xBE000622,0xA1FC0620, -0xDA640001,0xBE000622,0xBE000622,0xBE000622,0xF7680515,0x5980620,0xF77C0548,0xFF400372,0xFEFC01E5,0xFE84000D,0xDAA80001,0xDA180001,0xFD5804E4,0xFF200340,0xE4B40000,0xBE000622,0x89FC0620,0xB00620,0xB00620,0xB00620,0xB00620,0xB00620,0xB00620,0xB00620,0xB00620,0xB00620,0xB00620,0xE6780000,0xE6780000,0xE6780000,0xE6780000,0xE6780000, -0xE6780000,0x8E780000,0x8E780000,0x8E780000,0x72780001,0x1040620,0x1040620,0x1040620,0x1040620,0x1040620,0x1040620,0xA6100001,0xA6100001,0xA6100001,0x74400001,0x7FC0620,0x7FC0620,0x7FC0620,0x6C0000DA,0x56000622,0xFAA403F5,0xB00620,0xB00620,0xFE9001D4,0xFC8800A4,0xFC7C000D,0xFC7C000D,0xC4780000,0xFC8C0349,0xFE780190,0xAC680000,0xA6100001, -0x1740620,}; -static const uint32_t g_etc1_to_bc7_m6_table178[] = { -0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0x1980000, -0x1980000,0x1980000,0x1980000,0x42000001,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x900000,0x900000,0x900000,0xC80000,0x1200000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0xF80000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000, -0x1700000,0x3DF80000,0x3DF80000,0x3DF80000,0x7A000001,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x3DF80000,0x3DF80000,0x3DF80000,0x7A000001,0x3DF80000,0x3DF80000,0x3DF80000,0x7A000001,0x7A000001,0x1080000,0xF80000,0xF80000,0x1200000,0x5340000,0x1500000,0x1500000,0x1A00000,0x1200000,0x5340000,0x9FC0000,0x3DF80000, -0x9FC0000,0x1580000,0x1580000,0x1580000,0x1580000,0x3FC0000,0x3FC0000,0x3FC0000,0x85F80000,0x85F80000,0xAA000001,0x3FC0000,0x3FC0000,0x3FC0000,0x85F80000,0x85F80000,0xAA000001,0x85F80000,0x85F80000,0xAA000001,0xAA000001,0x3FC0000,0x3FC0000,0x3FC0000,0x85F80000,0x85F80000,0xAA000001,0x85F80000,0x85F80000,0xAA000001,0xAA000001,0x85F80000, -0x85F80000,0xAA000001,0xAA000001,0xAA000001,0x1900000,0xF6C0000,0x1580000,0x1D40000,0x33FC0000,0x63FC0000,0x75F80000,0x8FF80000,0x1B00000,0x3FC0000,0x63FC0000,0xAA000001,0x63FC0000,0x1C80000,0xAFFC0000,0xD7FC0000,0xE2000001,0xAFFC0000,0xD7FC0000,0xE2000001,0xD7FC0000,0xE2000001,0xE2000001,0xAFFC0000,0xD7FC0000,0xE2000001,0xD7FC0000,0xE2000001, -0xE2000001,0xD7FC0000,0xE2000001,0xE2000001,0xE2000001,0xAFFC0000,0xD7FC0000,0xE2000001,0xD7FC0000,0xE2000001,0xE2000001,0xD7FC0000,0xE2000001,0xE2000001,0xE2000001,0xD7FC0000,0xE2000001,0xE2000001,0xE2000001,0xE2000001,0x7DFC0000,0xBE40000,0xBE40000,0xBDFC0000,0xD3F80000,0xDDFC0000,0xE2000001,0xE2000001,0x9BFC0000,0xC7FC0000,0xE3D80000,0xE2000001, -0xCDFC0000,0x1283680,0xFF0C2270,0xFEF81640,0xE2F4152D,0xFEF41A90,0xFED409ED,0xECD00931,0xFAC00C40,0xDCBC068A,0xC6C00C42,0xFED01F3C,0xFEAC0936,0xECAC0735,0xFE880681,0xE28C0012,0xC8940686,0xE288152B,0xD2780733,0xC0780933,0xAE88152B,0x1B83680,0xFE881A5F,0xE488152C,0xFE380F5A,0xE640064F,0xC65C0C42,0xFC00164D,0xDE0004BA,0xC400067B,0xAE20152B,0x61F83680, -0xCA001A21,0xB8001422,0xA6001D6B,0x92003684,0xFF0C269A,0xF9203102,0xFB243245,0xFEDC16B2,0xFEB808F6,0xFE900086,0xE6940086,0xDC800082,0xFEF82596,0xFEC41488,0xEC6C049F,0xC400067B,0x37FC3680,0x15C152C,0xFF440AD1,0xFF2C04B9,0xE3280481,0xFF240A8C,0xFF0400E9,0xE700007D,0xECF80620,0xD8F00132,0xC6F80622,0xDFC152B,0xFECC06B1,0xE2DC0481,0xFE880681,0xE28C000E, -0xC6B00622,0x89F8152B,0xE000049B,0xC4000672,0xAE00152B,0xDFC152B,0xFECC06B1,0xE2DC0481,0xFE880681,0xE28C000E,0xC6B00622,0x89F8152B,0xE000049B,0xC4000672,0xAE00152B,0x89F8152B,0xE000049B,0xC4000672,0xAE00152B,0xAE00152B,0xFF3C1004,0xFF4C1324,0xF5581329,0xFF140A71,0xFEE804D9,0xFE980052,0xE4AC0002,0xE05C003B,0xFF34103A,0xFF0409B3,0xEE580481,0xC4000672, -0x69FC152B,0xF4152C,0xF4152C,0xF4152C,0xF4152C,0xFED007E4,0xFED007E4,0xFED007E4,0xD0C00620,0xD0C00620,0xAAC00621,0xFEA405E6,0xFEA405E6,0xFEA405E6,0xE0880011,0xE0880011,0xAE9C0131,0xBA880480,0xBA880480,0xA680007A,0x92880482,0x168152B,0x168152B,0x168152B,0xF2300621,0xF2300621,0xAA780621,0xD8000491,0xD8000491,0xAC240006,0x923C0482,0x39F8152B, -0x39F8152B,0x9C0007A3,0x8A000793,0x7800152B,0xFED80DAA,0xFEEC11D1,0xF4152C,0xFEC40794,0xFEAC0275,0xFE900022,0xF88C0038,0xD0880001,0xFEBC0D2A,0xFEB006D1,0xE2740481,0xAC240006,0x3FC152B,0x1280480,0x1280480,0x1280480,0x1280480,0xFCFC0014,0xFCFC0014,0xFCFC0014,0xC2F80000,0xC2F80000,0xAAF80001,0x1B80480,0x1B80480,0x1B80480,0xD6A00001,0xD6A00001, -0xACC80001,0x61F80480,0x61F80480,0xAA380001,0x92000482,0x1B80480,0x1B80480,0x1B80480,0xD6A00001,0xD6A00001,0xACC80001,0x61F80480,0x61F80480,0xAA380001,0x92000482,0x61F80480,0x61F80480,0xAA380001,0x92000482,0x92000482,0xFF1002D2,0xF9200349,0x1280480,0xFCF401E1,0xFEC80082,0xFC9C0001,0xE8A80000,0xD2800000,0xFEF802FD,0xFEDC01A5,0x37FC0480,0xAA380001, -0x37FC0480,0x1900620,0xFF74022D,0xFF580000,0xE3580001,0x59FC0620,0xFF140089,0xE5200001,0xADFC0620,0xE2740001,0xC6000622,0x59FC0620,0xFF140089,0xE5200001,0xADFC0620,0xE2740001,0xC6000622,0xADFC0620,0xE2740001,0xC6000622,0xC6000622,0x59FC0620,0xFF140089,0xE5200001,0xADFC0620,0xE2740001,0xC6000622,0xADFC0620,0xE2740001,0xC6000622,0xC6000622,0xADFC0620, -0xE2740001,0xC6000622,0xC6000622,0xC6000622,0xFF780515,0xDA80620,0xFF8C0548,0xFF5803C8,0xFF100249,0xFEAC0041,0xE2B80001,0xE2280001,0xFF700515,0xFF40037A,0xECC40000,0xC6000622,0x99FC0620,0xC00620,0xC00620,0xC00620,0xC00620,0xC00620,0xC00620,0xC00620,0xC00620,0xC00620,0xC00620,0xEE880000,0xEE880000,0xEE880000,0xEE880000,0xEE880000, -0xEE880000,0x96880000,0x96880000,0x96880000,0x7A880001,0x11C0620,0x11C0620,0x11C0620,0x11C0620,0x11C0620,0x11C0620,0xAE200001,0xAE200001,0xAE200001,0x7C500001,0x13FC0620,0x13FC0620,0x13FC0620,0x720000A2,0x5E000622,0xFEAC041D,0xC00620,0xC00620,0xFEA001F9,0xFC9C00C8,0xFE900019,0xFE900019,0xCC880000,0xFC9C0372,0xFC9001C2,0xB4780000,0xAE200001, -0x1980620,}; -static const uint32_t g_etc1_to_bc7_m6_table179[] = { -0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0x1CC0000, -0x1CC0000,0x1CC0000,0x1CC0000,0x4A000001,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x8A00000,0x8A00000,0x8A00000,0xE00000,0x1400000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000, -0x1880000,0x49F80000,0x49F80000,0x49F80000,0x82000001,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x49F80000,0x82000001,0x49F80000,0x49F80000,0x49F80000,0x82000001,0x82000001,0x5180000,0x1080000,0x1080000,0x1340000,0x14C0000,0x1680000,0x1680000,0x1BC0000,0x1340000,0x14C0000,0x19FC0000,0x49F80000, -0x19FC0000,0x1680000,0x1680000,0x1680000,0x1680000,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x91F80000,0x91F80000,0xB2000001,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x91F80000,0x91F80000,0xB2000001,0x91F80000,0x91F80000,0xB2000001,0xB2000001,0x1BFC0000,0x1BFC0000,0x1BFC0000,0x91F80000,0x91F80000,0xB2000001,0x91F80000,0x91F80000,0xB2000001,0xB2000001,0x91F80000, -0x91F80000,0xB2000001,0xB2000001,0xB2000001,0x7A00000,0x1800000,0x1680000,0x3E80000,0x47FC0000,0x73FC0000,0x81FC0000,0x99FC0000,0x1C40000,0x1BFC0000,0x73FC0000,0xB2000001,0x73FC0000,0x1D80000,0xC7FC0000,0xE3FC0000,0xEA000001,0xC7FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xEA000001,0xEA000001,0xC7FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xEA000001, -0xEA000001,0xE3FC0000,0xEA000001,0xEA000001,0xEA000001,0xC7FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xEA000001,0xEA000001,0xE3FC0000,0xEA000001,0xEA000001,0xEA000001,0xE3FC0000,0xEA000001,0xEA000001,0xEA000001,0xEA000001,0xA3FC0000,0x1F80000,0x1F80000,0xD1FC0000,0xDFFC0000,0xE7FC0000,0xEA000001,0xEA000001,0xB9FC0000,0xD7FC0000,0xEBE80000,0xEA000001, -0xDDF80000,0x1383680,0xFF1823A4,0xFF0816F9,0xEB04152D,0xFF001C08,0xFEE40B51,0xF4E00931,0xFED40C50,0xE4CC068A,0xCED00C42,0xFEE8206C,0xFEC00AD1,0xF4BC0735,0xFEA00711,0xEA9C0012,0xD0A40686,0xEA98152B,0xDA880733,0xC8880933,0xB698152B,0x1D03680,0xFEA01B7F,0xEC98152C,0xFE5810AA,0xEE50064F,0xCE6C0C42,0xFE14169F,0xEA00048A,0xCC10067B,0xB630152B,0x6DF83680, -0xD0001959,0xBE001302,0xAC001C83,0x9A003684,0xFF2027A2,0xFF2C311A,0xFF2C328D,0xFEF0188A,0xFECC0ADE,0xFEAC0156,0xEEA40086,0xE4900082,0xFF0026DC,0xFEDC1672,0xF47C049F,0xCC10067B,0x45FC3680,0x16C152C,0xFF500B91,0xFF380529,0xEB380481,0xFF3C0B4C,0xFF14019D,0xEF10007D,0xF5080620,0xE1000132,0xCF080622,0x25FC152B,0xFEF00771,0xEAEC0481,0xFEAC06F1,0xEA9C000E, -0xCEC00622,0x95F8152B,0xEA000489,0xCE000653,0xB600152B,0x25FC152B,0xFEF00771,0xEAEC0481,0xFEAC06F1,0xEA9C000E,0xCEC00622,0x95F8152B,0xEA000489,0xCE000653,0xB600152B,0x95F8152B,0xEA000489,0xCE000653,0xB600152B,0xB600152B,0xFF501055,0xFD681342,0xFD681329,0xFF300B69,0xFEFC05D5,0xFEC000CD,0xECBC0002,0xE86C003B,0xFF44109B,0xFF180AA9,0xF6680481,0xCE000653, -0x79FC152B,0x104152C,0x104152C,0x104152C,0x104152C,0xFEDC084C,0xFEDC084C,0xFEDC084C,0xD8D00620,0xD8D00620,0xB2D00621,0xFEB00662,0xFEB00662,0xFEB00662,0xE8980011,0xE8980011,0xB6AC0131,0xC2980480,0xC2980480,0xAE90007A,0x9A980482,0x180152B,0x180152B,0x180152B,0xFA400621,0xFA400621,0xB2880621,0xE6040481,0xE6040481,0xB4340006,0x9A4C0482,0x45F8152B, -0x45F8152B,0xA6000749,0x94000722,0x8000152B,0xFEE80E01,0xF6FC122C,0x104152C,0xFED40821,0xFEC00309,0xFEA40058,0xFCA0003D,0xD8980001,0xFED00D73,0xFEBC076A,0xEA840481,0xB4340006,0x13FC152B,0x1380480,0x1380480,0x1380480,0x1380480,0xFF0C0028,0xFF0C0028,0xFF0C0028,0xCB080000,0xCB080000,0xB3080001,0x1D00480,0x1D00480,0x1D00480,0xDEB00001,0xDEB00001, -0xB4D80001,0x6DF80480,0x6DF80480,0xB2480001,0x9A000482,0x1D00480,0x1D00480,0x1D00480,0xDEB00001,0xDEB00001,0xB4D80001,0x6DF80480,0x6DF80480,0xB2480001,0x9A000482,0x6DF80480,0x6DF80480,0xB2480001,0x9A000482,0x9A000482,0xFF2002F9,0xFF2C0355,0x1380480,0xFD040202,0xFEDC00AA,0xFEB40008,0xF0B80000,0xDA900000,0xFD100320,0xFEE801D4,0x45FC0480,0xB2480001, -0x45FC0480,0x1A00620,0xFF8002A1,0xFF6C001D,0xEB680001,0x71FC0620,0xFF3800E9,0xED300001,0xB9FC0620,0xEA840001,0xCE000622,0x71FC0620,0xFF3800E9,0xED300001,0xB9FC0620,0xEA840001,0xCE000622,0xB9FC0620,0xEA840001,0xCE000622,0xCE000622,0x71FC0620,0xFF3800E9,0xED300001,0xB9FC0620,0xEA840001,0xCE000622,0xB9FC0620,0xEA840001,0xCE000622,0xCE000622,0xB9FC0620, -0xEA840001,0xCE000622,0xCE000622,0xCE000622,0xFF8C0548,0x1BC0620,0xF79C057D,0xFF6C03F9,0xFF3C02B1,0xFED40092,0xEAC80001,0xEA380001,0xF7880548,0xFF5803DA,0xF4D40000,0xCE000622,0xA7FC0620,0xD00620,0xD00620,0xD00620,0xD00620,0xD00620,0xD00620,0xD00620,0xD00620,0xD00620,0xD00620,0xF6980000,0xF6980000,0xF6980000,0xF6980000,0xF6980000, -0xF6980000,0x9E980000,0x9E980000,0x9E980000,0x82980001,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0xB6300001,0xB6300001,0xB6300001,0x84600001,0x1FF80620,0x1FF80620,0x1FF80620,0x7E00006A,0x66000622,0xFAC40424,0xD00620,0xD00620,0xFCB80221,0xFEA800E9,0xFCA00034,0xFCA00034,0xD4980000,0xF8B0039D,0xFEA001E1,0xBC880000,0xB6300001, -0x1B80620,}; -static const uint32_t g_etc1_to_bc7_m6_table180[] = { -0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0x3F80000, -0x3F80000,0x3F80000,0x3F80000,0x54000000,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0x2B40000,0x2B40000,0x2B40000,0xFC0000,0x1680000,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000, -0x1A40000,0x57F80000,0x57F80000,0x57F80000,0x8C000000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x57F80000,0x57F80000,0x57F80000,0x8C000000,0x57F80000,0x57F80000,0x57F80000,0x8C000000,0x8C000000,0x12C0000,0x1180001,0x1180001,0x1480000,0x3600000,0x1800000,0x1800000,0x1D80000,0x1480000,0x3600000,0x29FC0000,0x57F80000, -0x29FC0000,0x1780001,0x1780001,0x1780001,0x1780001,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0x9DFC0000,0xBC000000,0xBC000000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0x9DFC0000,0xBC000000,0xBC000000,0x9DFC0000, -0x9DFC0000,0xBC000000,0xBC000000,0xBC000000,0x1B80000,0x1940000,0x1780001,0x9FC0000,0x5DFC0000,0x83FC0000,0x91FC0000,0xA5FC0000,0x3D80000,0x37FC0000,0x83FC0000,0xBC000000,0x83FC0000,0x1E80001,0xE3FC0000,0xF1F80000,0xF4000000,0xE3FC0000,0xF1F80000,0xF4000000,0xF1F80000,0xF4000000,0xF4000000,0xE3FC0000,0xF1F80000,0xF4000000,0xF1F80000,0xF4000000, -0xF4000000,0xF1F80000,0xF4000000,0xF4000000,0xF4000000,0xE3FC0000,0xF1F80000,0xF4000000,0xF1F80000,0xF4000000,0xF4000000,0xF1F80000,0xF4000000,0xF4000000,0xF4000000,0xF1F80000,0xF4000000,0xF4000000,0xF4000000,0xF4000000,0xD1FC0000,0x67FC0000,0x67FC0000,0xE7FC0000,0xEFFC0000,0xF3F80000,0xF4000000,0xF4000000,0xDBFC0000,0xEBFC0000,0xF5DC0000,0xF4000000, -0xEDFC0000,0x1483684,0xFF3024F4,0xFF1C182C,0xF514152B,0xFF181DD4,0xFEFC0D2B,0xFCF00933,0xFEE40CC2,0xEEE00686,0xD8E00C42,0xFF0021E4,0xFED80CD7,0xFCCC0733,0xFEB80823,0xF2AC0012,0xD8B4068A,0xF4A8152C,0xE2980735,0xD0980931,0xBEA8152D,0x3E83680,0xFEB81CE9,0xF4AC152B,0xFE7C1242,0xF8640651,0xD87C0C40,0xFE3817A1,0xF20C0485,0xD618067D,0xBE40152D,0x79FC3680, -0xDC001883,0xCA0011EA,0xB8001BA1,0xA4003680,0xFF3428DF,0xFB4431C4,0xFD4832DC,0xFF081AA2,0xFEE80D2F,0xFEC002B7,0xF8B80082,0xECA00086,0xFF1827ED,0xFEF418C3,0xFC90049F,0xD618067D,0x57FC3680,0x180152B,0xFF680C82,0xFF4C05EB,0xF5480482,0xFF500C19,0xFF2C02AA,0xF924007A,0xFD1C0621,0xEB140131,0xD9180621,0x41FC152B,0xFF080866,0xF4FC0480,0xFECC07B9,0xF2AC0011, -0xD8D00620,0xA1FC152B,0xF4000480,0xD6000631,0xBE00152C,0x41FC152B,0xFF080866,0xF4FC0480,0xFECC07B9,0xF2AC0011,0xD8D00620,0xA1FC152B,0xF4000480,0xD6000631,0xBE00152C,0xA1FC152B,0xF4000480,0xD6000631,0xBE00152C,0xBE00152C,0xFF64110D,0xF57813A3,0xF77C138B,0xFF400C54,0xFF100726,0xFEDC01AD,0xF4D00001,0xF2800038,0xFF5C10F2,0xFF2C0BE1,0xFE7C0484,0xD6000631, -0x89FC152B,0x114152B,0x114152B,0x114152B,0x114152B,0xFEF408D3,0xFEF408D3,0xFEF408D3,0xE0E40622,0xE0E40622,0xBCE00622,0xFED006DB,0xFED006DB,0xFED006DB,0xF2AC000E,0xF2AC000E,0xC0C00132,0xCAAC0481,0xCAAC0481,0xB6A0007D,0xA4A80481,0x398152B,0x398152B,0x398152B,0xFE580629,0xFE580629,0xBC980620,0xEE180481,0xEE180481,0xBC480005,0xA45C0480,0x51FC152B, -0x51FC152B,0xB20006E6,0x9C000690,0x8A00152C,0xFF040E83,0xFF0C1233,0x114152B,0xFEE808EA,0xFED403B6,0xFEBC00B2,0xFEB4004B,0xE0A80002,0xFEE40E19,0xFED007FD,0xF4940480,0xBC480005,0x23FC152B,0x1480482,0x1480482,0x1480482,0x1480482,0xFF24004A,0xFF24004A,0xFF24004A,0xD5180001,0xD5180001,0xBD180001,0x3E80480,0x3E80480,0x3E80480,0xE8C00001,0xE8C00001, -0xBCEC0000,0x79FC0480,0x79FC0480,0xBC540000,0xA4000480,0x3E80480,0x3E80480,0x3E80480,0xE8C00001,0xE8C00001,0xBCEC0000,0x79FC0480,0x79FC0480,0xBC540000,0xA4000480,0x79FC0480,0x79FC0480,0xBC540000,0xA4000480,0xA4000480,0xFB340322,0xFB440372,0x1480482,0xFF180225,0xFEF000E8,0xFED00022,0xF8CC0000,0xE4A00000,0xF9280349,0xFF040208,0x57FC0480,0xBC540000, -0x57FC0480,0x1B00622,0xFF980321,0xFF7C007A,0xF5780001,0x8DFC0620,0xFF580185,0xF5440000,0xC7FC0620,0xF4940000,0xD8000620,0x8DFC0620,0xFF580185,0xF5440000,0xC7FC0620,0xF4940000,0xD8000620,0xC7FC0620,0xF4940000,0xD8000620,0xD8000620,0x8DFC0620,0xFF580185,0xF5440000,0xC7FC0620,0xF4940000,0xD8000620,0xC7FC0620,0xF4940000,0xD8000620,0xD8000620,0xC7FC0620, -0xF4940000,0xD8000620,0xD8000620,0xD8000620,0xF7A4057D,0xFCC0620,0xFFAC0581,0xFF84045D,0xFF500335,0xFF040112,0xF4D40000,0xF4440000,0xFF98054A,0xFD800451,0xFCE80001,0xD8000620,0xB9FC0620,0xE00622,0xE00622,0xE00622,0xE00622,0xE00622,0xE00622,0xE00622,0xE00622,0xE00622,0xE00622,0xFCAC0001,0xFCAC0001,0xFCAC0001,0xFCAC0001,0xFCAC0001, -0xFCAC0001,0xA8A80001,0xA8A80001,0xA8A80001,0x8CA80001,0x1500620,0x1500620,0x1500620,0x1500620,0x1500620,0x1500620,0xC0400001,0xC0400001,0xC0400001,0x8C740000,0x2DF80620,0x2DF80620,0x2DF80620,0x88000041,0x70000620,0xF4D80451,0xE00622,0xE00622,0xFECC0242,0xFEBC0115,0xFEB4004A,0xFEB4004A,0xDCAC0001,0xFEBC03A9,0xFEB40202,0xC49C0000,0xC0400001, -0x1E00620,}; -static const uint32_t g_etc1_to_bc7_m6_table181[] = { -0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0xFF80000, -0xFF80000,0xFF80000,0xFF80000,0x5C000000,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xAC40000,0xAC40000,0xAC40000,0x1140000,0x18C0000,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000, -0x1BC0000,0x61FC0000,0x61FC0000,0x61FC0000,0x94000000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x61FC0000,0x61FC0000,0x61FC0000,0x94000000,0x61FC0000,0x61FC0000,0x61FC0000,0x94000000,0x94000000,0x73C0000,0x1280001,0x1280001,0x5580000,0x3740000,0x1940000,0x1940000,0x1F40000,0x5580000,0x3740000,0x39FC0000,0x61FC0000, -0x39FC0000,0x1880001,0x1880001,0x1880001,0x1880001,0x4FFC0000,0x4FFC0000,0x4FFC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0xA9FC0000,0xA9FC0000,0xC4000000,0xC4000000,0x4FFC0000,0x4FFC0000,0x4FFC0000,0xA9FC0000,0xA9FC0000,0xC4000000,0xA9FC0000,0xA9FC0000,0xC4000000,0xC4000000,0xA9FC0000, -0xA9FC0000,0xC4000000,0xC4000000,0xC4000000,0x5C80000,0x1A40000,0x1880001,0x27FC0000,0x71FC0000,0x93FC0000,0x9FF80000,0xB1F80000,0x3EC0000,0x4FFC0000,0x93FC0000,0xC4000000,0x93FC0000,0x1F80001,0xFBFC0000,0xFDF80000,0xFC000000,0xFBFC0000,0xFDF80000,0xFC000000,0xFDF80000,0xFC000000,0xFC000000,0xFBFC0000,0xFDF80000,0xFC000000,0xFDF80000,0xFC000000, -0xFC000000,0xFDF80000,0xFC000000,0xFC000000,0xFC000000,0xFBFC0000,0xFDF80000,0xFC000000,0xFDF80000,0xFC000000,0xFC000000,0xFDF80000,0xFC000000,0xFC000000,0xFC000000,0xFDF80000,0xFC000000,0xFC000000,0xFC000000,0xFC000000,0xF7FC0000,0xE7FC0000,0xE7FC0000,0xFBFC0000,0xFDF80000,0xFDF80000,0xFC000000,0xFC000000,0xF9FC0000,0xFBFC0000,0xFDEC0000,0xFC000000, -0xFDF80000,0x1583684,0xFF44261B,0xFF2C1953,0xFD24152B,0xFF241F94,0xFF100F1F,0xFF00095F,0xFEFC0D92,0xF6F00686,0xE0F00C42,0xFF18234C,0xFEF00EDF,0xFEDC0767,0xFECC099A,0xFABC0012,0xE0C4068A,0xFCB8152C,0xEAA80735,0xD8A80931,0xC6B8152D,0x7FC3680,0xFED81E87,0xFCBC152B,0xFE9413F2,0xFE700655,0xE08C0C40,0xFE5818EB,0xFA1C0485,0xDE28067D,0xC650152D,0x85FC3680, -0xE60017F0,0xD0001112,0xBE001AD5,0xAC003680,0xFF3C29CC,0xFF4C3204,0xFF4C3354,0xFF181C7A,0xFEFC0F77,0xFED40452,0xFECC0089,0xF4B00086,0xFF342916,0xFF101AAA,0xFEA404D3,0xDE28067D,0x65FC3680,0x190152B,0xFF740D62,0xFF6406C3,0xFD580482,0xFF680D01,0xFF4003E6,0xFF340081,0xFF2C0641,0xF3240131,0xE1280621,0x59FC152B,0xFF200966,0xFD0C0480,0xFEF00889,0xFABC0011, -0xE0E00620,0xADFC152B,0xFC100480,0xE0000624,0xC600152C,0x59FC152B,0xFF200966,0xFD0C0480,0xFEF00889,0xFABC0011,0xE0E00620,0xADFC152B,0xFC100480,0xE0000624,0xC600152C,0xADFC152B,0xFC100480,0xE0000624,0xC600152C,0xC600152C,0xFF781162,0xFD8813A3,0xFF8C138B,0xFF540D51,0xFF28086E,0xFEF002BB,0xFCE00001,0xFA900038,0xFD7411AE,0xFF480C8E,0xFEA004D1,0xE0000624, -0x99FC152B,0x124152B,0x124152B,0x124152B,0x124152B,0xFF00095B,0xFF00095B,0xFF00095B,0xE8F40622,0xE8F40622,0xC4F00622,0xFEDC0763,0xFEDC0763,0xFEDC0763,0xFABC000E,0xFABC000E,0xC8D00132,0xD2BC0481,0xD2BC0481,0xBEB0007D,0xACB80481,0x3B0152B,0x3B0152B,0x3B0152B,0xFE700651,0xFE700651,0xC4A80620,0xF6280481,0xF6280481,0xC4580005,0xAC6C0480,0x5DFC152B, -0x5DFC152B,0xBE0006A6,0xA6000631,0x9200152C,0xFF100EED,0xF9201286,0x124152B,0xFEF80996,0xFEE40481,0xFECC0139,0xFECC0085,0xE8B80002,0xFEF80E66,0xFEE808E2,0xFCA40480,0xC4580005,0x33FC152B,0x1580482,0x1580482,0x1580482,0x1580482,0xFF34007D,0xFF34007D,0xFF34007D,0xDD280001,0xDD280001,0xC5280001,0x7FC0480,0x7FC0480,0x7FC0480,0xF0D00001,0xF0D00001, -0xC4FC0000,0x85FC0480,0x85FC0480,0xC4640000,0xAC000480,0x7FC0480,0x7FC0480,0x7FC0480,0xF0D00001,0xF0D00001,0xC4FC0000,0x85FC0480,0x85FC0480,0xC4640000,0xAC000480,0x85FC0480,0x85FC0480,0xC4640000,0xAC000480,0xAC000480,0xF7480349,0xFF4C0392,0x1580482,0xFD300265,0xFD140120,0xFEEC003D,0xFCE00001,0xECB00000,0xFF34034D,0xFF18022D,0x65FC0480,0xC4640000, -0x65FC0480,0x1C00622,0xFFB003A9,0xFF9400FA,0xFD880001,0xA5FC0620,0xFF70021D,0xFD540000,0xD3FC0620,0xFCA40000,0xE0000620,0xA5FC0620,0xFF70021D,0xFD540000,0xD3FC0620,0xFCA40000,0xE0000620,0xD3FC0620,0xFCA40000,0xE0000620,0xE0000620,0xA5FC0620,0xFF70021D,0xFD540000,0xD3FC0620,0xFCA40000,0xE0000620,0xD3FC0620,0xFCA40000,0xE0000620,0xE0000620,0xD3FC0620, -0xFCA40000,0xE0000620,0xE0000620,0xE0000620,0xFFB4057D,0x1E00620,0xF9C005B2,0xFF9404B2,0xFF7C03A9,0xFF2C01BA,0xFCE40000,0xFC540000,0xFFAC0581,0xFF940488,0xFF10001D,0xE0000620,0xC7FC0620,0xF00622,0xF00622,0xF00622,0xF00622,0xF00622,0xF00622,0xF00622,0xF00622,0xF00622,0xF00622,0xFEBC0005,0xFEBC0005,0xFEBC0005,0xFEBC0005,0xFEBC0005, -0xFEBC0005,0xB0B80001,0xB0B80001,0xB0B80001,0x94B80001,0x1680620,0x1680620,0x1680620,0x1680620,0x1680620,0x1680620,0xC8500001,0xC8500001,0xC8500001,0x94840000,0x39F80620,0x39F80620,0x39F80620,0x90000020,0x78000620,0xFCE80451,0xF00622,0xF00622,0xFED80271,0xFED0013D,0xFEC4006A,0xFEC4006A,0xE4BC0001,0xFCD403C8,0xFAC80244,0xCCAC0000,0xC8500001, -0x3FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table182[] = { -0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x1BF80000, -0x1BF80000,0x1BF80000,0x1BF80000,0x64000000,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xD80000,0xD80000,0xD80000,0x12C0000,0x1AC0000,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x1380001,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000, -0x3D00000,0x6DFC0000,0x6DFC0000,0x6DFC0000,0x9C000000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x3D00000,0x6DFC0000,0x6DFC0000,0x6DFC0000,0x9C000000,0x6DFC0000,0x6DFC0000,0x6DFC0000,0x9C000000,0x9C000000,0xF4C0000,0x1380001,0x1380001,0x16C0000,0x3880000,0x1AC0000,0x1AC0000,0xFFC0000,0x16C0000,0x3880000,0x47FC0000,0x6DFC0000, -0x47FC0000,0x1980001,0x1980001,0x1980001,0x1980001,0x69FC0000,0x69FC0000,0x69FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0x69FC0000,0x69FC0000,0x69FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0xB5FC0000,0xB5FC0000,0xCC000000,0xCC000000,0x69FC0000,0x69FC0000,0x69FC0000,0xB5FC0000,0xB5FC0000,0xCC000000,0xB5FC0000,0xB5FC0000,0xCC000000,0xCC000000,0xB5FC0000, -0xB5FC0000,0xCC000000,0xCC000000,0xCC000000,0x1DC0000,0x3B40000,0x1980001,0x45FC0000,0x85FC0000,0xA1FC0000,0xABFC0000,0xBBFC0000,0xBFC0000,0x69FC0000,0xA1FC0000,0xCC000000,0xA1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x16832DC,0xFF50247F,0xFF381933,0xFF34152B,0xFF3C1E14,0xFF200F63,0xFF1809EB,0xFF080CD6,0xF9000656,0xE5000B06,0xFF24213C,0xFF040F22,0xFEF407FB,0xFEE40906,0xFED00012,0xE8D4055E,0xFECC12C5,0xEEBC0679,0xDCBC0799,0xCCC812C5,0x19FC32DC,0xFEE41DCF,0xFED0152B,0xFEB81322,0xFE940691,0xE4A00B04,0xFE701733,0xFE380481,0xE4400525,0xCC6412C4,0x8FF832DC, -0xF200169C,0xDC000E76,0xC4001765,0xB20032DC,0xFF502793,0xF9602F06,0xFB642FFB,0xFF2C1B7E,0xFF0C0F8A,0xFEE804E2,0xFEDC00E2,0xFAC4003B,0xFF3C26EC,0xFF1819CA,0xFEB804ED,0xE4400525,0x71FC32DC,0x19C12C3,0xFF800C56,0xFF7006A3,0xFF680482,0xFF740B9D,0xFF5803DE,0xFF4400A9,0xFF400533,0xF73400C2,0xE53804E5,0x6BFC12C3,0xFF3808E2,0xFF200482,0xFF080785,0xFCD80005, -0xE4F404E4,0xB7F812C3,0xFE380480,0xE41404E4,0xCC0012C4,0x6BFC12C3,0xFF3808E2,0xFF200482,0xFF080785,0xFCD80005,0xE4F404E4,0xB7F812C3,0xFE380480,0xE41404E4,0xCC0012C4,0xB7F812C3,0xFE380480,0xE41404E4,0xCC0012C4,0xCC0012C4,0xFF800FC5,0xFF8C1193,0xF59811A2,0xFF6C0C12,0xFF4807DA,0xFF1002DD,0xFEF8000D,0xFCB00018,0xFF780FAA,0xFF5C0B8A,0xFED004E3,0xE41404E4, -0xA3FC12C3,0x134152B,0x134152B,0x134152B,0x134152B,0xFF1809EB,0xFF1809EB,0xFF1809EB,0xF1040622,0xF1040622,0xCD000622,0xFEF407FB,0xFEF407FB,0xFEF407FB,0xFED00012,0xFED00012,0xD0E00132,0xDACC0481,0xDACC0481,0xC6C0007D,0xB4C80481,0x1C8152B,0x1C8152B,0x1C8152B,0xFE940691,0xFE940691,0xCCB80620,0xFE380481,0xFE380481,0xCC680005,0xB47C0480,0x69FC152B, -0x69FC152B,0xC4000672,0xAC0005E1,0x9A00152C,0xFF200F46,0xFF2C1292,0x134152B,0xFF100A32,0xFEF40542,0xFEE001CB,0xFEDC00E2,0xF0C80002,0xFF140ED2,0xFEF4097A,0xFEB80489,0xCC680005,0x41FC152B,0x1680482,0x1680482,0x1680482,0x1680482,0xFF4400A9,0xFF4400A9,0xFF4400A9,0xE5380001,0xE5380001,0xCD380001,0x1FFC0480,0x1FFC0480,0x1FFC0480,0xF8E00001,0xF8E00001, -0xCD0C0000,0x91FC0480,0x91FC0480,0xCC740000,0xB4000480,0x1FFC0480,0x1FFC0480,0x1FFC0480,0xF8E00001,0xF8E00001,0xCD0C0000,0x91FC0480,0x91FC0480,0xCC740000,0xB4000480,0x91FC0480,0x91FC0480,0xCC740000,0xB4000480,0xB4000480,0xFF580349,0xFB64039D,0x1680482,0xFF440288,0xFD280154,0xFEFC006D,0xFEF8000D,0xF4C00000,0xFD4C0372,0xFF300269,0x75FC0480,0xCC740000, -0x75FC0480,0x1CC04E2,0xFFBC0305,0xFFA000EA,0xFF980001,0xB5FC04E2,0xFF8801CD,0xFF680000,0xDBF804E2,0xFECC0000,0xE40004E4,0xB5FC04E2,0xFF8801CD,0xFF680000,0xDBF804E2,0xFECC0000,0xE40004E4,0xDBF804E2,0xFECC0000,0xE40004E4,0xE40004E4,0xB5FC04E2,0xFF8801CD,0xFF680000,0xDBF804E2,0xFECC0000,0xE40004E4,0xDBF804E2,0xFECC0000,0xE40004E4,0xE40004E4,0xDBF804E2, -0xFECC0000,0xE40004E4,0xE40004E4,0xE40004E4,0xF9C40480,0xDE804E2,0xFDC80482,0xFFB003CA,0xFF900305,0xFF480175,0xFF080000,0xFE840000,0xF3C40480,0xFFA003B5,0xFF300028,0xE40004E4,0xD1FC04E2,0x1000622,0x1000622,0x1000622,0x1000622,0x1000622,0x1000622,0x1000622,0x1000622,0x1000622,0x1000622,0xFED00012,0xFED00012,0xFED00012,0xFED00012,0xFED00012, -0xFED00012,0xB8C80001,0xB8C80001,0xB8C80001,0x9CC80001,0x1800620,0x1800620,0x1800620,0x1800620,0x1800620,0x1800620,0xD0600001,0xD0600001,0xD0600001,0x9C940000,0x45F80620,0x45F80620,0x45F80620,0x9A00000D,0x80000620,0xF4F80482,0x1000622,0x1000622,0xFEE8029A,0xFEE4016D,0xFEDC0091,0xFEDC0091,0xECCC0001,0xF8E803F5,0xFED80269,0xD4BC0000,0xD0600001, -0x13FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table183[] = { -0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x27F80000, -0x27F80000,0x27F80000,0x27F80000,0x6C000000,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xE80000,0xE80000,0xE80000,0x1440000,0x1D00000,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000, -0x3E80000,0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0xA4000000,0x1600000,0x1480001,0x1480001,0x1800000,0x39C0000,0x1C00000,0x1C00000,0x23FC0000,0x1800000,0x39C0000,0x57FC0000,0x79FC0000, -0x57FC0000,0x1A80001,0x1A80001,0x1A80001,0x1A80001,0x81FC0000,0x81FC0000,0x81FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0x81FC0000,0x81FC0000,0x81FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xC1FC0000,0xD4000000,0xD4000000,0x81FC0000,0x81FC0000,0x81FC0000,0xC1FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xC1FC0000,0xD4000000,0xD4000000,0xC1FC0000, -0xC1FC0000,0xD4000000,0xD4000000,0xD4000000,0x1F00000,0xBC40000,0x1A80001,0x63FC0000,0x99FC0000,0xB1FC0000,0xB9FC0000,0xC7F40000,0x33FC0000,0x81FC0000,0xB1FC0000,0xD4000000,0xB1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1742E54,0xFF5C2227,0xFF4C189C,0xFF44152B,0xFF441B96,0xFF2C0F3F,0xFF240A83,0xFF200BB6,0xFD10062A,0xE9100996,0xFF301E64,0xFF140EBF,0xFF0C08B3,0xFEF0080E,0xFEE40042,0xEAE40406,0xFEE00FFC,0xF2D005A9,0xE2C805B5,0xD2D80FD9,0x2BFC2E54,0xFEFC1C6F,0xFEE8152B,0xFECC11E6,0xFEAC06E9,0xE8B40994,0xFE9414BB,0xFE5804A2,0xE854039D,0xD2740FD9,0x97FC2E54, -0xF8001590,0xE0000BBC,0xCA001371,0xB8002E54,0xFF58245E,0xFF6C2AA6,0xFF6C2BC3,0xFF4019B9,0xFF1C0EF6,0xFEFC0522,0xFEF00173,0xFCD8000A,0xFF5023BA,0xFF30184B,0xFED00513,0xE854039D,0x7DF82E54,0x1A40FDB,0xFF8C0AC6,0xFF7C063B,0xFF780482,0xFF8009BD,0xFF64037E,0xFF5C00E1,0xFF5803EB,0xF944005E,0xE9480375,0x7BFC0FD8,0xFF4C0811,0xFF380482,0xFF200615,0xFEF00005, -0xE9080374,0xBFF80FD8,0xFE6C0480,0xE8340374,0xD2000FD8,0x7BFC0FD8,0xFF4C0811,0xFF380482,0xFF200615,0xFEF00005,0xE9080374,0xBFF80FD8,0xFE6C0480,0xE8340374,0xD2000FD8,0xBFF80FD8,0xFE6C0480,0xE8340374,0xD2000FD8,0xD2000FD8,0xFF940D34,0xF9A00ECD,0xFBA40ED6,0xFF780A71,0xFF5C06D6,0xFF2C0293,0xFF10002D,0xFECC0002,0xFF880D43,0xFF680A04,0xFEE404D1,0xE8340374, -0xADFC0FD8,0x144152B,0x144152B,0x144152B,0x144152B,0xFF240A83,0xFF240A83,0xFF240A83,0xF9140622,0xF9140622,0xD5100622,0xFF0C08B3,0xFF0C08B3,0xFF0C08B3,0xFEE40042,0xFEE40042,0xD8F00132,0xE2DC0481,0xE2DC0481,0xCED0007D,0xBCD80481,0x1E0152B,0x1E0152B,0x1E0152B,0xFEAC06E9,0xFEAC06E9,0xD4C80620,0xFE5804A2,0xFE5804A2,0xD4780005,0xBC8C0480,0x75FC152B, -0x75FC152B,0xD0000642,0xB8000581,0xA200152C,0xFF2C0FD1,0xF94012E3,0x144152B,0xFF180AF6,0xFF080612,0xFEF8026E,0xFEF00173,0xF8D80002,0xFF200F41,0xFF100A41,0xFED004C2,0xD4780005,0x51FC152B,0x1780482,0x1780482,0x1780482,0x1780482,0xFF5C00E1,0xFF5C00E1,0xFF5C00E1,0xED480001,0xED480001,0xD5480001,0x37FC0480,0x37FC0480,0x37FC0480,0xFCF80005,0xFCF80005, -0xD51C0000,0x9DFC0480,0x9DFC0480,0xD4840000,0xBC000480,0x37FC0480,0x37FC0480,0x37FC0480,0xFCF80005,0xFCF80005,0xD51C0000,0x9DFC0480,0x9DFC0480,0xD4840000,0xBC000480,0x9DFC0480,0x9DFC0480,0xD4840000,0xBC000480,0xBC000480,0xFB6C0372,0xFF6C03C5,0x1780482,0xFD5802AD,0xFF400188,0xFF2000A4,0xFF10002D,0xFCD00000,0xF564039D,0xFF48028A,0x83FC0480,0xD4840000, -0x83FC0480,0x1D40372,0xFFC40212,0xFFB400A9,0xFFA80001,0xC1FC0372,0xFF940145,0xFF800000,0xE1F80372,0xFEFC0000,0xE8000374,0xC1FC0372,0xFF940145,0xFF800000,0xE1F80372,0xFEFC0000,0xE8000374,0xE1F80372,0xFEFC0000,0xE8000374,0xE8000374,0xC1FC0372,0xFF940145,0xFF800000,0xE1F80372,0xFEFC0000,0xE8000374,0xE1F80372,0xFEFC0000,0xE8000374,0xE8000374,0xE1F80372, -0xFEFC0000,0xE8000374,0xE8000374,0xE8000374,0xFDCC0320,0x1F40372,0xFFCC0332,0xFFC002AD,0xFFA80221,0xFF5C0110,0xFF300000,0xFEC00000,0xF7CC0320,0xFFAC02A8,0xFF50001A,0xE8000374,0xD9FC0372,0x1100622,0x1100622,0x1100622,0x1100622,0x1100622,0x1100622,0x1100622,0x1100622,0x1100622,0x1100622,0xFEE0002D,0xFEE0002D,0xFEE0002D,0xFEE0002D,0xFEE0002D, -0xFEE0002D,0xC0D80001,0xC0D80001,0xC0D80001,0xA4D80001,0x1980620,0x1980620,0x1980620,0x1980620,0x1980620,0x1980620,0xD8700001,0xD8700001,0xD8700001,0xA4A40000,0x51F80620,0x51F80620,0x51F80620,0xA4000004,0x88000620,0xFD080482,0x1100622,0x1100622,0xFD0002D2,0xFEF801A5,0xFEE800C1,0xFEE800C1,0xF4DC0001,0xFEF403F9,0xFEE402A8,0xDCCC0000,0xD8700001, -0x21FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table184[] = { -0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x33FC0000, -0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xFC0000,0xFC0000,0xFC0000,0x35C0000,0x1F40000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x15C0000,0x9FC0000,0x9FC0000,0x9FC0000,0x9FC0000,0x9FC0000, -0x9FC0000,0x87FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x9FC0000,0x9FC0000,0x9FC0000,0x9FC0000,0x9FC0000,0x9FC0000,0x87FC0000,0x87FC0000,0x87FC0000,0xAC000001,0x87FC0000,0x87FC0000,0x87FC0000,0xAC000001,0xAC000001,0x1740000,0x15C0000,0x15C0000,0x1940000,0x1B40000,0x1D80000,0x1D80000,0x39FC0000,0x1940000,0x1B40000,0x67FC0000,0x87FC0000, -0x67FC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xDC000001,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xDC000001,0xCFF80000,0xCFF80000,0xDC000001,0xDC000001,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xDC000001,0xCFF80000,0xCFF80000,0xDC000001,0xDC000001,0xCFF80000, -0xCFF80000,0xDC000001,0xDC000001,0xDC000001,0x17FC0000,0x5D80000,0x1BC0000,0x85FC0000,0xAFFC0000,0xC1FC0000,0xC9F80000,0xD3F80000,0x5FFC0000,0x9BFC0000,0xC1FC0000,0xDC000001,0xC1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x18029AD,0xFF681FD0,0xFF581805,0xFF58152C,0xFF5C195D,0xFF400F2A,0xFF3C0B40,0xFF2C0AC5,0xFF240629,0xEF240841,0xFF441B46,0xFF2C0E8C,0xFF180984,0xFF080739,0xFEFC00B5,0xF0FC02D1,0xFEF80D65,0xF6E40502,0xE8E003EA,0xD8EC0CF6,0x41FC29AD,0xFF141B10,0xFF04152B,0xFEE41095,0xFECC0789,0xEEC80841,0xFEAC126A,0xFE7C04F9,0xEC740236,0xD88C0CF6,0xA1FC29AD, -0xFE00152B,0xE6000953,0xD4000F97,0xC00029AF,0xFF64212B,0xF77C26D9,0xF98027C5,0xFF5417DD,0xFF300E79,0xFF1805B7,0xFF080236,0xFEEC0009,0xFF5C209B,0xFF40169E,0xFEF0056C,0xEC740236,0x89FC29AD,0x1B00CF9,0xFFA40924,0xFF9405C1,0xFF8C0480,0xFF8C07FD,0xFF7C0332,0xFF740139,0xFF6402C1,0xFD580018,0xEF5C0221,0x8DFC0CF6,0xFF640723,0xFF540480,0xFF3804C5,0xFF14001D, -0xEF1C0221,0xC7FC0CF6,0xFEA00480,0xEE500221,0xD8000CF6,0x8DFC0CF6,0xFF640723,0xFF540480,0xFF3804C5,0xFF14001D,0xEF1C0221,0xC7FC0CF6,0xFEA00480,0xEE500221,0xD8000CF6,0xC7FC0CF6,0xFEA00480,0xEE500221,0xD8000CF6,0xD8000CF6,0xFFA00AE3,0xFFAC0BFD,0xFFAC0C24,0xFF9408AE,0xFF6805CD,0xFF48026C,0xFF280068,0xFEF40008,0xFF980AF2,0xFF840840,0xFF1004B3,0xEE500221, -0xB9FC0CF6,0x158152C,0x158152C,0x158152C,0x158152C,0xFF3C0B40,0xFF3C0B40,0xFF3C0B40,0xFF240629,0xFF240629,0xDD240621,0xFF180984,0xFF180984,0xFF180984,0xFEFC00B5,0xFEFC00B5,0xE1000131,0xECEC0480,0xECEC0480,0xD8E4007A,0xC4EC0482,0x1FC152B,0x1FC152B,0x1FC152B,0xFECC0789,0xFECC0789,0xDCDC0621,0xFE7C04F9,0xFE7C04F9,0xDE880006,0xC4A00482,0x83F8152B, -0x83F8152B,0xDC000629,0xC200053B,0xAA00152B,0xFD48103D,0xFF4C1304,0x158152C,0xFF340BC5,0xFF1C0715,0xFF100379,0xFF080236,0xFEEC0009,0xFF340FA9,0xFF240B2D,0xFEF0052C,0xDE880006,0x61FC152B,0x18C0480,0x18C0480,0x18C0480,0x18C0480,0xFF740139,0xFF740139,0xFF740139,0xF55C0000,0xF55C0000,0xDD5C0001,0x53FC0480,0x53FC0480,0x53FC0480,0xFF14001D,0xFF14001D, -0xDF2C0001,0xABF80480,0xABF80480,0xDC9C0001,0xC4000482,0x53FC0480,0x53FC0480,0x53FC0480,0xFF14001D,0xFF14001D,0xDF2C0001,0xABF80480,0xABF80480,0xDC9C0001,0xC4000482,0xABF80480,0xABF80480,0xDC9C0001,0xC4000482,0xC4000482,0xF780039D,0xFD8803C8,0x18C0480,0xFB7002F9,0xFF5401CA,0xFF3800E9,0xFF280068,0xFEF40008,0xFD74039D,0xFF5C02B9,0x95FC0480,0xDC9C0001, -0x95FC0480,0x1DC0221,0xFFD00145,0xFFC00068,0xFFBC0000,0xCDFC0221,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xFF340000,0xEE000221,0xCDFC0221,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xFF340000,0xEE000221,0xE7F80221,0xFF340000,0xEE000221,0xEE000221,0xCDFC0221,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xFF340000,0xEE000221,0xE7F80221,0xFF340000,0xEE000221,0xEE000221,0xE7F80221, -0xFF340000,0xEE000221,0xEE000221,0xEE000221,0xFFD001ED,0x1FC0221,0xF7DC0200,0xFFC001A8,0xFFA80152,0xFF8400A0,0xFF5C0000,0xFF040000,0xFBD401E1,0xFFC00190,0xFF700010,0xEE000221,0xDFFC0221,0x1240620,0x1240620,0x1240620,0x1240620,0x1240620,0x1240620,0x1240620,0x1240620,0x1240620,0x1240620,0xFEF40050,0xFEF40050,0xFEF40050,0xFEF40050,0xFEF40050, -0xFEF40050,0xC8EC0000,0xC8EC0000,0xC8EC0000,0xACEC0001,0x3B00620,0x3B00620,0x3B00620,0x3B00620,0x3B00620,0x3B00620,0xE0840001,0xE0840001,0xE0840001,0xAEB40001,0x5DFC0620,0x5DFC0620,0x5DFC0620,0xAC080001,0x90000622,0xF71C04B1,0x1240620,0x1240620,0xFF1002FD,0xFF0C01E1,0xFF0000F4,0xFF0000F4,0xFEEC0000,0xFD0C0422,0xFF0002D4,0xE6DC0000,0xE0840001, -0x33FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table185[] = { -0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3FFC0000, -0x3FFC0000,0x3FFC0000,0x3FFC0000,0x7C000001,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0x10C0000,0x10C0000,0x10C0000,0x3740000,0xDFC0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x23FC0000,0x23FC0000,0x23FC0000,0x23FC0000,0x23FC0000, -0x23FC0000,0x93FC0000,0x93FC0000,0x93FC0000,0xB4000001,0x23FC0000,0x23FC0000,0x23FC0000,0x23FC0000,0x23FC0000,0x23FC0000,0x93FC0000,0x93FC0000,0x93FC0000,0xB4000001,0x93FC0000,0x93FC0000,0x93FC0000,0xB4000001,0xB4000001,0x1840000,0x16C0000,0x16C0000,0x1A80000,0x1C80000,0x1F00000,0x1F00000,0x4DFC0000,0x1A80000,0x1C80000,0x77FC0000,0x93FC0000, -0x77FC0000,0x1CC0000,0x1CC0000,0x1CC0000,0x1CC0000,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xDBF80000,0xDBF80000,0xE4000001,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xDBF80000,0xDBF80000,0xE4000001,0xDBF80000,0xDBF80000,0xE4000001,0xE4000001,0xB5FC0000,0xB5FC0000,0xB5FC0000,0xDBF80000,0xDBF80000,0xE4000001,0xDBF80000,0xDBF80000,0xE4000001,0xE4000001,0xDBF80000, -0xDBF80000,0xE4000001,0xE4000001,0xE4000001,0x51FC0000,0xDE80000,0x1CC0000,0xA3FC0000,0xC3FC0000,0xD1FC0000,0xD5FC0000,0xDDFC0000,0x87FC0000,0xB5FC0000,0xD1FC0000,0xE4000001,0xD1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x18C25F1,0xFF801DF0,0xFF70177D,0xFF68152C,0xFF68179D,0xFF540F30,0xFF500BE9,0xFF400A4D,0xFF380659,0xF3340759,0xFF5C1916,0xFF3C0E80,0xFF300A44,0xFF2006D1,0xFF100159,0xF30C0209,0xFF080B86,0xFAF804A6,0xECF402A6,0xDEFC0AC2,0x53FC25F1,0xFF2C19F0,0xFF1C152B,0xFEFC0FAD,0xFEE40821,0xF2DC0759,0xFECC10EA,0xFE940579,0xF0880142,0xDE9C0AC2,0xABF825F1, -0xFE2C152B,0xEC0007C7,0xDA000C9B,0xC60025F3,0xFF781EA2,0xFD88233D,0xFF8C2439,0xFF5C168D,0xFF440E39,0xFF240649,0xFF1C031D,0xFF040051,0xFF701E24,0xFF501536,0xFEFC05F2,0xF0880142,0x95FC25F1,0x1BC0AC1,0xFFB007EC,0xFFA00569,0xFF9C0480,0xFFA406AD,0xFF88030E,0xFF800185,0xFF7C0209,0xFF6C0001,0xF36C0139,0x9BFC0AC1,0xFF7C066B,0xFF6C0480,0xFF5803FE,0xFF2C0055, -0xF3300139,0xCFF80AC1,0xFED40480,0xF2700139,0xDE000AC2,0x9BFC0AC1,0xFF7C066B,0xFF6C0480,0xFF5803FE,0xFF2C0055,0xF3300139,0xCFF80AC1,0xFED40480,0xF2700139,0xDE000AC2,0xCFF80AC1,0xFED40480,0xF2700139,0xDE000AC2,0xDE000AC2,0xFFB4091E,0xF5B80A25,0xF7BC0A41,0xFF98075D,0xFF7C0521,0xFF5C026D,0xFF4800B4,0xFF0C0032,0xFFA4093E,0xFF900722,0xFF3004A6,0xF2700139, -0xC1FC0AC1,0x168152C,0x168152C,0x168152C,0x168152C,0xFF500BE9,0xFF500BE9,0xFF500BE9,0xFF380659,0xFF380659,0xE5340621,0xFF300A44,0xFF300A44,0xFF300A44,0xFF100159,0xFF100159,0xE9100131,0xF4FC0480,0xF4FC0480,0xE0F4007A,0xCCFC0482,0x19FC152B,0x19FC152B,0x19FC152B,0xFEE40821,0xFEE40821,0xE4EC0621,0xFE940579,0xFE940579,0xE6980006,0xCCB00482,0x8FF8152B, -0x8FF8152B,0xE4000621,0xCA0004F6,0xB200152B,0xFF5810A1,0xFB641341,0x168152C,0xFF440C7A,0xFF300805,0xFF240465,0xFF1C031D,0xFF040051,0xFF501035,0xFF300BE1,0xFEFC05B2,0xE6980006,0x71FC152B,0x19C0480,0x19C0480,0x19C0480,0x19C0480,0xFF800185,0xFF800185,0xFF800185,0xFD6C0000,0xFD6C0000,0xE56C0001,0x6BFC0480,0x6BFC0480,0x6BFC0480,0xFF2C0055,0xFF2C0055, -0xE73C0001,0xB7F80480,0xB7F80480,0xE4AC0001,0xCC000482,0x6BFC0480,0x6BFC0480,0x6BFC0480,0xFF2C0055,0xFF2C0055,0xE73C0001,0xB7F80480,0xB7F80480,0xE4AC0001,0xCC000482,0xB7F80480,0xB7F80480,0xE4AC0001,0xCC000482,0xCC000482,0xFF90039D,0xF59803F5,0x19C0480,0xFD840320,0xFF700221,0xFF500151,0xFF4800B4,0xFF0C0032,0xFD8803C8,0xFF7402FD,0xA3FC0480,0xE4AC0001, -0xA3FC0480,0x1E40139,0xFFDC00B9,0xFFCC0040,0xFFCC0000,0xD9FC0139,0xFFC00074,0xFFB40000,0xEDF80139,0xFF640000,0xF2000139,0xD9FC0139,0xFFC00074,0xFFB40000,0xEDF80139,0xFF640000,0xF2000139,0xEDF80139,0xFF640000,0xF2000139,0xF2000139,0xD9FC0139,0xFFC00074,0xFFB40000,0xEDF80139,0xFF640000,0xF2000139,0xEDF80139,0xFF640000,0xF2000139,0xF2000139,0xEDF80139, -0xFF640000,0xF2000139,0xF2000139,0xF2000139,0xF5E40120,0x37FC0139,0xFBE40120,0xFFD800F2,0xFFBC00C2,0xFFA40061,0xFF840000,0xFF400000,0xFFDC0109,0xFFCC00E9,0xFF940009,0xF2000139,0xE7FC0139,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0x1340620,0xFF0C0080,0xFF0C0080,0xFF0C0080,0xFF0C0080,0xFF0C0080, -0xFF0C0080,0xD0FC0000,0xD0FC0000,0xD0FC0000,0xB4FC0001,0x1C80620,0x1C80620,0x1C80620,0x1C80620,0x1C80620,0x1C80620,0xE8940001,0xE8940001,0xE8940001,0xB6C40001,0x69FC0620,0x69FC0620,0x69FC0620,0xB4180001,0x98000622,0xFF2C04B1,0x1340620,0x1340620,0xFF200328,0xFF140220,0xFF100128,0xFF100128,0xFF000008,0xFD1C0451,0xFF1402F9,0xEEEC0000,0xE8940001, -0x41FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table186[] = { -0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x4BFC0000, -0x4BFC0000,0x4BFC0000,0x4BFC0000,0x84000001,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x71C0000,0x71C0000,0x71C0000,0x38C0000,0x1DF80000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x17C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000, -0x3BFC0000,0x9FF80000,0x9FF80000,0x9FF80000,0xBC000001,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x9FF80000,0x9FF80000,0x9FF80000,0xBC000001,0x9FF80000,0x9FF80000,0x9FF80000,0xBC000001,0xBC000001,0x3940000,0x17C0000,0x17C0000,0x5B80000,0x1DC0000,0xDFC0000,0xDFC0000,0x61FC0000,0x5B80000,0x1DC0000,0x85FC0000,0x9FF80000, -0x85FC0000,0x1DC0000,0x1DC0000,0x1DC0000,0x1DC0000,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xE7F80000,0xE7F80000,0xEC000001,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xE7F80000,0xE7F80000,0xEC000001,0xE7F80000,0xE7F80000,0xEC000001,0xEC000001,0xCDFC0000,0xCDFC0000,0xCDFC0000,0xE7F80000,0xE7F80000,0xEC000001,0xE7F80000,0xE7F80000,0xEC000001,0xEC000001,0xE7F80000, -0xE7F80000,0xEC000001,0xEC000001,0xEC000001,0x89FC0000,0x1FC0000,0x1DC0000,0xC1FC0000,0xD7FC0000,0xDFFC0000,0xE3FC0000,0xE9F40000,0xADFC0000,0xCDFC0000,0xDFFC0000,0xEC000001,0xDFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1982295,0xFF8C1C24,0xFF7C16F9,0xFF78152C,0xFF74162D,0xFF640F5A,0xFF5C0C99,0xFF4C0A19,0xFF4C06D0,0xF74406B1,0xFF68172A,0xFF500E8F,0xFF440B02,0xFF2C06C9,0xFF200231,0xF7200185,0xFF200A26,0xFF0C0482,0xF30001AE,0xE30C08E2,0x65FC2295,0xFF401914,0xFF34152B,0xFF140F05,0xFEFC08D9,0xF6F006B1,0xFEE40F9A,0xFEB80619,0xF49C0096,0xE2B008E2,0xB3FC2295, -0xFE60152B,0xF40006C7,0xE0000A17,0xCC002297,0xFF801C65,0xFF8C2049,0xF598216C,0xFF701581,0xFF540E23,0xFF3806FD,0xFF340421,0xFF1400E4,0xFF781BC6,0xFF5C147B,0xFF1806AC,0xF49C0096,0x9FFC2295,0x1C808E1,0xFFBC06E4,0xFFAC0529,0xFFAC0480,0xFFB00599,0xFF9C0300,0xFF9801E5,0xFF880191,0xFF7C0010,0xF77C0091,0xAFFC08E1,0xFF9405D3,0xFF840480,0xFF700356,0xFF4C00A9, -0xF7440091,0xD7FC08E1,0xFF040480,0xF6900091,0xE20008E2,0xAFFC08E1,0xFF9405D3,0xFF840480,0xFF700356,0xFF4C00A9,0xF7440091,0xD7FC08E1,0xFF040480,0xF6900091,0xE20008E2,0xD7FC08E1,0xFF040480,0xF6900091,0xE20008E2,0xE20008E2,0xFFBC07A1,0xFBC4084D,0xFBC40879,0xFFB00662,0xFF90049D,0xFF740269,0xFF640104,0xFF300074,0xFFB407AA,0xFFA40632,0xFF500499,0xF6900091, -0xCDFC08E1,0x178152C,0x178152C,0x178152C,0x178152C,0xFF5C0C99,0xFF5C0C99,0xFF5C0C99,0xFF4C06D0,0xFF4C06D0,0xED440621,0xFF440B02,0xFF440B02,0xFF440B02,0xFF200231,0xFF200231,0xF1200131,0xFD0C0480,0xFD0C0480,0xE904007A,0xD50C0482,0x31FC152B,0x31FC152B,0x31FC152B,0xFEFC08D9,0xFEFC08D9,0xECFC0621,0xFEB80619,0xFEB80619,0xEEA80006,0xD4C00482,0x9BF8152B, -0x9BF8152B,0xEC100621,0xD20004D2,0xBA00152B,0xFF6810FE,0xFF6C1369,0x178152C,0xFF540D39,0xFF440905,0xFF340565,0xFF340421,0xFF1400E4,0xFF5810BE,0xFF500C92,0xFF18067B,0xEEA80006,0x7FFC152B,0x1AC0480,0x1AC0480,0x1AC0480,0x1AC0480,0xFF9801E5,0xFF9801E5,0xFF9801E5,0xFF7C0010,0xFF7C0010,0xED7C0001,0x83FC0480,0x83FC0480,0x83FC0480,0xFF4C00A9,0xFF4C00A9, -0xEF4C0001,0xC3F80480,0xC3F80480,0xECBC0001,0xD4000482,0x83FC0480,0x83FC0480,0x83FC0480,0xFF4C00A9,0xFF4C00A9,0xEF4C0001,0xC3F80480,0xC3F80480,0xECBC0001,0xD4000482,0xC3F80480,0xC3F80480,0xECBC0001,0xD4000482,0xD4000482,0xFFA003CA,0xFDA803F5,0x1AC0480,0xFF980349,0xFF840269,0xFF6C019A,0xFF640104,0xFF300074,0xFF9403DA,0xFF840340,0xB3FC0480,0xECBC0001, -0xB3FC0480,0x1EC0091,0xFFE80055,0xFFE0001D,0xFFDC0000,0xE5FC0091,0xFFD80034,0xFFCC0000,0xF3F80091,0xFF940000,0xF6000091,0xE5FC0091,0xFFD80034,0xFFCC0000,0xF3F80091,0xFF940000,0xF6000091,0xF3F80091,0xFF940000,0xF6000091,0xF6000091,0xE5FC0091,0xFFD80034,0xFFCC0000,0xF3F80091,0xFF940000,0xF6000091,0xF3F80091,0xFF940000,0xF6000091,0xF6000091,0xF3F80091, -0xFF940000,0xF6000091,0xF6000091,0xF6000091,0xF9EC0080,0x77FC0091,0xFFEC0080,0xFDE80071,0xFFD0005A,0xFFC00028,0xFFA80000,0xFF7C0000,0xFFE80080,0xFFE00071,0xFFB40004,0xF6000091,0xEFFC0091,0x1440620,0x1440620,0x1440620,0x1440620,0x1440620,0x1440620,0x1440620,0x1440620,0x1440620,0x1440620,0xFF1800B4,0xFF1800B4,0xFF1800B4,0xFF1800B4,0xFF1800B4, -0xFF1800B4,0xD90C0000,0xD90C0000,0xD90C0000,0xBD0C0001,0x1E00620,0x1E00620,0x1E00620,0x1E00620,0x1E00620,0x1E00620,0xF0A40001,0xF0A40001,0xF0A40001,0xBED40001,0x75FC0620,0x75FC0620,0x75FC0620,0xBC280001,0xA0000622,0xF73C04E4,0x1440620,0x1440620,0xFF2C0371,0xFF280254,0xFF240171,0xFF240171,0xFF140020,0xF9300480,0xFF200332,0xF6FC0000,0xF0A40001, -0x51FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table187[] = { -0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x57FC0000, -0x57FC0000,0x57FC0000,0x57FC0000,0x8C000001,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0xF2C0000,0xF2C0000,0xF2C0000,0x3A40000,0x2BFC0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000, -0x53FC0000,0xABF80000,0xABF80000,0xABF80000,0xC4000001,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xABF80000,0xC4000001,0xABF80000,0xABF80000,0xABF80000,0xC4000001,0xC4000001,0xBA40000,0x18C0000,0x18C0000,0x1CC0000,0x1F00000,0x2BFC0000,0x2BFC0000,0x73FC0000,0x1CC0000,0x1F00000,0x95FC0000,0xABF80000, -0x95FC0000,0x1EC0000,0x1EC0000,0x1EC0000,0x1EC0000,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xF3F80000,0xF3F80000,0xF4000001,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xF3F80000,0xF3F80000,0xF4000001,0xF3F80000,0xF3F80000,0xF4000001,0xF4000001,0xE5FC0000,0xE5FC0000,0xE5FC0000,0xF3F80000,0xF3F80000,0xF4000001,0xF3F80000,0xF3F80000,0xF4000001,0xF4000001,0xF3F80000, -0xF3F80000,0xF4000001,0xF4000001,0xF4000001,0xC3FC0000,0x77FC0000,0x1EC0000,0xDFFC0000,0xEBFC0000,0xEFFC0000,0xF1FC0000,0xF3FC0000,0xD5FC0000,0xE5FC0000,0xEFFC0000,0xF4000001,0xEFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1A41F99,0xFF981A90,0xFF881695,0xFF88152C,0xFF80150D,0xFF740F8A,0xFF680D79,0xFF640A11,0xFF58076C,0xFB540649,0xFF7415A6,0xFF600EC8,0xFF5C0BF2,0xFF40071A,0xFF380339,0xFD2C0139,0xFF2C0946,0xFF200496,0xF71400FA,0xE91C0756,0x77FC1F99,0xFF58182C,0xFF4C152B,0xFF2C0E9D,0xFF1409B1,0xFB040649,0xFF080EA6,0xFED80706,0xFAB0002A,0xE8C40756,0xBDF81F99, -0xFE90152B,0xFA080649,0xE600080B,0xD2001F9B,0xFF901A71,0xF9A01DC1,0xFBA41E94,0xFF80147F,0xFF640E3A,0xFF4C0805,0xFF3C0548,0xFF3001B8,0xFF9019F6,0xFF7413B6,0xFF300766,0xFAB0002A,0xABFC1F99,0x1D00759,0xFFC40609,0xFFC004EC,0xFFBC0480,0xFFBC04CD,0xFFB0030E,0xFFA40249,0xFFA00169,0xFF940050,0xFB8C0029,0xBDFC0756,0xFFAC055B,0xFF9C0480,0xFF8802EE,0xFF700115, -0xFB580029,0xDFF80756,0xFF340480,0xFAB00029,0xE8000756,0xBDFC0756,0xFFAC055B,0xFF9C0480,0xFF8802EE,0xFF700115,0xFB580029,0xDFF80756,0xFF340480,0xFAB00029,0xE8000756,0xDFF80756,0xFF340480,0xFAB00029,0xE8000756,0xE8000756,0xFDCC067A,0xFFCC06D5,0xFFCC0711,0xFFC0058C,0xFFA40441,0xFF8402B0,0xFF7C0172,0xFF5000DD,0xFFC4067D,0xFFB0057E,0xFF700490,0xFAB00029, -0xD7FC0756,0x188152C,0x188152C,0x188152C,0x188152C,0xFF680D79,0xFF680D79,0xFF680D79,0xFF58076C,0xFF58076C,0xF5540621,0xFF5C0BF2,0xFF5C0BF2,0xFF5C0BF2,0xFF380339,0xFF380339,0xF9300131,0xFF200496,0xFF200496,0xF114007A,0xDD1C0482,0x49FC152B,0x49FC152B,0x49FC152B,0xFF1409B1,0xFF1409B1,0xF50C0621,0xFED80706,0xFED80706,0xF6B80006,0xDCD00482,0xA7F8152B, -0xA7F8152B,0xF4200621,0xDC0004A6,0xC200152B,0xFF741194,0xFB8413A0,0x188152C,0xFF680E19,0xFF580A15,0xFF4C06C1,0xFF3C0548,0xFF3001B8,0xFF741148,0xFF5C0D82,0xFF300742,0xF6B80006,0x8FFC152B,0x1BC0480,0x1BC0480,0x1BC0480,0x1BC0480,0xFFA40249,0xFFA40249,0xFFA40249,0xFF940050,0xFF940050,0xF58C0001,0x9BFC0480,0x9BFC0480,0x9BFC0480,0xFF700115,0xFF700115, -0xF75C0001,0xCFF80480,0xCFF80480,0xF4CC0001,0xDC000482,0x9BFC0480,0x9BFC0480,0x9BFC0480,0xFF700115,0xFF700115,0xF75C0001,0xCFF80480,0xCFF80480,0xF4CC0001,0xDC000482,0xCFF80480,0xCFF80480,0xF4CC0001,0xDC000482,0xDC000482,0xFBB403F5,0xF5B80424,0x1BC0480,0xFFA40384,0xFF9802B9,0xFF7C020A,0xFF7C0172,0xFF5000DD,0xFDB003F5,0xFF980371,0xC1FC0480,0xF4CC0001, -0xC1FC0480,0x1F40029,0xFFF40019,0xFFF00008,0xFFEC0000,0xF1FC0029,0xFFE40010,0xFFE40000,0xF9F80029,0xFFC80000,0xFA000029,0xF1FC0029,0xFFE40010,0xFFE40000,0xF9F80029,0xFFC80000,0xFA000029,0xF9F80029,0xFFC80000,0xFA000029,0xFA000029,0xF1FC0029,0xFFE40010,0xFFE40000,0xF9F80029,0xFFC80000,0xFA000029,0xF9F80029,0xFFC80000,0xFA000029,0xFA000029,0xF9F80029, -0xFFC80000,0xFA000029,0xFA000029,0xFA000029,0xFDF40020,0xB7FC0029,0xF3F40029,0xFFEC001D,0xFFE80014,0xFFDC000A,0xFFD00000,0xFFB80000,0xFFF00022,0xFFF00020,0xFFD80001,0xFA000029,0xF7FC0029,0x1540620,0x1540620,0x1540620,0x1540620,0x1540620,0x1540620,0x1540620,0x1540620,0x1540620,0x1540620,0xFF3000F4,0xFF3000F4,0xFF3000F4,0xFF3000F4,0xFF3000F4, -0xFF3000F4,0xE11C0000,0xE11C0000,0xE11C0000,0xC51C0001,0x1F80620,0x1F80620,0x1F80620,0x1F80620,0x1F80620,0x1F80620,0xF8B40001,0xF8B40001,0xF8B40001,0xC6E40001,0x81FC0620,0x81FC0620,0x81FC0620,0xC4380001,0xA8000622,0xFF4C04E4,0x1540620,0x1540620,0xFD48039D,0xFF3C0290,0xFF3401B1,0xFF3401B1,0xFF240050,0xFF3C0488,0xFF340355,0xFF0C0000,0xF8B40001, -0x5FFC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table188[] = { -0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x65F80000, -0x65F80000,0x65F80000,0x65F80000,0x96000000,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x9400000,0x9400000,0x9400000,0x1C00000,0x3DF80000,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x19C0001,0x6FFC0000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0x6FFC0000, -0x6FFC0000,0xB9F80000,0xB9F80000,0xB9F80000,0xCE000000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0x6FFC0000,0xB9F80000,0xB9F80000,0xB9F80000,0xCE000000,0xB9F80000,0xB9F80000,0xB9F80000,0xCE000000,0xCE000000,0x5B80000,0x19C0001,0x19C0001,0x3E00000,0x15FC0000,0x4DFC0000,0x4DFC0000,0x8BFC0000,0x3E00000,0x15FC0000,0xA5FC0000,0xB9F80000, -0xA5FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1B01CB4,0xFFA4190F,0xFF9C1633,0xFF98152B,0xFF98140A,0xFF8C0FFF,0xFF800E56,0xFF700A74,0xFF700863,0xFF640622,0xFF8C144D,0xFF740F22,0xFF680D11,0xFF5807AB,0xFF4C04BA,0xFF44013D,0xFF4C08DC,0xFF380519,0xFB28008D,0xEF2C0601,0x8DFC1CB0,0xFF701751,0xFF68152C,0xFF4C0EB2,0xFF380AC2,0xFF1C0620,0xFF200DF9,0xFEFC0829,0xFECC0005,0xEED80600,0xC7FC1CB0, -0xFEC8152B,0xFE2C0620,0xEC00064C,0xD8001CB0,0xFFA0185A,0xFFAC1AFA,0xFFAC1BE7,0xFF9413DF,0xFF780E8B,0xFF680924,0xFF5C06CB,0xFF400302,0xFF98183A,0xFF801312,0xFF3C08BF,0xFECC0005,0xB9FC1CB0,0x1DC0603,0xFFD40556,0xFFCC04C2,0xFFCC0482,0xFFD00433,0xFFC40336,0xFFBC02C5,0xFFAC0183,0xFFAC00DA,0xFF9C0001,0xCFFC0600,0xFFC004F6,0xFFB80480,0xFFA002C2,0xFF8801A9, -0xFF700000,0xE7FC0600,0xFF6C0480,0xFED80000,0xEE000600,0xCFFC0600,0xFFC004F6,0xFFB80480,0xFFA002C2,0xFF8801A9,0xFF700000,0xE7FC0600,0xFF6C0480,0xFED80000,0xEE000600,0xE7FC0600,0xFF6C0480,0xFED80000,0xEE000600,0xEE000600,0xFFD80565,0xF7DC05C1,0xF7DC05E2,0xFFC404EB,0xFFC003FE,0xFFA802E1,0xFFA00212,0xFF7C0172,0xFFD00575,0xFFC804AC,0xFF980489,0xFED80000, -0xE1FC0600,0x198152B,0x198152B,0x198152B,0x198152B,0xFF800E56,0xFF800E56,0xFF800E56,0xFF700863,0xFF700863,0xFF640622,0xFF680D11,0xFF680D11,0xFF680D11,0xFF4C04BA,0xFF4C04BA,0xFF44013D,0xFF380519,0xFF380519,0xF924007D,0xE72C0481,0x65FC152B,0x65FC152B,0x65FC152B,0xFF380AC2,0xFF380AC2,0xFF1C0620,0xFEFC0829,0xFEFC0829,0xFECC0005,0xE6E00480,0xB3FC152B, -0xB3FC152B,0xFE2C0620,0xE6000490,0xCC00152C,0xFF901212,0xFF8C13EB,0x198152B,0xFF800EF9,0xFF6C0B5E,0xFF5C082E,0xFF5C06CB,0xFF400302,0xFF8011D1,0xFF740E66,0xFF3C089B,0xFECC0005,0x9FFC152B,0x1CC0482,0x1CC0482,0x1CC0482,0x1CC0482,0xFFBC02C5,0xFFBC02C5,0xFFBC02C5,0xFFAC00DA,0xFFAC00DA,0xFF9C0001,0xB7FC0480,0xB7FC0480,0xB7FC0480,0xFF8801A9,0xFF8801A9, -0xFF700000,0xDDF40480,0xDDF40480,0xFED80000,0xE6000480,0xB7FC0480,0xB7FC0480,0xB7FC0480,0xFF8801A9,0xFF8801A9,0xFF700000,0xDDF40480,0xDDF40480,0xFED80000,0xE6000480,0xDDF40480,0xDDF40480,0xFED80000,0xE6000480,0xE6000480,0xFDC80422,0xFFCC0422,0x1CC0482,0xFDC003CA,0xFFAC0321,0xFFA0028D,0xFFA00212,0xFF7C0172,0xFFC40422,0xFFB003C5,0xD3FC0480,0xFED80000, -0xD3FC0480,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1640622,0x1640622,0x1640622,0x1640622,0x1640622,0x1640622,0x1640622,0x1640622,0x1640622,0x1640622,0xFF44013D,0xFF44013D,0xFF44013D,0xFF44013D,0xFF44013D, -0xFF44013D,0xEB2C0001,0xEB2C0001,0xEB2C0001,0xCF2C0001,0x19FC0620,0x19FC0620,0x19FC0620,0x19FC0620,0x19FC0620,0x19FC0620,0xFECC0005,0xFECC0005,0xFECC0005,0xCEF80000,0x8FF80620,0x8FF80620,0x8FF80620,0xCE480000,0xB2000620,0xF9600515,0x1640622,0x1640622,0xFF5803CA,0xFF5002DA,0xFF4C0202,0xFF4C0202,0xFF400091,0xFD5404B1,0xFF50039D,0xFF24000D,0xFECC0005, -0x71FC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table189[] = { -0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x71F80000, -0x71F80000,0x71F80000,0x71F80000,0x9E000000,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x1540000,0x1540000,0x1540000,0x1D80000,0x4BFC0000,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x1AC0001,0x87FC0000,0x87FC0000,0x87FC0000,0x87FC0000,0x87FC0000, -0x87FC0000,0xC5F80000,0xC5F80000,0xC5F80000,0xD6000000,0x87FC0000,0x87FC0000,0x87FC0000,0x87FC0000,0x87FC0000,0x87FC0000,0xC5F80000,0xC5F80000,0xC5F80000,0xD6000000,0xC5F80000,0xC5F80000,0xC5F80000,0xD6000000,0xD6000000,0xDC80000,0x1AC0001,0x1AC0001,0x1F40000,0x3DFC0000,0x6BFC0000,0x6BFC0000,0x9DFC0000,0x1F40000,0x3DFC0000,0xB5FC0000,0xC5F80000, -0xB5FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1B81834,0xFFB01547,0xFFA812FF,0xFFA4122B,0xFFA4114A,0xFF980E07,0xFF8C0CB6,0xFF880994,0xFF7C07EB,0xFF740622,0xFF981115,0xFF800CE2,0xFF800B29,0xFF7006D3,0xFF64046A,0xFF500195,0xFF580704,0xFF4C03F3,0xFD380035,0xF13C042D,0x99FC1830,0xFF7C13E9,0xFF78122B,0xFF580CF2,0xFF4C09E9,0xFF340620,0xFF380B89,0xFF1406C9,0xFEE40025,0xF0EC042D,0xCDFC1830, -0xFEEC122B,0xFE600620,0xF0000435,0xDC001830,0xFFA814D6,0xFFAC171A,0xF5B817AC,0xFF9810D6,0xFF880C93,0xFF700806,0xFF68061E,0xFF5802DE,0xFFA014B2,0xFF901042,0xFF58074F,0xFEE40025,0xBFFC1830,0x1E4042B,0xFFDC03AB,0xFFD80346,0xFFD40322,0xFFD002E3,0xFFC8023E,0xFFC401E2,0xFFB8010B,0xFFB80092,0xFFAC0001,0xD9FC042B,0xFFCC0372,0xFFC00322,0xFFAC01E2,0xFFA00121, -0xFF880000,0xEDF8042B,0xFF840320,0xFF080000,0xF000042C,0xD9FC042B,0xFFCC0372,0xFFC00322,0xFFAC01E2,0xFFA00121,0xFF880000,0xEDF8042B,0xFF840320,0xFF080000,0xF000042C,0xEDF8042B,0xFF840320,0xFF080000,0xF000042C,0xF000042C,0xFFD803C5,0xF9E003F9,0xFBE40412,0xFFD40355,0xFFC802BD,0xFFB801F5,0xFFA80161,0xFF8C0105,0xFFDC03C5,0xFFC8034C,0xFFA80329,0xFF080000, -0xE7FC042B,0x1A4122B,0x1A4122B,0x1A4122B,0x1A4122B,0xFF8C0CB6,0xFF8C0CB6,0xFF8C0CB6,0xFF7C07EB,0xFF7C07EB,0xFF740622,0xFF800B29,0xFF800B29,0xFF800B29,0xFF64046A,0xFF64046A,0xFF500195,0xFF4C03F3,0xFF4C03F3,0xFB3C002A,0xEB3C0321,0x77FC122B,0x77FC122B,0x77FC122B,0xFF4C09E9,0xFF4C09E9,0xFF340620,0xFF1406C9,0xFF1406C9,0xFEE40025,0xEAF40320,0xBDF8122B, -0xBDF8122B,0xFE600620,0xEA040320,0xD000122C,0xFD9C0FA9,0xF9A01122,0x1A4122B,0xFF900D02,0xFF8009FE,0xFF700742,0xFF68061E,0xFF5802DE,0xFF940F4C,0xFF800CA1,0xFF580736,0xFEE40025,0xABFC122B,0x1D40322,0x1D40322,0x1D40322,0x1D40322,0xFFC401E2,0xFFC401E2,0xFFC401E2,0xFFB80092,0xFFB80092,0xFFAC0001,0xC3FC0320,0xC3FC0320,0xC3FC0320,0xFFA00121,0xFFA00121, -0xFF880000,0xE1FC0320,0xE1FC0320,0xFF080000,0xEA000320,0xC3FC0320,0xC3FC0320,0xC3FC0320,0xFFA00121,0xFFA00121,0xFF880000,0xE1FC0320,0xE1FC0320,0xFF080000,0xEA000320,0xE1FC0320,0xE1FC0320,0xFF080000,0xEA000320,0xEA000320,0xFBD002D4,0xFFCC02F2,0x1D40322,0xFFC4029A,0xFFC00225,0xFFB001BD,0xFFA80161,0xFF8C0105,0xFDCC02D4,0xFFC80288,0xDBFC0320,0xFF080000, -0xDBFC0320,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1740622,0x1740622,0x1740622,0x1740622,0x1740622,0x1740622,0x1740622,0x1740622,0x1740622,0x1740622,0xFF500195,0xFF500195,0xFF500195,0xFF500195,0xFF500195, -0xFF500195,0xF33C0001,0xF33C0001,0xF33C0001,0xD73C0001,0x31FC0620,0x31FC0620,0x31FC0620,0x31FC0620,0x31FC0620,0x31FC0620,0xFEE40025,0xFEE40025,0xFEE40025,0xD7080000,0x9BF80620,0x9BF80620,0x9BF80620,0xD6580000,0xBA000620,0xFF6C0521,0x1740622,0x1740622,0xFF6803F9,0xFF640322,0xFF5C024A,0xFF5C024A,0xFF4C00DA,0xF96804E2,0xFF5803E8,0xFF38002D,0xFEE40025, -0x7FFC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table190[] = { -0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x7DF80000, -0x7DF80000,0x7DF80000,0x7DF80000,0xA6000000,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x1640000,0x1640000,0x1640000,0x1F00000,0x5BFC0000,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x1BC0001,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000, -0x9FFC0000,0xD1F80000,0xD1F80000,0xD1F80000,0xDE000000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0x9FFC0000,0xD1F80000,0xD1F80000,0xD1F80000,0xDE000000,0xD1F80000,0xD1F80000,0xD1F80000,0xDE000000,0xDE000000,0x1DC0000,0x1BC0001,0x1BC0001,0x1FFC0000,0x63FC0000,0x89FC0000,0x89FC0000,0xB1FC0000,0x1FFC0000,0x63FC0000,0xC3FC0000,0xD1F80000, -0xC3FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1C01434,0xFFBC11F7,0xFFB4102B,0xFFAC0F83,0xFFB00EDA,0xFFA40C47,0xFF980B46,0xFF9408C4,0xFF88078B,0xFF840622,0xFFA40E45,0xFF8C0AF2,0xFF8C0989,0xFF7C0613,0xFF700432,0xFF6801ED,0xFF64058C,0xFF58030B,0xFD4C000A,0xF54C02AD,0xA5FC1430,0xFF9410D1,0xFF880F80,0xFF700B62,0xFF640911,0xFF4C0620,0xFF4C09AB,0xFF2C05A9,0xFF080059,0xF50002AD,0xD3FC1430, -0xFF080F80,0xFE900620,0xF40C02AC,0xE0001430,0xFFB4117B,0xF9C01344,0xF9C013BC,0xFFAC0E6E,0xFF940AD2,0xFF7C073E,0xFF7805B3,0xFF6C02C3,0xFFB4111B,0xFF980DEE,0xFF70060B,0xFF080059,0xC7FC1430,0x1E802AB,0xFDE40263,0xFFE0021B,0xFFDC0202,0xFFDC01D3,0xFFD4016E,0xFFD00132,0xFFCC00B1,0xFFC00065,0xFFBC0001,0xDFFC02AB,0xFFD80236,0xFFCC0202,0xFFB80132,0xFFAC00B9, -0xFFA00000,0xEFFC02AB,0xFF9C0200,0xFF380000,0xF40002AC,0xDFFC02AB,0xFFD80236,0xFFCC0202,0xFFB80132,0xFFAC00B9,0xFFA00000,0xEFFC02AB,0xFF9C0200,0xFF380000,0xF40002AC,0xEFFC02AB,0xFF9C0200,0xFF380000,0xF40002AC,0xF40002AC,0xFFE4026D,0xFDE80281,0xFDE80296,0xFFDC0215,0xFFD401C2,0xFFBC0152,0xFFB800E8,0xFFA8009D,0xFFE00272,0xFFDC020E,0xFFB80204,0xFF380000, -0xEBFC02AB,0x1AC0F83,0x1AC0F83,0x1AC0F83,0x1AC0F83,0xFF980B46,0xFF980B46,0xFF980B46,0xFF88078B,0xFF88078B,0xFF840622,0xFF8C0989,0xFF8C0989,0xFF8C0989,0xFF700432,0xFF700432,0xFF6801ED,0xFF58030B,0xFF58030B,0xFD4C0006,0xEF4C0201,0x87FC0F80,0x87FC0F80,0x87FC0F80,0xFF640911,0xFF640911,0xFF4C0620,0xFF2C05A9,0xFF2C05A9,0xFF080059,0xEF080200,0xC5F80F80, -0xC5F80F80,0xFE900620,0xEE280200,0xD6000F80,0xFFA00D61,0xFFAC0E96,0x1AC0F83,0xFF980B42,0xFF8C08EE,0xFF7C0695,0xFF7805B3,0xFF6C02C3,0xFFA00D29,0xFF900AF9,0xFF7005FB,0xFF080059,0xB5FC0F80,0x1DC0202,0x1DC0202,0x1DC0202,0x1DC0202,0xFFD00132,0xFFD00132,0xFFD00132,0xFFC00065,0xFFC00065,0xFFBC0001,0xCFFC0200,0xCFFC0200,0xCFFC0200,0xFFAC00B9,0xFFAC00B9, -0xFFA00000,0xE7FC0200,0xE7FC0200,0xFF380000,0xEE000200,0xCFFC0200,0xCFFC0200,0xCFFC0200,0xFFAC00B9,0xFFAC00B9,0xFFA00000,0xE7FC0200,0xE7FC0200,0xFF380000,0xEE000200,0xE7FC0200,0xE7FC0200,0xFF380000,0xEE000200,0xEE000200,0xFFD801C4,0xF7DC01E1,0x1DC0202,0xFDD801A5,0xFFC80164,0xFFBC0121,0xFFB800E8,0xFFA8009D,0xFFD001D4,0xFBD401A5,0xE1FC0200,0xFF380000, -0xE1FC0200,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1840622,0x1840622,0x1840622,0x1840622,0x1840622,0x1840622,0x1840622,0x1840622,0x1840622,0x1840622,0xFF6801ED,0xFF6801ED,0xFF6801ED,0xFF6801ED,0xFF6801ED, -0xFF6801ED,0xFB4C0001,0xFB4C0001,0xFB4C0001,0xDF4C0001,0x49FC0620,0x49FC0620,0x49FC0620,0x49FC0620,0x49FC0620,0x49FC0620,0xFF080059,0xFF080059,0xFF080059,0xDF180000,0xA7F80620,0xA7F80620,0xA7F80620,0xDE680000,0xC2000620,0xF980054A,0x1840622,0x1840622,0xFF740442,0xFF780372,0xFF7002B1,0xFF7002B1,0xFF5C0131,0xFF7404EA,0xFD740422,0xFF500075,0xFF080059, -0x8FFC0620,}; -static const uint32_t g_etc1_to_bc7_m6_table191[] = { -0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0x89F80000, -0x89F80000,0x89F80000,0x89F80000,0xAE000000,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x3740000,0x3740000,0x3740000,0xDFC0000,0x69FC0000,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000, -0xB7FC0000,0xDDF40000,0xDDF40000,0xDDF40000,0xE6000000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xDDF40000,0xE6000000,0xDDF40000,0xDDF40000,0xDDF40000,0xE6000000,0xE6000000,0x1EC0000,0x1CC0001,0x1CC0001,0x57FC0000,0x8BFC0000,0xA7FC0000,0xA7FC0000,0xC5FC0000,0x57FC0000,0x8BFC0000,0xD3FC0000,0xDDF40000, -0xD3FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1C810B4,0xFFC40EF4,0xFFB80DA4,0xFFB80D2B,0xFFB00CBA,0xFFB00ABF,0xFFA40A06,0xFFA00814,0xFF9C0732,0xFF940622,0xFFB00BDD,0xFF980952,0xFF980831,0xFF88058B,0xFF84041A,0xFF74025D,0xFF7C045C,0xFF700263,0xFF600005,0xF75C0181,0xB1FC10B0,0xFFA00E21,0xFF980D2C,0xFF7C0A12,0xFF700851,0xFF640620,0xFF6407F3,0xFF3804E9,0xFF2000A9,0xF7180180,0xD9FC10B0, -0xFF280D2B,0xFEC00620,0xF6380180,0xE40010B0,0xFFBC0E86,0xFDC80FE4,0xFDC8104C,0xFFAC0C2E,0xFFA00963,0xFF90068E,0xFF900555,0xFF7402DE,0xFFB40E4B,0xFFB00BC7,0xFF7C0521,0xFF2000A9,0xCFFC10B0,0x1EC0183,0xFFE80153,0xFFE40132,0xFFE40122,0xFFE8010B,0xFFDC00CE,0xFFDC00AA,0xFFD80061,0xFFD4003A,0xFFCC0001,0xE9FC0180,0xFFE00141,0xFFD80122,0xFFCC00B1,0xFFC0006D, -0xFFB80000,0xF3FC0180,0xFFB40120,0xFF6C0000,0xF6000180,0xE9FC0180,0xFFE00141,0xFFD80122,0xFFCC00B1,0xFFC0006D,0xFFB80000,0xF3FC0180,0xFFB40120,0xFF6C0000,0xF6000180,0xF3FC0180,0xFFB40120,0xFF6C0000,0xF6000180,0xF6000180,0xFBEC0161,0xFFEC0161,0xFFEC0172,0xFFE00143,0xFFDC00F9,0xFFD000C3,0xFFD00082,0xFFB8005A,0xF9EC0161,0xFFDC012E,0xFFD00123,0xFF6C0000, -0xF1FC0180,0x1B80D2B,0x1B80D2B,0x1B80D2B,0x1B80D2B,0xFFA40A06,0xFFA40A06,0xFFA40A06,0xFF9C0732,0xFF9C0732,0xFF940622,0xFF980831,0xFF980831,0xFF980831,0xFF84041A,0xFF84041A,0xFF74025D,0xFF700263,0xFF700263,0xFF600005,0xF35C0121,0x95FC0D2B,0x95FC0D2B,0x95FC0D2B,0xFF700851,0xFF700851,0xFF640620,0xFF3804E9,0xFF3804E9,0xFF2000A9,0xF31C0120,0xCBFC0D2B, -0xCBFC0D2B,0xFEC00620,0xF2480120,0xDC000D2C,0xFFAC0B7E,0xF5B80CA3,0x1B80D2B,0xFFA409EB,0xFF9407DE,0xFF900615,0xFF900555,0xFF7402DE,0xFDB00B5E,0xFF9809A5,0xFF7C0511,0xFF2000A9,0xBFF80D2B,0x1E40122,0x1E40122,0x1E40122,0x1E40122,0xFFDC00AA,0xFFDC00AA,0xFFDC00AA,0xFFD4003A,0xFFD4003A,0xFFCC0001,0xDBFC0120,0xDBFC0120,0xDBFC0120,0xFFC0006D,0xFFC0006D, -0xFFB80000,0xEDFC0120,0xEDFC0120,0xFF6C0000,0xF2000120,0xDBFC0120,0xDBFC0120,0xDBFC0120,0xFFC0006D,0xFFC0006D,0xFFB80000,0xEDFC0120,0xEDFC0120,0xFF6C0000,0xF2000120,0xEDFC0120,0xEDFC0120,0xFF6C0000,0xF2000120,0xF2000120,0xF7E40109,0xFBE40109,0x1E40122,0xFFDC00E1,0xFFD400C1,0xFFD000AA,0xFFD00082,0xFFB8005A,0xF5E40109,0xFFDC00DD,0xE9FC0120,0xFF6C0000, -0xE9FC0120,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1940622,0x1940622,0x1940622,0x1940622,0x1940622,0x1940622,0x1940622,0x1940622,0x1940622,0x1940622,0xFF74025D,0xFF74025D,0xFF74025D,0xFF74025D,0xFF74025D, -0xFF74025D,0xFF600005,0xFF600005,0xFF600005,0xE75C0001,0x63FC0620,0x63FC0620,0x63FC0620,0x63FC0620,0x63FC0620,0x63FC0620,0xFF2000A9,0xFF2000A9,0xFF2000A9,0xE7280000,0xB3F80620,0xB3F80620,0xB3F80620,0xE6780000,0xCA000620,0xFF8C055A,0x1940622,0x1940622,0xFF840479,0xFF8003C5,0xFF800305,0xFF800305,0xFF700195,0xFD8C0515,0xFF80045D,0xFF6000CD,0xFF2000A9, -0x9FF80620,}; -static const uint32_t g_etc1_to_bc7_m6_table192[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x400001,0x400001,0x400001,0x400001,0x600000,0x600000,0x600000,0xC40000,0xC40000,0x20000000,0x600000,0x600000,0x600000,0xC40000,0xC40000,0x20000000,0xC40000,0xC40000,0x20000000,0x20000000,0x600000,0x600000,0x600000,0xC40000,0xC40000,0x20000000,0xC40000,0xC40000,0x20000000,0x20000000,0xC40000, -0xC40000,0x20000000,0x20000000,0x20000000,0x4C0000,0xA440000,0x400001,0x580000,0x26C0000,0x8C0000,0xA00000,0xF00000,0x4500000,0x600000,0x8C0000,0x20000000,0x8C0000,0xD40000,0x13C0000,0x21FC0000,0x68000001,0x13C0000,0x21FC0000,0x68000001,0x21FC0000,0x68000001,0x68000001,0x13C0000,0x21FC0000,0x68000001,0x21FC0000,0x68000001, -0x68000001,0x21FC0000,0x68000001,0x68000001,0x68000001,0x13C0000,0x21FC0000,0x68000001,0x21FC0000,0x68000001,0x68000001,0x21FC0000,0x68000001,0x68000001,0x68000001,0x21FC0000,0x68000001,0x68000001,0x68000001,0x68000001,0x3080000,0x8E00000,0x8E00000,0x1640000,0x5FC0000,0x45F40000,0x68000001,0x68000001,0x1200000,0x18C0000,0x61D40000,0x68000001, -0x1C00000,0x441D49,0xFE1C039A,0x981402D9,0x681402DA,0xD0000A69,0x92000112,0x68000002,0x66000A69,0x560003DA,0x44000A69,0x8A0016FD,0x7A000882,0x62000432,0x58000E66,0x520006FB,0x40000C4A,0x440016FD,0x44000E46,0x38001145,0x2E0016FE,0x681D47,0x68000DD2,0x580007C3,0x52001187,0x4C000994,0x40000E03,0x400018C6,0x3E001027,0x360012AA,0x2E0017DF,0xD01D47, -0x380014C2,0x3200161D,0x26001A42,0x22001D47,0xFE140B92,0xF4381671,0xF8401611,0xC80006E0,0x8A000732,0x660006FD,0x5A0004DA,0x4A000929,0xF2000CF6,0x9E00098E,0x52000B32,0x360012AA,0x941D47,0x5C16FD,0xFE240289,0x94200222,0x68200222,0xD0000A69,0x92000112,0x68000002,0x66000A69,0x560003DA,0x44000A69,0x8816FD,0x7A000882,0x62000432,0x58000E66,0x520006FB, -0x40000C4A,0x11416FD,0x44000E46,0x38001145,0x2E0016FE,0x8816FD,0x7A000882,0x62000432,0x58000E66,0x520006FB,0x40000C4A,0x11416FD,0x44000E46,0x38001145,0x2E0016FE,0x11416FD,0x44000E46,0x38001145,0x2E0016FE,0x2E0016FE,0xFE140B2E,0xFC481271,0xFE4C1121,0xC80006E0,0x8A000732,0x660006FD,0x5A0004DA,0x4A000929,0xFA000BFE,0x9E00092A,0x52000B19,0x38001145, -0xC016FD,0x1402D9,0x1402D9,0x1402D9,0x1402D9,0x64000000,0x64000000,0x64000000,0x30000000,0x30000000,0x20000000,0x30000221,0x30000221,0x30000221,0x280000C2,0x280000C2,0x1E000068,0x18000221,0x18000221,0x16000145,0x10000221,0x2002D6,0x2002D6,0x2002D6,0x22000143,0x22000143,0x180000C0,0x1200025D,0x1200025D,0x14000185,0xE00023E,0x3C02D6, -0x3C02D6,0x100001F2,0xE000289,0xA0002D6,0xC40000A9,0xFE0C00C1,0x1402D9,0x640000E8,0x3C0000D0,0x2E0000D0,0x2E0000A9,0x220000F5,0x64000178,0x44000138,0x1E000225,0x14000185,0x2C02D6,0x200221,0x200221,0x200221,0x200221,0x64000000,0x64000000,0x64000000,0x30000000,0x30000000,0x20000000,0x300221,0x300221,0x300221,0x280000C2,0x280000C2, -0x1E000068,0x5C0221,0x5C0221,0x16000145,0x10000221,0x300221,0x300221,0x300221,0x280000C2,0x280000C2,0x1E000068,0x5C0221,0x5C0221,0x16000145,0x10000221,0x5C0221,0x5C0221,0x16000145,0x10000221,0x10000221,0xC40000A9,0xFE0C009D,0x200221,0x640000E8,0x3C0000D0,0x2E0000D0,0x2E0000A9,0x220000F5,0x74000151,0x4A000121,0x400221,0x16000145, -0x400221,0x8C0A69,0xFC440001,0x8C440001,0x68400002,0xCC0A69,0x92000112,0x68000002,0x1A00A69,0x560003DA,0x44000A69,0xCC0A69,0x92000112,0x68000002,0x1A00A69,0x560003DA,0x44000A69,0x1A00A69,0x560003DA,0x44000A69,0x44000A69,0xCC0A69,0x92000112,0x68000002,0x1A00A69,0x560003DA,0x44000A69,0x1A00A69,0x560003DA,0x44000A69,0x44000A69,0x1A00A69, -0x560003DA,0x44000A69,0x44000A69,0x44000A69,0xFE3406B2,0x940A69,0xF8800745,0xDE000385,0x96000410,0x720003D4,0x5E00028A,0x54000502,0xFE180659,0xBA000454,0x64000232,0x44000A69,0x1240A69,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table193[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x500001,0x500001,0x500001,0x500001,0x780000,0x780000,0x780000,0xF40000,0xF40000,0x28000000,0x780000,0x780000,0x780000,0xF40000,0xF40000,0x28000000,0xF40000,0xF40000,0x28000000,0x28000000,0x780000,0x780000,0x780000,0xF40000,0xF40000,0x28000000,0xF40000,0xF40000,0x28000000,0x28000000,0xF40000, -0xF40000,0x28000000,0x28000000,0x28000000,0x600000,0x580000,0x500001,0x700000,0x880000,0xAC0000,0xC80000,0x12C0000,0x4640000,0x780000,0xAC0000,0x28000000,0xAC0000,0xE40000,0x3500000,0x2DFC0000,0x70000001,0x3500000,0x2DFC0000,0x70000001,0x2DFC0000,0x70000001,0x70000001,0x3500000,0x2DFC0000,0x70000001,0x2DFC0000,0x70000001, -0x70000001,0x2DFC0000,0x70000001,0x70000001,0x70000001,0x3500000,0x2DFC0000,0x70000001,0x2DFC0000,0x70000001,0x70000001,0x2DFC0000,0x70000001,0x70000001,0x70000001,0x2DFC0000,0x70000001,0x70000001,0x70000001,0x70000001,0x31C0000,0xF40000,0xF40000,0x1800000,0x13F80000,0x4FF40000,0x70000001,0x70000001,0x3340000,0x1AC0000,0x69E40000,0x70000001, -0x1E40000,0x4C21E1,0xFE2405DD,0xA21C0461,0x701C0462,0xEA000A69,0xA20000A0,0x7200000A,0x72000A69,0x60000361,0x4C000A69,0x9C001A0D,0x860009AA,0x68000506,0x68000F41,0x58000723,0x46000CCE,0x4C001A0D,0x4A001006,0x3E0012F1,0x32001A0E,0x7421DF,0x7400101A,0x62000932,0x62001345,0x52000A54,0x46000EDF,0x46001C46,0x44001257,0x3C0014AA,0x30001B22,0xE821DF, -0x3E0017F2,0x320018ED,0x2C001E16,0x260021DF,0xFE140E42,0xF8401A29,0xFC481A39,0xDE000716,0x9A000789,0x7000072D,0x5E0004BB,0x560009B3,0xFE000ECD,0xB6000A4B,0x56000C76,0x3C0014AA,0xA421DF,0x681A0D,0xFE300425,0x9E28034A,0x7028034A,0xEA000A69,0xA20000A0,0x72040009,0x72000A69,0x60000361,0x4C000A69,0x2981A0D,0x860009AA,0x68000506,0x68000F41,0x58000723, -0x46000CCE,0x1381A0D,0x4A001006,0x3E0012F1,0x32001A0E,0x2981A0D,0x860009AA,0x68000506,0x68000F41,0x58000723,0x46000CCE,0x1381A0D,0x4A001006,0x3E0012F1,0x32001A0E,0x1381A0D,0x4A001006,0x3E0012F1,0x32001A0E,0x32001A0E,0xFE200D73,0xFE4C1521,0xF860142A,0xDE000716,0x9A000789,0x7000072D,0x5E0004BB,0x560009B3,0xFE000DCD,0xBC0009CE,0x56000C5D,0x3E0012F1, -0xDC1A0D,0x1C0461,0x1C0461,0x1C0461,0x1C0461,0x7C000000,0x7C000000,0x7C000000,0x3C000000,0x3C000000,0x28000000,0x3C000349,0x3C000349,0x3C000349,0x2E000132,0x2E000132,0x2200009D,0x1E000349,0x1E000349,0x1C0001F9,0x14000349,0x280461,0x280461,0x280461,0x280001FB,0x280001FB,0x22000116,0x180003A1,0x180003A1,0x1A000259,0x12000371,0x500461, -0x500461,0x16000306,0x100003DA,0xC000462,0xF6000105,0xFE0C01B1,0x1C0461,0x7A000161,0x50000140,0x40000145,0x38000112,0x2A000172,0x82000248,0x500001DD,0x2400034D,0x1A000259,0x380461,0x280349,0x280349,0x280349,0x280349,0x7C000000,0x7C000000,0x7C000000,0x3C000000,0x3C000000,0x28000000,0x3C0349,0x3C0349,0x3C0349,0x2E000132,0x2E000132, -0x2200009D,0x740349,0x740349,0x1C0001F9,0x14000349,0x3C0349,0x3C0349,0x3C0349,0x2E000132,0x2E000132,0x2200009D,0x740349,0x740349,0x1C0001F9,0x14000349,0x740349,0x740349,0x1C0001F9,0x14000349,0x14000349,0xF6000105,0xF4180154,0x280349,0x7A000161,0x50000140,0x40000145,0x38000112,0x2A000172,0x82000208,0x5A0001BD,0x540349,0x1C0001F9, -0x540349,0x9C0A69,0xFE540005,0x94540001,0x70500002,0xE40A69,0xA20000A0,0x700C0001,0x1D00A69,0x60000361,0x4C000A69,0xE40A69,0xA20000A0,0x700C0001,0x1D00A69,0x60000361,0x4C000A69,0x1D00A69,0x60000361,0x4C000A69,0x4C000A69,0xE40A69,0xA20000A0,0x700C0001,0x1D00A69,0x60000361,0x4C000A69,0x1D00A69,0x60000361,0x4C000A69,0x4C000A69,0x1D00A69, -0x60000361,0x4C000A69,0x4C000A69,0x4C000A69,0xFE5006CD,0xA40A69,0xFE8C0749,0xF4000304,0xA0000384,0x7C000335,0x66000209,0x5C000492,0xFE2C0694,0xCC0003E8,0x6E0001A8,0x4C000A69,0x1480A69,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table194[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x600001,0x600001,0x600001,0x600001,0x900000,0x900000,0x900000,0x1240000,0x1240000,0x30000000,0x900000,0x900000,0x900000,0x1240000,0x1240000,0x30000000,0x1240000,0x1240000,0x30000000,0x30000000,0x900000,0x900000,0x900000,0x1240000,0x1240000,0x30000000,0x1240000,0x1240000,0x30000000,0x30000000,0x1240000, -0x1240000,0x30000000,0x30000000,0x30000000,0x2700000,0x680000,0x600001,0x840000,0xA40000,0xD00000,0xEC0000,0x1680000,0x4780000,0x900000,0xD00000,0x30000000,0xD00000,0xF40000,0x3680000,0x39FC0000,0x78000001,0x3680000,0x39FC0000,0x78000001,0x39FC0000,0x78000001,0x78000001,0x3680000,0x39FC0000,0x78000001,0x39FC0000,0x78000001, -0x78000001,0x39FC0000,0x78000001,0x78000001,0x78000001,0x3680000,0x39FC0000,0x78000001,0x39FC0000,0x78000001,0x78000001,0x39FC0000,0x78000001,0x78000001,0x78000001,0x39FC0000,0x78000001,0x78000001,0x78000001,0x78000001,0x3300000,0x1040000,0x1040000,0x1980000,0x1FFC0000,0x59F40000,0x78000001,0x78000001,0x14C0000,0x1C80000,0x71F40000,0x78000001, -0x5FC0000,0x5426F9,0xFE3008D5,0xAC200642,0x78200642,0xFE000A6D,0xAE000050,0x7A04003A,0x7E000A69,0x6C0002E9,0x54000A69,0xAC001D72,0x92000B12,0x6E000632,0x6E001055,0x62000755,0x52000D4E,0x54001D72,0x5000120E,0x440014CD,0x38001D72,0x8026F7,0x7A0012C2,0x68000B0A,0x62001515,0x5E000B3C,0x4C000FDB,0x4C00202E,0x4A0014D7,0x440016DE,0x36001EC2,0x10026F7, -0x44001B8A,0x38001C09,0x32002262,0x2A0026F7,0xFE201173,0xFC481E61,0xFE4C1EE5,0xF4000768,0xA20007D3,0x7C000746,0x660004D2,0x60000A46,0xFE00117D,0xC6000B19,0x5E000E2A,0x440016DE,0xB426F7,0x701D75,0xFE3C0631,0xA83004B2,0x783004B2,0xFE000A6D,0xAE000050,0x7C080032,0x7E000A69,0x6C0002E9,0x54000A69,0xA81D72,0x92000B12,0x6E000632,0x6E001055,0x62000755, -0x52000D4E,0x1581D72,0x5000120E,0x440014CD,0x38001D72,0xA81D72,0x92000B12,0x6E000632,0x6E001055,0x62000755,0x52000D4E,0x1581D72,0x5000120E,0x440014CD,0x38001D72,0x1581D72,0x5000120E,0x440014CD,0x38001D72,0x38001D72,0xFE340FF2,0xF8601819,0xFC68174A,0xF4000768,0xA20007D3,0x7C000746,0x660004D2,0x60000A46,0xFE08103E,0xC6000A89,0x5E000E06,0x440014CD, -0xF01D72,0x200641,0x200641,0x200641,0x200641,0x94000000,0x94000000,0x94000000,0x48000000,0x48000000,0x30000000,0x480004B1,0x480004B1,0x480004B1,0x3A0001BA,0x3A0001BA,0x2E0000E5,0x220004B1,0x220004B1,0x200002E4,0x180004B1,0x300641,0x300641,0x300641,0x2E0002E3,0x2E0002E3,0x28000192,0x2200052A,0x2200052A,0x1C00035D,0x180004F1,0x5C0641, -0x5C0641,0x1C000462,0x1400058D,0x10000642,0xF60001A5,0xF4180304,0x200641,0x900001F4,0x5E0001CD,0x480001CD,0x44000184,0x2E00022D,0x9600034E,0x720002BB,0x2C0004BA,0x1C00035D,0x400641,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x94000000,0x94000000,0x94000000,0x48000000,0x48000000,0x30000000,0x4804B1,0x4804B1,0x4804B1,0x3A0001BA,0x3A0001BA, -0x2E0000E5,0x8C04B1,0x8C04B1,0x200002E4,0x180004B1,0x4804B1,0x4804B1,0x4804B1,0x3A0001BA,0x3A0001BA,0x2E0000E5,0x8C04B1,0x8C04B1,0x200002E4,0x180004B1,0x8C04B1,0x8C04B1,0x200002E4,0x180004B1,0x180004B1,0xF60001A5,0xF8200244,0x3004B1,0x900001F4,0x5E0001CD,0x480001CD,0x44000184,0x2E00022D,0xA40002F2,0x7200028A,0x6404B1,0x200002E4, -0x6404B1,0xAC0A69,0xFE680012,0x9C640001,0x78600002,0xFC0A69,0xAE000050,0x781C0001,0x3F80A69,0x6C0002E9,0x54000A69,0xFC0A69,0xAE000050,0x781C0001,0x3F80A69,0x6C0002E9,0x54000A69,0x3F80A69,0x6C0002E9,0x54000A69,0x54000A69,0xFC0A69,0xAE000050,0x781C0001,0x3F80A69,0x6C0002E9,0x54000A69,0x3F80A69,0x6C0002E9,0x54000A69,0x54000A69,0x3F80A69, -0x6C0002E9,0x54000A69,0x54000A69,0x54000A69,0xF6680708,0xB80A69,0xF8A00782,0xFC0002AD,0xB600031A,0x860002D5,0x70000195,0x6400042A,0xF44806CD,0xDE000361,0x76000128,0x54000A69,0x1680A69,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table195[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x700001,0x700001,0x700001,0x700001,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x38000000,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x38000000,0x1580000,0x1580000,0x38000000,0x38000000,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x38000000,0x1580000,0x1580000,0x38000000,0x38000000,0x1580000, -0x1580000,0x38000000,0x38000000,0x38000000,0x840000,0x4780000,0x700001,0x2980000,0xC00000,0xF00000,0x1140000,0x1A40000,0x48C0000,0xA80000,0xF00000,0x38000000,0xF00000,0x1040000,0x3800000,0x45FC0000,0x80000001,0x3800000,0x45FC0000,0x80000001,0x45FC0000,0x80000001,0x80000001,0x3800000,0x45FC0000,0x80000001,0x45FC0000,0x80000001, -0x80000001,0x45FC0000,0x80000001,0x80000001,0x80000001,0x3800000,0x45FC0000,0x80000001,0x45FC0000,0x80000001,0x80000001,0x45FC0000,0x80000001,0x80000001,0x80000001,0x45FC0000,0x80000001,0x80000001,0x80000001,0x80000001,0x3440000,0x3140000,0x3140000,0x1B40000,0x2DFC0000,0x63F40000,0x80000001,0x80000001,0x1600000,0x1E80000,0x7BC80000,0x80000001, -0x15FC0000,0x5C2C91,0xFE300C45,0xB6280879,0x8024087A,0xFE0C0AED,0xBA000020,0x8208009A,0x8A000A69,0x72000271,0x5C000A69,0xBC00212D,0xA2000C99,0x7A000792,0x7A001185,0x680007B9,0x58000DDA,0x5C00212D,0x5600145E,0x4A0016D9,0x3E00212E,0x8C2C8F,0x860015BA,0x6E000D42,0x6E001735,0x62000C25,0x520010F7,0x5200247E,0x500017A7,0x4A00194A,0x3A0022B7,0x1182C8F, -0x44001F8A,0x3E001F75,0x38002726,0x2E002C8F,0xFE28156D,0xFE4C2325,0xFE4C2475,0xFE00082D,0xB6000833,0x880007C9,0x740004FA,0x60000AF6,0xFE08151E,0xDA000C0E,0x6600103A,0x4A00194A,0xC82C8F,0x7C212D,0xFE4408A6,0xB238065A,0x8038065A,0xFE0C0AC9,0xBA000020,0x840C007E,0x8A000A69,0x72000271,0x5C000A69,0xB8212D,0xA2000C99,0x7A000792,0x7A001185,0x680007B9, -0x58000DDA,0x174212D,0x5600145E,0x4A0016D9,0x3E00212E,0xB8212D,0xA2000C99,0x7A000792,0x7A001185,0x680007B9,0x58000DDA,0x174212D,0x5600145E,0x4A0016D9,0x3E00212E,0x174212D,0x5600145E,0x4A0016D9,0x3E00212E,0x3E00212E,0xFE3412F2,0xFE6C1B49,0xFE6C1AEE,0xFE00082D,0xB6000833,0x880007C9,0x740004FA,0x60000AF6,0xFE18134D,0xDA000B4A,0x66001016,0x4A0016D9, -0x108212D,0x240879,0x240879,0x240879,0x240879,0xAC000000,0xAC000000,0xAC000000,0x54000000,0x54000000,0x38000000,0x54000659,0x54000659,0x54000659,0x40000262,0x40000262,0x34000131,0x28000659,0x28000659,0x260003E8,0x1C000659,0x380876,0x380876,0x380876,0x3A0003F3,0x3A0003F3,0x2E000226,0x220006FA,0x220006FA,0x24000491,0x1A0006AE,0x700876, -0x700876,0x1C0005E2,0x16000776,0x12000876,0xFE0402B1,0xF61C0498,0x240879,0xAC0002B9,0x72000275,0x5600028A,0x4C000209,0x380002F2,0xB400047A,0x720003AB,0x34000662,0x24000491,0x500876,0x380659,0x380659,0x380659,0x380659,0xAC000000,0xAC000000,0xAC000000,0x54000000,0x54000000,0x38000000,0x540659,0x540659,0x540659,0x40000262,0x40000262, -0x34000131,0xA40659,0xA40659,0x260003E8,0x1C000659,0x540659,0x540659,0x540659,0x40000262,0x40000262,0x34000131,0xA40659,0xA40659,0x260003E8,0x1C000659,0xA40659,0xA40659,0x260003E8,0x1C000659,0x1C000659,0xFA0802AD,0xFC280374,0x380659,0xAC0002B9,0x72000275,0x5600028A,0x4C000209,0x380002F2,0xC20003FA,0x7C000371,0x740659,0x260003E8, -0x740659,0xBC0A69,0xFE78002D,0xA4740001,0x80700002,0x1140A69,0xBA000020,0x802C0001,0xFF80A69,0x72000271,0x5C000A69,0x1140A69,0xBA000020,0x802C0001,0xFF80A69,0x72000271,0x5C000A69,0xFF80A69,0x72000271,0x5C000A69,0x5C000A69,0x1140A69,0xBA000020,0x802C0001,0xFF80A69,0x72000271,0x5C000A69,0xFF80A69,0x72000271,0x5C000A69,0x5C000A69,0xFF80A69, -0x72000271,0x5C000A69,0x5C000A69,0x5C000A69,0xFE780708,0xC80A69,0xFEAC078A,0xFE1402D4,0xC000029A,0x90000254,0x7A000131,0x720003BA,0xFC5806CD,0xF40002F2,0x820000DA,0x5C000A69,0x18C0A69,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table196[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x840000,0x840000,0x840000,0x840000,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x40000001,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x40000001,0x18C0000,0x18C0000,0x40000001,0x40000001,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x40000001,0x18C0000,0x18C0000,0x40000001,0x40000001,0x18C0000, -0x18C0000,0x40000001,0x40000001,0x40000001,0x2980000,0x8C0000,0x840000,0xB40000,0xDC0000,0x1180000,0x1400000,0x1E80000,0xA40000,0xC40000,0x1180000,0x40000001,0x1180000,0x1140001,0x19C0000,0x53FC0000,0x8A000000,0x19C0000,0x53FC0000,0x8A000000,0x53FC0000,0x8A000000,0x8A000000,0x19C0000,0x53FC0000,0x8A000000,0x53FC0000,0x8A000000, -0x8A000000,0x53FC0000,0x8A000000,0x8A000000,0x8A000000,0x19C0000,0x53FC0000,0x8A000000,0x53FC0000,0x8A000000,0x8A000000,0x53FC0000,0x8A000000,0x8A000000,0x8A000000,0x53FC0000,0x8A000000,0x8A000000,0x8A000000,0x8A000000,0x15C0000,0x1280000,0x1280000,0x1D40000,0x3DF80000,0x6FF00000,0x8A000000,0x8A000000,0x3780000,0x9FC0000,0x83F80000,0x8A000000, -0x25FC0000,0x683375,0xFE3C10E1,0xC22C0B59,0x8A2C0B58,0xFE0C0C55,0xCC000002,0x8E080130,0x98000A69,0x7E0001F9,0x66000A69,0xCE0025C5,0xA8000EBB,0x86000984,0x86001301,0x74000831,0x62000E81,0x640025C5,0x5C001758,0x50001969,0x440025C6,0x983373,0x92001984,0x7A001024,0x7A0019E1,0x6E000D81,0x5E00125B,0x5E0029BE,0x5C001B19,0x50001C42,0x4000279F,0x1303373, -0x5000247C,0x440023B5,0x38002CEA,0x32003373,0xFE341A28,0xFE4C299D,0xF8602B08,0xFE0009DD,0xC00008C1,0x92000865,0x7C000545,0x6C000BD8,0xFE081A0A,0xDE000D62,0x7400128E,0x50001C42,0xD83373,0x8825C5,0xFE500BF4,0xBE400884,0x8A400884,0xFE180BD5,0xCC000002,0x8E140104,0x98000A69,0x7E0001F9,0x66000A69,0xC825C5,0xA8000EBB,0x86000984,0x86001301,0x74000831, -0x62000E81,0x19825C5,0x5C001758,0x50001969,0x440025C6,0xC825C5,0xA8000EBB,0x86000984,0x86001301,0x74000831,0x62000E81,0x19825C5,0x5C001758,0x50001969,0x440025C6,0x19825C5,0x5C001758,0x50001969,0x440025C6,0x440025C6,0xFE5016DA,0xFE6C1FA5,0xF8801F85,0xFE0009DD,0xC00008C1,0x92000865,0x7C000545,0x6C000BD8,0xFE24174D,0xE8000C89,0x7400125D,0x50001969, -0x12025C5,0x2C0B58,0x2C0B58,0x2C0B58,0x2C0B58,0xC8000000,0xC8000000,0xC8000000,0x60000001,0x60000001,0x40000001,0x64000882,0x64000882,0x64000882,0x4C000335,0x4C000335,0x3A00019A,0x30000882,0x30000882,0x2C00053D,0x20000882,0x400B58,0x400B58,0x400B58,0x40000556,0x40000556,0x340002EB,0x2800095D,0x2800095D,0x2A00061E,0x1E0008EE,0x800B58, -0x800B58,0x20000809,0x1C000A0B,0x14000B5B,0xFC0C044E,0xF82006C9,0x2C0B58,0xC200039D,0x7C000352,0x6200034D,0x5A0002D0,0x400003E8,0xC200060D,0x900004DE,0x3E000892,0x2A00061E,0x5C0B58,0x400884,0x400884,0x400884,0x400884,0xC8000000,0xC8000000,0xC8000000,0x60000001,0x60000001,0x40000001,0x600882,0x600882,0x600882,0x4C000335,0x4C000335, -0x3A00019A,0xC40882,0xC40882,0x2C00053D,0x20000882,0x600882,0x600882,0x600882,0x4C000335,0x4C000335,0x3A00019A,0xC40882,0xC40882,0x2C00053D,0x20000882,0xC40882,0xC40882,0x2C00053D,0x20000882,0x20000882,0xFE100422,0xFE2C052D,0x400884,0xC200039D,0x7C000352,0x6200034D,0x5A0002D0,0x400003E8,0xD6000568,0x9000048D,0x8C0882,0x2C00053D, -0x8C0882,0xCC0A69,0xFE8C0050,0xAE840000,0x8A840000,0x1300A69,0xCC000002,0x8A3C0000,0x1DF40A69,0x7E0001F9,0x66000A69,0x1300A69,0xCC000002,0x8A3C0000,0x1DF40A69,0x7E0001F9,0x66000A69,0x1DF40A69,0x7E0001F9,0x66000A69,0x66000A69,0x1300A69,0xCC000002,0x8A3C0000,0x1DF40A69,0x7E0001F9,0x66000A69,0x1DF40A69,0x7E0001F9,0x66000A69,0x66000A69,0x1DF40A69, -0x7E0001F9,0x66000A69,0x66000A69,0x66000A69,0xFC900745,0xDC0A69,0xFAC407C1,0xFE300322,0xD6000232,0x9E0001D4,0x840000CD,0x7A000340,0xFE700708,0xFC0402AD,0x8E00007D,0x66000A69,0x1B00A69,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table197[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x180000,0x180000,0x180000,0x180000,0x180000, -0x180000,0x2C0000,0x2C0000,0x2C0000,0x6000001,0x180000,0x180000,0x180000,0x180000,0x180000,0x180000,0x2C0000,0x2C0000,0x2C0000,0x6000001,0x2C0000,0x2C0000,0x2C0000,0x6000001,0x6000001,0x100000,0x100000,0x100000,0x4100000,0x140000,0x140000,0x140000,0x180000,0x4100000,0x140000,0x200000,0x2C0000, -0x200000,0x940000,0x940000,0x940000,0x940000,0xDC0000,0xDC0000,0xDC0000,0x1BC0000,0x1BC0000,0x48000001,0xDC0000,0xDC0000,0xDC0000,0x1BC0000,0x1BC0000,0x48000001,0x1BC0000,0x1BC0000,0x48000001,0x48000001,0xDC0000,0xDC0000,0xDC0000,0x1BC0000,0x1BC0000,0x48000001,0x1BC0000,0x1BC0000,0x48000001,0x48000001,0x1BC0000, -0x1BC0000,0x48000001,0x48000001,0x48000001,0xAC0000,0x69C0000,0x940000,0xC80000,0xF80000,0x1380000,0x1680000,0x9F80000,0xB80000,0xDC0000,0x1380000,0x48000001,0x1380000,0x1240001,0x1B40000,0x5FF80000,0x92000000,0x1B40000,0x5FF80000,0x92000000,0x5FF80000,0x92000000,0x92000000,0x1B40000,0x5FF80000,0x92000000,0x5FF80000,0x92000000, -0x92000000,0x5FF80000,0x92000000,0x92000000,0x92000000,0x1B40000,0x5FF80000,0x92000000,0x5FF80000,0x92000000,0x92000000,0x5FF80000,0x92000000,0x92000000,0x92000000,0x5FF80000,0x92000000,0x92000000,0x92000000,0x92000000,0x1700000,0x5380000,0x5380000,0x3EC0000,0x49FC0000,0x79F00000,0x92000000,0x92000000,0x1900000,0x19FC0000,0x8DCC0000,0x92000000, -0x35FC0000,0x7436D9,0xFE4413A8,0xCC380CE4,0x92380CE4,0xFE180DB5,0xD8080020,0x981001A8,0xA2080A89,0x860401D9,0x6E080A89,0xE60025C5,0xBA000D5B,0x8C0008D8,0x920011B1,0x80000671,0x68000D49,0x700025C5,0x6C001626,0x5C001831,0x4C0025C6,0xA836D7,0xA20019F4,0x800010AC,0x86001A01,0x74000CC5,0x620011B1,0x68002A9E,0x66001AB6,0x56001BC6,0x48002826,0x15836D7, -0x560025F8,0x500024DD,0x44002EAA,0x380036D7,0xFE3C1CD5,0xFA642CB1,0xFE6C2E38,0xFE080A6B,0xD000070E,0x9E000678,0x880003A5,0x760009F9,0xFE181C4A,0xFC000B9A,0x7E00111A,0x56001BC6,0xF036D7,0x9825C5,0xFE680C84,0xC6500884,0x92500884,0xFE300C45,0xD4100002,0x96240104,0xA0100A69,0x860401D5,0x6E100A69,0xE025C5,0xBA000D5B,0x8C0008D8,0x920011B1,0x80000671, -0x68000D49,0x1CC25C5,0x6C001626,0x5C001831,0x4C0025C6,0xE025C5,0xBA000D5B,0x8C0008D8,0x920011B1,0x80000671,0x68000D49,0x1CC25C5,0x6C001626,0x5C001831,0x4C0025C6,0x1CC25C5,0x6C001626,0x5C001831,0x4C0025C6,0x4C0025C6,0xFE581771,0xFC881FBD,0xFE8C1F95,0xFE080A5B,0xD000070E,0x9E000678,0x880003A5,0x760009F9,0xFE3417D5,0xFC000A9A,0x7E0010DA,0x5C001831, -0x14025C5,0x380CE4,0x380CE4,0x380CE4,0x380CE4,0xD8080020,0xD8080020,0xD8080020,0x6A080021,0x6A080021,0x48080021,0x7C000882,0x7C000882,0x7C000882,0x62000271,0x62000271,0x460000EA,0x3C000882,0x3C000882,0x380004A5,0x28000882,0x500CE3,0x500CE3,0x500CE3,0x4C000576,0x4C000576,0x400002B3,0x340009D5,0x340009D5,0x320005D6,0x2800092B,0xA00CE3, -0xA00CE3,0x260008B1,0x20000AE6,0x1A000CE3,0xFE1004E2,0xFE2C0801,0x380CE4,0xE40002E9,0x9A00028A,0x6E00029A,0x6A0001F4,0x4C000332,0xF40005C5,0xB200046A,0x4C00089B,0x320005D6,0x700CE3,0x500884,0x500884,0x500884,0x500884,0xD0100000,0xD0100000,0xD0100000,0x68100001,0x68100001,0x48100001,0x780882,0x780882,0x780882,0x62000271,0x62000271, -0x460000EA,0xF40882,0xF40882,0x380004A5,0x28000882,0x780882,0x780882,0x780882,0x62000271,0x62000271,0x460000EA,0xF40882,0xF40882,0x380004A5,0x28000882,0xF40882,0xF40882,0x380004A5,0x28000882,0x28000882,0xFE200451,0xFA440548,0x500884,0xE40002E9,0x9A00028A,0x6E00029A,0x6A0001F4,0x4C000332,0xF40004E4,0xB20003F1,0xAC0882,0x380004A5, -0xAC0882,0xDC0A69,0xFEA40080,0xB6940000,0x92940000,0x1480A69,0xD8080000,0x924C0000,0x27FC0A69,0x840001A5,0x6E000A69,0x1480A69,0xD8080000,0x924C0000,0x27FC0A69,0x840001A5,0x6E000A69,0x27FC0A69,0x840001A5,0x6E000A69,0x6E000A69,0x1480A69,0xD8080000,0x924C0000,0x27FC0A69,0x840001A5,0x6E000A69,0x27FC0A69,0x840001A5,0x6E000A69,0x6E000A69,0x27FC0A69, -0x840001A5,0x6E000A69,0x6E000A69,0x6E000A69,0xFE940781,0xEC0A69,0xFECC07D9,0xFE400361,0xE00001CA,0xA8000190,0x8C000088,0x820002E4,0xF6880745,0xFE1402E4,0x9600003D,0x6E000A69,0x1D40A69,0x80020,0x80020,0x80020,0x80020,0x80020,0x80020,0x80020,0x80020,0x80020,0x80020,0x16000000,0x16000000,0x16000000,0x16000000,0x16000000, -0x16000000,0xA000001,0xA000001,0xA000001,0x6000001,0xC0020,0xC0020,0xC0020,0xC0020,0xC0020,0xC0020,0x600000D,0x600000D,0x600000D,0x6000005,0x140020,0x140020,0x140020,0x4000012,0x2000022,0x78000000,0x80020,0x80020,0x36000000,0x24000000,0x1C000000,0x1C000000,0x12000000,0x36000009,0x24000004,0xE000001,0x600000D, -0x100020,}; -static const uint32_t g_etc1_to_bc7_m6_table198[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x200000,0x300000,0x300000,0x300000,0x300000,0x300000, -0x300000,0x5C0000,0x5C0000,0x5C0000,0xE000001,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x5C0000,0x5C0000,0x5C0000,0xE000001,0x5C0000,0x5C0000,0x5C0000,0xE000001,0xE000001,0x8200000,0x200000,0x200000,0x240000,0x280000,0x2C0000,0x2C0000,0x340000,0x240000,0x280000,0x400000,0x5C0000, -0x400000,0xA40000,0xA40000,0xA40000,0xA40000,0xF40000,0xF40000,0xF40000,0x1F00000,0x1F00000,0x50000001,0xF40000,0xF40000,0xF40000,0x1F00000,0x1F00000,0x50000001,0x1F00000,0x1F00000,0x50000001,0x50000001,0xF40000,0xF40000,0xF40000,0x1F00000,0x1F00000,0x50000001,0x1F00000,0x1F00000,0x50000001,0x50000001,0x1F00000, -0x1F00000,0x50000001,0x50000001,0x50000001,0x6BC0000,0xEAC0000,0xA40000,0x2DC0000,0x1140000,0x15C0000,0x1900000,0x15F40000,0x2CC0000,0xF40000,0x15C0000,0x50000001,0x15C0000,0x1340001,0x1CC0000,0x6BF80000,0x9A000000,0x1CC0000,0x6BF80000,0x9A000000,0x6BF80000,0x9A000000,0x9A000000,0x1CC0000,0x6BF80000,0x9A000000,0x6BF80000,0x9A000000, -0x9A000000,0x6BF80000,0x9A000000,0x9A000000,0x9A000000,0x1CC0000,0x6BF80000,0x9A000000,0x6BF80000,0x9A000000,0x9A000000,0x6BF80000,0x9A000000,0x9A000000,0x9A000000,0x6BF80000,0x9A000000,0x9A000000,0x9A000000,0x9A000000,0x1840000,0xD480000,0xD480000,0xBFC0000,0x57FC0000,0x83F00000,0x9A000000,0x9A000000,0x3A40000,0x2BFC0000,0x95DC0000,0x9A000000, -0x43FC0000,0x803A9D,0xFE5016F4,0xD6400EC4,0x9A400EC4,0xFE300F95,0xE4100082,0x9E1C0268,0xAA100AE9,0x900C020D,0x76100AE9,0xFE0025C5,0xCC000C2B,0x98000888,0xA2001052,0x860004F1,0x74000C51,0x7C0025C5,0x720014E2,0x66001742,0x540025C6,0xBC3A9B,0xAE001B0C,0x8C00119C,0x92001A61,0x80000C45,0x6E001179,0x74002BBE,0x6C001A56,0x60001B66,0x520028A3,0x17C3A9B, -0x5C0027DC,0x56002631,0x4A00308E,0x3E003A9B,0xFE501FDA,0xFE6C3029,0xFE6C3258,0xFE140B8E,0xE2000578,0xAE0004FD,0x9200026A,0x8000085D,0xFE241F6E,0xFC000ADA,0x86000FF4,0x60001B66,0x10C3A9B,0xA825C5,0xFE740D24,0xCE600884,0x9A600884,0xFE440CB5,0xDC200002,0x9E340104,0xA8200A69,0x8E1401D5,0x76200A69,0xF825C5,0xCC000C2B,0x98000888,0xA2001052,0x860004F1, -0x74000C51,0x1FC25C5,0x720014E2,0x66001742,0x540025C6,0xF825C5,0xCC000C2B,0x98000888,0xA2001052,0x860004F1,0x74000C51,0x1FC25C5,0x720014E2,0x66001742,0x540025C6,0x1FC25C5,0x720014E2,0x66001742,0x540025C6,0x540025C6,0xFC74181E,0xFE8C2035,0xF8A02004,0xFE140B2A,0xE2000578,0xAE0004FD,0x9200026A,0x8000085D,0xFE44186E,0xFC0009DA,0x86000FB4,0x66001742, -0x16425C5,0x400EC4,0x400EC4,0x400EC4,0x400EC4,0xE8100080,0xE8100080,0xE8100080,0x74100081,0x74100081,0x50100081,0x94000882,0x94000882,0x94000882,0x680001BD,0x680001BD,0x4C00006A,0x48000882,0x48000882,0x3E00040D,0x30000882,0x600EC3,0x600EC3,0x600EC3,0x580005D6,0x580005D6,0x460002BB,0x40000A6D,0x40000A6D,0x3A0005BA,0x2E00096B,0xC40EC3, -0xC40EC3,0x32000989,0x26000BEE,0x20000EC3,0xFE2005E1,0xF43809E0,0x400EC4,0xFA00024A,0xB20001E1,0x880001E1,0x8000015D,0x6000028A,0xFE000614,0xC40003F1,0x5C0008A6,0x3A0005BA,0x8C0EC3,0x600884,0x600884,0x600884,0x600884,0xD8200000,0xD8200000,0xD8200000,0x70200001,0x70200001,0x50200001,0x900882,0x900882,0x900882,0x680001BD,0x680001BD, -0x4C00006A,0x1240882,0x1240882,0x3E00040D,0x30000882,0x900882,0x900882,0x900882,0x680001BD,0x680001BD,0x4C00006A,0x1240882,0x1240882,0x3E00040D,0x30000882,0x1240882,0x1240882,0x3E00040D,0x30000882,0x30000882,0xFA340480,0xFE4C0568,0x600884,0xFA00024A,0xB20001E1,0x880001E1,0x8000015D,0x6000028A,0xFE0C04E2,0xD000034D,0xD00882,0x3E00040D, -0xD00882,0xEC0A69,0xFEB000B4,0xBEA40000,0x9AA40000,0x1600A69,0xE0180000,0x9A5C0000,0x33FC0A69,0x90000145,0x76000A69,0x1600A69,0xE0180000,0x9A5C0000,0x33FC0A69,0x90000145,0x76000A69,0x33FC0A69,0x90000145,0x76000A69,0x76000A69,0x1600A69,0xE0180000,0x9A5C0000,0x33FC0A69,0x90000145,0x76000A69,0x33FC0A69,0x90000145,0x76000A69,0x76000A69,0x33FC0A69, -0x90000145,0x76000A69,0x76000A69,0x76000A69,0xFEB40782,0xFC0A69,0xFAE40800,0xFE5803B5,0xF6000184,0xB6000132,0x94000048,0x8A000290,0xFE980745,0xFE2C0340,0xA200001D,0x76000A69,0x1F40A69,0x100080,0x100080,0x100080,0x100080,0x100080,0x100080,0x100080,0x100080,0x100080,0x100080,0x30000000,0x30000000,0x30000000,0x30000000,0x30000000, -0x30000000,0x16000001,0x16000001,0x16000001,0xE000001,0x180080,0x180080,0x180080,0x180080,0x180080,0x180080,0x1200002D,0x1200002D,0x1200002D,0xC000019,0x2C0080,0x2C0080,0x2C0080,0xA00004A,0x6000082,0xF8000000,0x100080,0x100080,0x6E000000,0x4C000000,0x3A000000,0x3A000000,0x26000000,0x60000028,0x42000014,0x1E000004,0x1200002D, -0x200080,}; -static const uint32_t g_etc1_to_bc7_m6_table199[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x300000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000, -0x2440000,0x8C0000,0x8C0000,0x8C0000,0x16000001,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x2440000,0x8C0000,0x8C0000,0x8C0000,0x16000001,0x8C0000,0x8C0000,0x8C0000,0x16000001,0x16000001,0x340000,0x300000,0x300000,0x380000,0x3C0000,0x400000,0x400000,0x500000,0x380000,0x3C0000,0x640000,0x8C0000, -0x640000,0xB40000,0xB40000,0xB40000,0xB40000,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0x58000001,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0x58000001,0xBF80000,0xBF80000,0x58000001,0x58000001,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0x58000001,0xBF80000,0xBF80000,0x58000001,0x58000001,0xBF80000, -0xBF80000,0x58000001,0x58000001,0x58000001,0xD00000,0xC00000,0xB40000,0xF40000,0x32C0000,0x17C0000,0x1B80000,0x1FF80000,0x2E00000,0x10C0000,0x17C0000,0x58000001,0x17C0000,0x1440001,0x1E40000,0x77F80000,0xA2000000,0x1E40000,0x77F80000,0xA2000000,0x77F80000,0xA2000000,0xA2000000,0x1E40000,0x77F80000,0xA2000000,0x77F80000,0xA2000000, -0xA2000000,0x77F80000,0xA2000000,0xA2000000,0xA2000000,0x1E40000,0x77F80000,0xA2000000,0x77F80000,0xA2000000,0xA2000000,0x77F80000,0xA2000000,0xA2000000,0xA2000000,0x77F80000,0xA2000000,0xA2000000,0xA2000000,0xA2000000,0x1980000,0x15C0000,0x15C0000,0x1DFC0000,0x65FC0000,0x8DF00000,0xA2000000,0xA2000000,0x1BC0000,0x3BFC0000,0x9DEC0000,0xA2000000, -0x53FC0000,0x8C3EC1,0xFE5C1AB8,0xDE4C10F9,0xA24C10F8,0xFE3C1205,0xEC180130,0xA8240370,0xB4180B89,0x98100285,0x7E180B89,0xFE0C2621,0xE2000B28,0xA204088E,0xAE000F3A,0x92000391,0x7A000B81,0x880025C5,0x7E0013B2,0x6C00162E,0x5C0025C6,0xCC3EBF,0xBA001C84,0x9800130C,0xA2001ACB,0x8C000C25,0x74001185,0x80002CFE,0x76001A25,0x66001B26,0x5800292F,0x1A03EBF, -0x66002A4B,0x5C0027D5,0x500032AA,0x44003EBF,0xFE582361,0xF8803495,0xFA843661,0xFE180DAD,0xF6000408,0xBC000398,0x9E000164,0x8A0006D0,0xFE342275,0xFE000B6D,0x94000EAC,0x66001B26,0x1243EBF,0xB825C5,0xFE8C0DD4,0xD6700884,0xA2700884,0xFE500D49,0xE4300002,0xA6440104,0xB0300A69,0x962401D5,0x7E300A69,0x11025C5,0xE2000B28,0xA2080884,0xAE000F3A,0x92000391, -0x7A000B81,0xDFC25C5,0x7E0013B2,0x6C00162E,0x5C0025C6,0x11025C5,0xE2000B28,0xA2080884,0xAE000F3A,0x92000391,0x7A000B81,0xDFC25C5,0x7E0013B2,0x6C00162E,0x5C0025C6,0xDFC25C5,0x7E0013B2,0x6C00162E,0x5C0025C6,0x5C0025C6,0xFE78188A,0xFCA82039,0xFEAC2018,0xFE300C02,0xF6000408,0xBC000398,0x9E000164,0x8A0006D0,0xFE5C18B6,0xFE040A46,0x94000E5B,0x6C00162E, -0x18825C5,0x4C10F8,0x4C10F8,0x4C10F8,0x4C10F8,0xF8180120,0xF8180120,0xF8180120,0x7E180121,0x7E180121,0x58180121,0xAC000882,0xAC000882,0xAC000882,0x7A000131,0x7A000131,0x5800001A,0x54000882,0x54000882,0x4A000385,0x38000882,0x7010F8,0x7010F8,0x7010F8,0x62000651,0x62000651,0x52000313,0x4C000B25,0x4C000B25,0x420005AE,0x340009C3,0xE410F8, -0xE410F8,0x38000AA9,0x2C000D26,0x240010FB,0xFE2C0756,0xFA440BCC,0x4C10F8,0xFE08025A,0xC6000151,0x9A000161,0x8C0000CD,0x6A0001E2,0xFE000734,0xDA000398,0x660008AE,0x420005AE,0xA010F8,0x700884,0x700884,0x700884,0x700884,0xE0300000,0xE0300000,0xE0300000,0x78300001,0x78300001,0x58300001,0xA80882,0xA80882,0xA80882,0x7A000131,0x7A000131, -0x5800001A,0x1580882,0x1580882,0x4A000385,0x38000882,0xA80882,0xA80882,0xA80882,0x7A000131,0x7A000131,0x5800001A,0x1580882,0x1580882,0x4A000385,0x38000882,0x1580882,0x1580882,0x4A000385,0x38000882,0x38000882,0xFE3C04A0,0xFA64057D,0x700884,0xFE08024A,0xC6000151,0x9A000161,0x8C0000CD,0x6A0001E2,0xFE140514,0xEC0002D0,0xF00882,0x4A000385, -0xF00882,0xFC0A69,0xFEC400E9,0xC6B40000,0xA2B40000,0x1780A69,0xE8280000,0xA26C0000,0x3FFC0A69,0x9A000104,0x7E000A69,0x1780A69,0xE8280000,0xA26C0000,0x3FFC0A69,0x9A000104,0x7E000A69,0x3FFC0A69,0x9A000104,0x7E000A69,0x7E000A69,0x1780A69,0xE8280000,0xA26C0000,0x3FFC0A69,0x9A000104,0x7E000A69,0x3FFC0A69,0x9A000104,0x7E000A69,0x7E000A69,0x3FFC0A69, -0x9A000104,0x7E000A69,0x7E000A69,0x7E000A69,0xFAC807C1,0x10C0A69,0xFEEC0820,0xFE6C03FA,0xFA040152,0xBE0000F2,0x9E000020,0x9400022D,0xFEAC0784,0xFE4C037A,0xAC000005,0x7E000A69,0xDFC0A69,0x180120,0x180120,0x180120,0x180120,0x180120,0x180120,0x180120,0x180120,0x180120,0x180120,0x48000000,0x48000000,0x48000000,0x48000000,0x48000000, -0x48000000,0x22000000,0x22000000,0x22000000,0x16000001,0x240120,0x240120,0x240120,0x240120,0x240120,0x240120,0x1E00006D,0x1E00006D,0x1E00006D,0x1400003A,0x440120,0x440120,0x440120,0x100000AA,0xA000122,0xFC080020,0x180120,0x180120,0xA8000000,0x74000000,0x58000000,0x58000000,0x3A000000,0x84000059,0x6400002D,0x2C000009,0x1E00006D, -0x300120,}; -static const uint32_t g_etc1_to_bc7_m6_table200[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x400001,0x600000,0x600000,0x600000,0x600000,0x600000, -0x600000,0xC40000,0xC40000,0xC40000,0x20000000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0xC40000,0xC40000,0xC40000,0x20000000,0xC40000,0xC40000,0xC40000,0x20000000,0x20000000,0xA440000,0x400001,0x400001,0x4C0000,0x4500000,0x580000,0x580000,0x26C0000,0x4C0000,0x4500000,0x8C0000,0xC40000, -0x8C0000,0xC40001,0xC40001,0xC40001,0xC40001,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x62000000,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x62000000,0x17FC0000,0x17FC0000,0x62000000,0x62000000,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x62000000,0x17FC0000,0x17FC0000,0x62000000,0x62000000,0x17FC0000, -0x17FC0000,0x62000000,0x62000000,0x62000000,0x4E40000,0xD40000,0xC40001,0x10C0000,0x14C0000,0x1A40000,0x1E40000,0x2BF80000,0xF80000,0x3240000,0x1A40000,0x62000000,0x1A40000,0x1580000,0x3FC0000,0x85F80000,0xAA000001,0x3FC0000,0x85F80000,0xAA000001,0x85F80000,0xAA000001,0xAA000001,0x3FC0000,0x85F80000,0xAA000001,0x85F80000,0xAA000001, -0xAA000001,0x85F80000,0xAA000001,0xAA000001,0xAA000001,0x3FC0000,0x85F80000,0xAA000001,0x85F80000,0xAA000001,0xAA000001,0x85F80000,0xAA000001,0xAA000001,0xAA000001,0x85F80000,0xAA000001,0xAA000001,0xAA000001,0xAA000001,0x1B00000,0xF6C0000,0xF6C0000,0x33FC0000,0x75F80000,0x97F80000,0xAA000001,0xAA000001,0x1D40000,0x4FFC0000,0xA7E00000,0xAA000001, -0x63FC0000,0x9843DA,0xFE741F87,0xEA5813DA,0xAA5813DB,0xFE441546,0xF824023F,0xB43004F3,0xC0200C8A,0xA020035E,0x86200C8A,0xFE1C2759,0xEE000A1B,0xAC0C08D9,0xC0000E29,0x9E000252,0x86000AE2,0x960025C5,0x8A001275,0x7800150D,0x640025C5,0xE043DA,0xCC001E91,0xA200151E,0xA8001BBA,0x92000C52,0x800011F2,0x8C002E81,0x80001A11,0x72001AE5,0x620029D5,0x1CC43DA, -0x6C002D36,0x66002A52,0x56003551,0x4A0043DE,0xFE6427D8,0xFE8C3942,0xFE8C3B6A,0xFE301079,0xFC000338,0xC8000260,0xA6000092,0x96000559,0xFE4426F1,0xFE040DC1,0x9E000D7F,0x72001AE5,0x14043DA,0xCC25C6,0xFE980EA3,0xDE840883,0xAA840883,0xFE680DF2,0xEE400003,0xAE540103,0xB8400A6A,0xA03801D6,0x86400A6A,0x12C25C5,0xEE000A1B,0xAA1C0883,0xC0000E29,0x9E000252, -0x86000AE2,0x1BF825C5,0x8A001275,0x7800150D,0x640025C5,0x12C25C5,0xEE000A1B,0xAA1C0883,0xC0000E29,0x9E000252,0x86000AE2,0x1BF825C5,0x8A001275,0x7800150D,0x640025C5,0x1BF825C5,0x8A001275,0x7800150D,0x640025C5,0x640025C5,0xFE941919,0xF6BC20B2,0xFAC42082,0xFE440D24,0xFC000338,0xC8000260,0xA6000092,0x96000559,0xFE781975,0xFE240B83,0x9E000D1B,0x7800150D, -0x1AC25C5,0x5813DA,0x5813DA,0x5813DA,0x5813DA,0xFE24022E,0xFE24022E,0xFE24022E,0x8A200221,0x8A200221,0x62200221,0xC8000882,0xC8000882,0xC8000882,0x860000B9,0x860000B9,0x62000001,0x60000884,0x60000884,0x500002FD,0x40000884,0x28013DA,0x28013DA,0x28013DA,0x7400074D,0x7400074D,0x580003BD,0x58000C13,0x58000C13,0x4C0005BE,0x40000A3D,0x10813DA, -0x10813DA,0x3E000C45,0x32000EC8,0x2A0013DD,0xFC380952,0xFE4C0E5E,0x5813DA,0xFE100315,0xDE0000D0,0xAE0000CD,0xA2000068,0x78000164,0xFE1408DB,0xFC000344,0x7C0008C2,0x4C0005BE,0xB813DA,0x840882,0x840882,0x840882,0x840882,0xE6440001,0xE6440001,0xE6440001,0x82400001,0x82400001,0x62400001,0xC40882,0xC40882,0xC40882,0x860000B9,0x860000B9, -0x62000001,0x18C0882,0x18C0882,0x500002FD,0x40000884,0xC40882,0xC40882,0xC40882,0x860000B9,0x860000B9,0x62000001,0x18C0882,0x18C0882,0x500002FD,0x40000884,0x18C0882,0x18C0882,0x500002FD,0x40000884,0x40000884,0xFE5804B1,0xF47805B2,0x840882,0xFE240288,0xDE0000D0,0xAE0000CD,0xA2000068,0x78000164,0xFE340515,0xFC000244,0x1180882,0x500002FD, -0x1180882,0x1100A69,0xFEDC013D,0xCEC80001,0xAAC40002,0x1900A69,0xF03C0000,0xAA800001,0x4DFC0A69,0xA40000C1,0x86000A69,0x1900A69,0xF03C0000,0xAA800001,0x4DFC0A69,0xA40000C1,0x86000A69,0x4DFC0A69,0xA40000C1,0x86000A69,0x86000A69,0x1900A69,0xF03C0000,0xAA800001,0x4DFC0A69,0xA40000C1,0x86000A69,0x4DFC0A69,0xA40000C1,0x86000A69,0x86000A69,0x4DFC0A69, -0xA40000C1,0x86000A69,0x86000A69,0x86000A69,0xFED007FD,0x1200A69,0xFD080841,0xFE940451,0xFE100172,0xD00000A9,0xA800000A,0x9C0001E1,0xF6CC07C1,0xFE6403E8,0xB6080000,0x86000A69,0x1FF80A69,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x200221,0x64000000,0x64000000,0x64000000,0x64000000,0x64000000, -0x64000000,0x30000000,0x30000000,0x30000000,0x20000000,0x300221,0x300221,0x300221,0x300221,0x300221,0x300221,0x280000C2,0x280000C2,0x280000C2,0x1E000068,0x5C0221,0x5C0221,0x5C0221,0x16000145,0x10000221,0xFE0C009D,0x200221,0x200221,0xE8000000,0xA0000000,0x7A000000,0x7A000000,0x50000000,0xC40000A9,0x96000055,0x3E000010,0x280000C2, -0x400221,}; -static const uint32_t g_etc1_to_bc7_m6_table201[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x500001,0x780000,0x780000,0x780000,0x780000,0x780000, -0x780000,0xF40000,0xF40000,0xF40000,0x28000000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0xF40000,0xF40000,0xF40000,0x28000000,0xF40000,0xF40000,0xF40000,0x28000000,0x28000000,0x580000,0x500001,0x500001,0x600000,0x4640000,0x700000,0x700000,0x880000,0x600000,0x4640000,0xAC0000,0xF40000, -0xAC0000,0xD40001,0xD40001,0xD40001,0xD40001,0x33C0000,0x33C0000,0x33C0000,0x23FC0000,0x23FC0000,0x6A000000,0x33C0000,0x33C0000,0x33C0000,0x23FC0000,0x23FC0000,0x6A000000,0x23FC0000,0x23FC0000,0x6A000000,0x6A000000,0x33C0000,0x33C0000,0x33C0000,0x23FC0000,0x23FC0000,0x6A000000,0x23FC0000,0x23FC0000,0x6A000000,0x6A000000,0x23FC0000, -0x23FC0000,0x6A000000,0x6A000000,0x6A000000,0xF80000,0xE40000,0xD40001,0x1240000,0x1680000,0x1C80000,0x7F80000,0x37F40000,0x10C0000,0x33C0000,0x1C80000,0x6A000000,0x1C80000,0x1680000,0x1BFC0000,0x91F80000,0xB2000001,0x1BFC0000,0x91F80000,0xB2000001,0x91F80000,0xB2000001,0xB2000001,0x1BFC0000,0x91F80000,0xB2000001,0x91F80000,0xB2000001, -0xB2000001,0x91F80000,0xB2000001,0xB2000001,0xB2000001,0x1BFC0000,0x91F80000,0xB2000001,0x91F80000,0xB2000001,0xB2000001,0x91F80000,0xB2000001,0xB2000001,0xB2000001,0x91F80000,0xB2000001,0xB2000001,0xB2000001,0xB2000001,0x1C40000,0x1800000,0x1800000,0x47FC0000,0x81FC0000,0xA1FC0000,0xB2000001,0xB2000001,0x3E80000,0x61FC0000,0xAFF00000,0xB2000001, -0x73FC0000,0xA448CA,0xFE802443,0xF26416C6,0xB26416C7,0xFE5018EA,0xFE300393,0xBA3C069F,0xCA280DB2,0xA824045E,0x8E280DB2,0xFE30290A,0xFA00097B,0xB6140951,0xCC000D49,0xA800016E,0x8E000A8D,0xA20025C6,0x9000116D,0x7E001419,0x6C0025C5,0xF448CA,0xE20020BF,0xAE00178E,0xB4001CE2,0xA2000CB3,0x86001292,0x98003001,0x8A001A29,0x7A001AD5,0x68002A81,0x1F048CA, -0x7800301E,0x6C002C86,0x5C0037E9,0x500048CE,0xFE782C61,0xFE8C3E62,0xF8A040BB,0xFE401395,0xFE0403A8,0xD6000182,0xB0000031,0xA0000422,0xFE542B17,0xFE1810AE,0xA6000CB9,0x7A001AD5,0x15C48CA,0xDC25C6,0xFEB00F63,0xE6940883,0xB2940883,0xFE800E9A,0xF6500003,0xB6640103,0xC0500A6A,0xA84801D6,0x8E500A6A,0x14425C5,0xFA00097B,0xB22C0883,0xCC000D49,0xA800016E, -0x8E000A8D,0x27F825C5,0x9000116D,0x7E001419,0x6C0025C5,0x14425C5,0xFA00097B,0xB22C0883,0xCC000D49,0xA800016E,0x8E000A8D,0x27F825C5,0x9000116D,0x7E001419,0x6C0025C5,0x27F825C5,0x9000116D,0x7E001419,0x6C0025C5,0x6C0025C5,0xFEA019C8,0xFECC20B2,0xFECC20B2,0xFE5C0DFE,0xFE100393,0xD6000182,0xB0000031,0xA0000422,0xFE8819FE,0xFE400C4D,0xAC000C4E,0x7E001419, -0x1D025C5,0x6416C6,0x6416C6,0x6416C6,0x6416C6,0xFE300392,0xFE300392,0xFE300392,0x94280349,0x94280349,0x6A280349,0xE0000882,0xE0000882,0xE0000882,0x98000061,0x98000061,0x6C040018,0x6C000884,0x6C000884,0x5C00027D,0x48000884,0x9016C5,0x9016C5,0x9016C5,0x80000875,0x80000875,0x62000491,0x62000CE4,0x62000CE4,0x540005F4,0x46000AA5,0x12416C5, -0x12416C5,0x44000E01,0x3E001058,0x300016C5,0xFE3C0B76,0xF458115D,0x6416C6,0xFE18042D,0xFC000074,0xC0000074,0xAE000022,0x840000FA,0xFE200AF6,0xFE00038A,0x860008CA,0x540005F4,0xD016C5,0x940882,0x940882,0x940882,0x940882,0xEE540001,0xEE540001,0xEE540001,0x8A500001,0x8A500001,0x6A500001,0xDC0882,0xDC0882,0xDC0882,0x98000061,0x98000061, -0x6A100001,0x1BC0882,0x1BC0882,0x5C00027D,0x48000884,0xDC0882,0xDC0882,0xDC0882,0x98000061,0x98000061,0x6A100001,0x1BC0882,0x1BC0882,0x5C00027D,0x48000884,0x1BC0882,0x1BC0882,0x5C00027D,0x48000884,0x48000884,0xFA6C04E2,0xFC8805B2,0x940882,0xFE3402B1,0xFC000074,0xC0000074,0xAE000022,0x840000FA,0xFA48054A,0xFC140265,0x1380882,0x5C00027D, -0x1380882,0x1200A69,0xFEE80195,0xD6D80001,0xB2D40002,0x1A80A69,0xF84C0000,0xB2900001,0x59FC0A69,0xAC000082,0x8E000A69,0x1A80A69,0xF84C0000,0xB2900001,0x59FC0A69,0xAC000082,0x8E000A69,0x59FC0A69,0xAC000082,0x8E000A69,0x8E000A69,0x1A80A69,0xF84C0000,0xB2900001,0x59FC0A69,0xAC000082,0x8E000A69,0x59FC0A69,0xAC000082,0x8E000A69,0x8E000A69,0x59FC0A69, -0xAC000082,0x8E000A69,0x8E000A69,0x8E000A69,0xFAF00802,0x1300A69,0xFF0C087D,0xFEAC04B1,0xFE3C01C4,0xDA00006A,0xB2040001,0xA800019A,0xFEDC07C1,0xFE880424,0xBE180000,0x8E000A69,0x2DFC0A69,0x280349,0x280349,0x280349,0x280349,0x280349,0x280349,0x280349,0x280349,0x280349,0x280349,0x7C000000,0x7C000000,0x7C000000,0x7C000000,0x7C000000, -0x7C000000,0x3C000000,0x3C000000,0x3C000000,0x28000000,0x3C0349,0x3C0349,0x3C0349,0x3C0349,0x3C0349,0x3C0349,0x2E000132,0x2E000132,0x2E000132,0x2200009D,0x740349,0x740349,0x740349,0x1C0001F9,0x14000349,0xF4180154,0x280349,0x280349,0xFE04000D,0xC8000000,0x98000000,0x98000000,0x64000000,0xF6000105,0xB4000089,0x46000019,0x2E000132, -0x540349,}; -static const uint32_t g_etc1_to_bc7_m6_table202[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x600001,0x900000,0x900000,0x900000,0x900000,0x900000, -0x900000,0x1240000,0x1240000,0x1240000,0x30000000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x1240000,0x1240000,0x1240000,0x30000000,0x1240000,0x1240000,0x1240000,0x30000000,0x30000000,0x680000,0x600001,0x600001,0x2700000,0x4780000,0x840000,0x840000,0xA40000,0x2700000,0x4780000,0xD00000,0x1240000, -0xD00000,0xE40001,0xE40001,0xE40001,0xE40001,0x1540000,0x1540000,0x1540000,0x2FFC0000,0x2FFC0000,0x72000000,0x1540000,0x1540000,0x1540000,0x2FFC0000,0x2FFC0000,0x72000000,0x2FFC0000,0x2FFC0000,0x72000000,0x72000000,0x1540000,0x1540000,0x1540000,0x2FFC0000,0x2FFC0000,0x72000000,0x2FFC0000,0x2FFC0000,0x72000000,0x72000000,0x2FFC0000, -0x2FFC0000,0x72000000,0x72000000,0x72000000,0x10C0000,0x2F40000,0xE40001,0x1380000,0x3800000,0x1E80000,0x15F80000,0x41F80000,0x1200000,0x1540000,0x1E80000,0x72000000,0x1E80000,0x1780000,0x35FC0000,0x9DF40000,0xBA000001,0x35FC0000,0x9DF40000,0xBA000001,0x9DF40000,0xBA000001,0xBA000001,0x35FC0000,0x9DF40000,0xBA000001,0x9DF40000,0xBA000001, -0xBA000001,0x9DF40000,0xBA000001,0xBA000001,0xBA000001,0x35FC0000,0x9DF40000,0xBA000001,0x9DF40000,0xBA000001,0xBA000001,0x9DF40000,0xBA000001,0xBA000001,0xBA000001,0x9DF40000,0xBA000001,0xBA000001,0xBA000001,0xBA000001,0x1D80000,0x1900000,0x1900000,0x5BFC0000,0x8FFC0000,0xABFC0000,0xBA000001,0xBA000001,0x5FC0000,0x71FC0000,0xB9C40000,0xBA000001, -0x81FC0000,0xB04E1A,0xFE8C2977,0xFC6C1A06,0xBA6C1A07,0xFE5C1D1E,0xFE3C0597,0xC24408A6,0xD4300F1A,0xB22C059E,0x96300F1A,0xFE3C2B5E,0xFE080993,0xC01C0A01,0xD8000C89,0xB40000C6,0x96000A6A,0xAE0025C6,0x9C001065,0x8A001331,0x740025C5,0x1044E1A,0xE800231B,0xB4001A6E,0xC0001E4A,0xA8000D5F,0x8C001392,0xA200315F,0x92001A55,0x82001ADB,0x6E002B4D,0x7FC4E1A, -0x7E003356,0x72002F0A,0x66003AC6,0x56004E1E,0xFE803152,0xFAA443AA,0xFEAC45DF,0xFE441768,0xFE100503,0xE20000CE,0xBC00000B,0xAA00032A,0xFE5C2FFA,0xFE241464,0xB6000BBD,0x82001ADB,0x1744E1A,0xEC25C6,0xFEC41026,0xEEA40883,0xBAA40883,0xFE980F62,0xFE600003,0xBE740103,0xC8600A6A,0xB05801D6,0x96600A6A,0x15C25C5,0xFE080983,0xBA3C0883,0xD8000C89,0xB40000C6, -0x96000A6A,0x33F825C5,0x9C001065,0x8A001331,0x740025C5,0x15C25C5,0xFE080983,0xBA3C0883,0xD8000C89,0xB40000C6,0x96000A6A,0x33F825C5,0x9C001065,0x8A001331,0x740025C5,0x33F825C5,0x9C001065,0x8A001331,0x740025C5,0x740025C5,0xFEB41A2D,0xF8E0212E,0xFAE42103,0xFE780F0E,0xFE24046B,0xE20000CE,0xBC00000B,0xAA00032A,0xFE981A99,0xFE480D53,0xB6000B44,0x8A001331, -0x1F025C5,0x6C1A06,0x6C1A06,0x6C1A06,0x6C1A06,0xFE3C0566,0xFE3C0566,0xFE3C0566,0x9E3004B1,0x9E3004B1,0x723004B1,0xF8000882,0xF8000882,0xF8000882,0xA8000025,0xA8000025,0x76080051,0x78000884,0x78000884,0x66000220,0x50000884,0xA41A05,0xA41A05,0xA41A05,0x8C0009DD,0x8C0009DD,0x680005C9,0x6E000DF4,0x6E000DF4,0x5E000632,0x4C000B25,0x14C1A05, -0x14C1A05,0x50001001,0x44001218,0x36001A05,0xFE4C0DF1,0xFA641455,0x6C1A06,0xFE2C05C9,0xFE040078,0xD200003A,0xC0000004,0x960000A0,0xFC300D65,0xFE0004CA,0x960008DB,0x5E000632,0xE81A05,0xA40882,0xA40882,0xA40882,0xA40882,0xF6640001,0xF6640001,0xF6640001,0x92600001,0x92600001,0x72600001,0xF40882,0xF40882,0xF40882,0xA8000025,0xA8000025, -0x72200001,0x1F00882,0x1F00882,0x66000220,0x50000884,0xF40882,0xF40882,0xF40882,0xA8000025,0xA8000025,0x72200001,0x1F00882,0x1F00882,0x66000220,0x50000884,0x1F00882,0x1F00882,0x66000220,0x50000884,0x50000884,0xFE740502,0xF49805E9,0xA40882,0xFE4402E4,0xFE040074,0xD200003A,0xC0000004,0x960000A0,0xFE50057A,0xFE24028A,0x15C0882,0x66000220, -0x15C0882,0x1300A69,0xFF0001ED,0xDEE80001,0xBAE40002,0x1C00A69,0xFE600002,0xBAA00001,0x65F80A69,0xB6000059,0x96000A69,0x1C00A69,0xFE600002,0xBAA00001,0x65F80A69,0xB6000059,0x96000A69,0x65F80A69,0xB6000059,0x96000A69,0x96000A69,0x1C00A69,0xFE600002,0xBAA00001,0x65F80A69,0xB6000059,0x96000A69,0x65F80A69,0xB6000059,0x96000A69,0x96000A69,0x65F80A69, -0xB6000059,0x96000A69,0x96000A69,0x96000A69,0xFEF80832,0x1440A69,0xFD280882,0xFEC004EA,0xFE500220,0xE6000050,0xBA140001,0xB2000151,0xFEF00800,0xFEA00488,0xC6280000,0x96000A69,0x3DF80A69,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x3004B1,0x94000000,0x94000000,0x94000000,0x94000000,0x94000000, -0x94000000,0x48000000,0x48000000,0x48000000,0x30000000,0x4804B1,0x4804B1,0x4804B1,0x4804B1,0x4804B1,0x4804B1,0x3A0001BA,0x3A0001BA,0x3A0001BA,0x2E0000E5,0x8C04B1,0x8C04B1,0x8C04B1,0x200002E4,0x180004B1,0xF8200244,0x3004B1,0x3004B1,0xFC0C0064,0xEE000000,0xB6000000,0xB6000000,0x78000000,0xF60001A5,0xD60000C2,0x56000022,0x3A0001BA, -0x6404B1,}; -static const uint32_t g_etc1_to_bc7_m6_table203[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0x700001,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000, -0xA80000,0x1580000,0x1580000,0x1580000,0x38000000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0xA80000,0x1580000,0x1580000,0x1580000,0x38000000,0x1580000,0x1580000,0x1580000,0x38000000,0x38000000,0x4780000,0x700001,0x700001,0x840000,0x48C0000,0x2980000,0x2980000,0xC00000,0x840000,0x48C0000,0xF00000,0x1580000, -0xF00000,0xF40001,0xF40001,0xF40001,0xF40001,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x3BFC0000,0x7A000000,0x7A000000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x3BFC0000,0x7A000000,0x7A000000,0x3BFC0000, -0x3BFC0000,0x7A000000,0x7A000000,0x7A000000,0x51C0000,0xB040000,0xF40001,0x34C0000,0x19C0000,0x7FC0000,0x21FC0000,0x4BFC0000,0x1340000,0x16C0000,0x7FC0000,0x7A000000,0x7FC0000,0x1880000,0x4DFC0000,0xA7FC0000,0xC2000001,0x4DFC0000,0xA7FC0000,0xC2000001,0xA7FC0000,0xC2000001,0xC2000001,0x4DFC0000,0xA7FC0000,0xC2000001,0xA7FC0000,0xC2000001, -0xC2000001,0xA7FC0000,0xC2000001,0xC2000001,0xC2000001,0x4DFC0000,0xA7FC0000,0xC2000001,0xA7FC0000,0xC2000001,0xC2000001,0xA7FC0000,0xC2000001,0xC2000001,0xC2000001,0xA7FC0000,0xC2000001,0xC2000001,0xC2000001,0xC2000001,0x1EC0000,0x9A00000,0x9A00000,0x6FFC0000,0x9DF80000,0xB5FC0000,0xC2000001,0xC2000001,0x23FC0000,0x83FC0000,0xC1D40000,0xC2000001, -0x91FC0000,0xBC53CA,0xFE982F23,0xFE781DB3,0xC2781D9B,0xFE6821E2,0xFE480853,0xCC4C0AE2,0xDE3810C2,0xBA34072F,0x9E3810C2,0xFE442E39,0xFE140A9F,0xCA240AE9,0xE2000BDE,0xBE000056,0x9E040A76,0xBA0025C6,0xA6000F93,0x9000124D,0x7C0025C5,0x11853CA,0xF400260B,0xC0001DBE,0xCC001FF2,0xB4000E7F,0x980014B2,0xA8003317,0x9E001AAD,0x8C001AFD,0x7A002C15,0x11F853CA, -0x840036F6,0x780031DE,0x6C003DB6,0x5C0053CE,0xFE8C36AE,0xFEAC491A,0xFEAC4BCF,0xFE541B8E,0xFE24073F,0xEE000058,0xC6080023,0xB6000243,0xFE703553,0xFE301879,0xBE000B25,0x8C001AFD,0x19053CA,0xFC25C6,0xFED010EE,0xF6B40883,0xC2B40883,0xFEA41022,0xFE780023,0xC6840103,0xD0700A6A,0xB86801D6,0x9E700A6A,0x17425C5,0xFE200A0B,0xC24C0883,0xE2000BDE,0xBE000056, -0x9E0C0A6A,0x3FF825C5,0xA6000F93,0x9000124D,0x7C0025C5,0x17425C5,0xFE200A0B,0xC24C0883,0xE2000BDE,0xBE000056,0x9E0C0A6A,0x3FF825C5,0xA6000F93,0x9000124D,0x7C0025C5,0x3FF825C5,0xA6000F93,0x9000124D,0x7C0025C5,0x7C0025C5,0xFED01ACD,0xFEEC2132,0xFEEC213B,0xFE940FE5,0xFE38056B,0xEE000058,0xC410000B,0xB6000243,0xFEB41B45,0xFE680E81,0xBE000A95,0x9000124D, -0xBFC25C5,0x781D9A,0x781D9A,0x781D9A,0x781D9A,0xFE440795,0xFE440795,0xFE440795,0xA8380659,0xA8380659,0x7A380659,0xFE0C08AE,0xFE0C08AE,0xFE0C08AE,0xB4000005,0xB4000005,0x7E0C00AD,0x84000884,0x84000884,0x6C0001C4,0x58000884,0x2B01D9A,0x2B01D9A,0x2B01D9A,0x98000B85,0x98000B85,0x74000731,0x74000F18,0x74000F18,0x6800068C,0x52000BBD,0x1681D9A, -0x1681D9A,0x56001235,0x4A001408,0x3A001D9D,0xFE5810F1,0xFE6C17AD,0x781D9A,0xFE3407B5,0xFE140116,0xE200000D,0xD0000002,0xA0000059,0xFE341005,0xFE100696,0xA60008E8,0x6800068C,0xFC1D9A,0xB40882,0xB40882,0xB40882,0xB40882,0xFE740001,0xFE740001,0xFE740001,0x9A700001,0x9A700001,0x7A700001,0x10C0882,0x10C0882,0x10C0882,0xB4000005,0xB4000005, -0x7A300001,0xBF80882,0xBF80882,0x6C0001C4,0x58000884,0x10C0882,0x10C0882,0x10C0882,0xB4000005,0xB4000005,0x7A300001,0xBF80882,0xBF80882,0x6C0001C4,0x58000884,0xBF80882,0xBF80882,0x6C0001C4,0x58000884,0x58000884,0xFE900515,0xFCA805E9,0xB40882,0xFE5C02FD,0xFE1C009D,0xE200000D,0xCC080000,0xA0000059,0xFA70057D,0xFA4002D2,0x17C0882,0x6C0001C4, -0x17C0882,0x1400A69,0xFF0C025D,0xE6F80001,0xC2F40002,0x1D80A69,0xFE7C0014,0xC2B00001,0x71F80A69,0xBE000032,0x9E000A69,0x1D80A69,0xFE7C0014,0xC2B00001,0x71F80A69,0xBE000032,0x9E000A69,0x71F80A69,0xBE000032,0x9E000A69,0x9E000A69,0x1D80A69,0xFE7C0014,0xC2B00001,0x71F80A69,0xBE000032,0x9E000A69,0x71F80A69,0xBE000032,0x9E000A69,0x9E000A69,0x71F80A69, -0xBE000032,0x9E000A69,0x9E000A69,0x9E000A69,0xFF140845,0x1540A69,0xF53808C5,0xFED80550,0xFE68028A,0xF2000028,0xC2240001,0xBA000115,0xF7080841,0xFEC004E2,0xCE380000,0x9E000A69,0x4BFC0A69,0x380659,0x380659,0x380659,0x380659,0x380659,0x380659,0x380659,0x380659,0x380659,0x380659,0xAC000000,0xAC000000,0xAC000000,0xAC000000,0xAC000000, -0xAC000000,0x54000000,0x54000000,0x54000000,0x38000000,0x540659,0x540659,0x540659,0x540659,0x540659,0x540659,0x40000262,0x40000262,0x40000262,0x34000131,0xA40659,0xA40659,0xA40659,0x260003E8,0x1C000659,0xFC280374,0x380659,0x380659,0xFE100104,0xFA040014,0xD4000000,0xD4000000,0x8C000000,0xFA0802AD,0xF4000112,0x66000028,0x40000262, -0x740659,}; -static const uint32_t g_etc1_to_bc7_m6_table204[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0x840000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000, -0xC40000,0x18C0000,0x18C0000,0x18C0000,0x40000001,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0xC40000,0x18C0000,0x18C0000,0x18C0000,0x40000001,0x18C0000,0x18C0000,0x18C0000,0x40000001,0x40000001,0x8C0000,0x840000,0x840000,0x2980000,0xA40000,0xB40000,0xB40000,0xDC0000,0x2980000,0xA40000,0x1180000,0x18C0000, -0x1180000,0x1080000,0x1080000,0x1080000,0x1080000,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x82000001,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x82000001,0x49F80000,0x49F80000,0x82000001,0x82000001,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x82000001,0x49F80000,0x49F80000,0x82000001,0x82000001,0x49F80000, -0x49F80000,0x82000001,0x82000001,0x82000001,0x1340000,0x5180000,0x1080000,0x1680000,0x1BC0000,0x19FC0000,0x31FC0000,0x59F40000,0x14C0000,0x1880000,0x19FC0000,0x82000001,0x19FC0000,0x1980001,0x69FC0000,0xB5FC0000,0xCC000000,0x69FC0000,0xB5FC0000,0xCC000000,0xB5FC0000,0xCC000000,0xCC000000,0x69FC0000,0xB5FC0000,0xCC000000,0xB5FC0000,0xCC000000, -0xCC000000,0xB5FC0000,0xCC000000,0xCC000000,0xCC000000,0x69FC0000,0xB5FC0000,0xCC000000,0xB5FC0000,0xCC000000,0xCC000000,0xB5FC0000,0xCC000000,0xCC000000,0xCC000000,0xB5FC0000,0xCC000000,0xCC000000,0xCC000000,0xCC000000,0xBFC0000,0x3B40000,0x3B40000,0x85FC0000,0xABFC0000,0xC1F40000,0xCC000000,0xCC000000,0x45FC0000,0x95FC0000,0xCBC80000,0xCC000000, -0xA1FC0000,0xC85AA5,0xFEA4360C,0xFE88228D,0xCC842208,0xFE8027E1,0xFE540C44,0xD8580DC5,0xE84412ED,0xC43C0938,0xA84012ED,0xFE503266,0xFE200CC4,0xD6240C2A,0xF4000B45,0xCA000015,0xAA080AB6,0xC60025C5,0xB2000E98,0x9C001162,0x860025C6,0x12C5AA3,0xFA002A18,0xCC00220B,0xE20021ED,0xBA00100A,0xA2001647,0xBA00351E,0xA8001B25,0x92001B46,0x80002CFE,0x1BF85AA3, -0x90003B5B,0x84003543,0x7200414F,0x64005AA3,0xFEA03D3B,0xF8C05071,0xFAC452C9,0xFE6C216E,0xFE280A9E,0xFC00000E,0xD00C0071,0xC0000184,0xFE783BC2,0xFE441DA3,0xCE000A96,0x92001B46,0x1AC5AA3,0x10C25C5,0xFEE811FD,0xFEC80885,0xCCC40884,0xFEBC1115,0xFE880086,0xD0980104,0xDA840A69,0xC07801D5,0xA8840A69,0x38C25C5,0xFE380AD4,0xCC5C0884,0xF4000B45,0xCA000015, -0xA8200A69,0x4BFC25C5,0xB2000E98,0x9C001162,0x860025C6,0x38C25C5,0xFE380AD4,0xCC5C0884,0xF4000B45,0xCA000015,0xA8200A69,0x4BFC25C5,0xB2000E98,0x9C001162,0x860025C6,0x4BFC25C5,0xB2000E98,0x9C001162,0x860025C6,0x860025C6,0xFEE41BB9,0xF90021AD,0xFD082185,0xFE981149,0xFE5C06AE,0xFC00000E,0xCE24000C,0xC0000184,0xFEC41BE9,0xFE840F78,0xCE0009ED,0x9C001162, -0x1DF825C5,0x842208,0x842208,0x842208,0x842208,0xFE500A9D,0xFE500A9D,0xFE500A9D,0xB4400884,0xB4400884,0x82400885,0xFE180974,0xFE180974,0xFE180974,0xC4000001,0xC4000001,0x88140141,0x92000882,0x92000882,0x7800015A,0x62000882,0xC42208,0xC42208,0xC42208,0xA2000D86,0xA2000D86,0x8000092D,0x80001086,0x80001086,0x6E00071A,0x5E000C63,0x18C2208, -0x18C2208,0x5C001505,0x5000166E,0x4000220B,0xFE6814BA,0xF67C1C38,0x842208,0xFE440A76,0xFE1C025D,0xF8000000,0xDE04001E,0xAE000022,0xFE3C13E0,0xFE180932,0xB6000903,0x6E00071A,0x1182208,0xC40884,0xC40884,0xC40884,0xC40884,0xFC88000D,0xFC88000D,0xFC88000D,0xA2840001,0xA2840001,0x82840001,0x3240882,0x3240882,0x3240882,0xC0080001,0xC0080001, -0x82440001,0x17FC0882,0x17FC0882,0x7800015A,0x62000882,0x3240882,0x3240882,0x3240882,0xC0080001,0xC0080001,0x82440001,0x17FC0882,0x17FC0882,0x7800015A,0x62000882,0x17FC0882,0x17FC0882,0x7800015A,0x62000882,0x62000882,0xFEA0054A,0xF6BC0620,0xC40884,0xFE780349,0xFC3C00CA,0xF8000000,0xD21C0001,0xAE000022,0xF88805B2,0xFC5802F9,0x1A40882,0x7800015A, -0x1A40882,0x1500A69,0xFF2402D5,0xF1080000,0xCD080000,0x1F40A69,0xFEA0004A,0xCCC00000,0x7FF80A69,0xCA000014,0xA8000A69,0x1F40A69,0xFEA0004A,0xCCC00000,0x7FF80A69,0xCA000014,0xA8000A69,0x7FF80A69,0xCA000014,0xA8000A69,0xA8000A69,0x1F40A69,0xFEA0004A,0xCCC00000,0x7FF80A69,0xCA000014,0xA8000A69,0x7FF80A69,0xCA000014,0xA8000A69,0xA8000A69,0x7FF80A69, -0xCA000014,0xA8000A69,0xA8000A69,0xA8000A69,0xFB2C0884,0x1680A69,0xFF4C08C5,0xFEEC05A5,0xFE900304,0xFC00000D,0xCC340000,0xC20000DD,0xFF180845,0xFED80550,0xD64C0001,0xA8000A69,0x5DF80A69,0x400884,0x400884,0x400884,0x400884,0x400884,0x400884,0x400884,0x400884,0x400884,0x400884,0xC8000000,0xC8000000,0xC8000000,0xC8000000,0xC8000000, -0xC8000000,0x60000001,0x60000001,0x60000001,0x40000001,0x600882,0x600882,0x600882,0x600882,0x600882,0x600882,0x4C000335,0x4C000335,0x4C000335,0x3A00019A,0xC40882,0xC40882,0xC40882,0x2C00053D,0x20000882,0xFE2C052D,0x400884,0x400884,0xFE200200,0xFE0C0075,0xF6000000,0xF6000000,0xA2000000,0xFE100422,0xFE0001C4,0x7600003A,0x4C000335, -0x8C0882,}; -static const uint32_t g_etc1_to_bc7_m6_table205[] = { -0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x2000000,0x2000000,0x2000000,0x2000000,0x2000000,0x2000000,0x2000000,0x2000000,0x2000000,0x2000000,0x0, -0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x8000000,0x8000000,0x8000000,0x2000000,0x0,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000, -0xDC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x48000001,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0xDC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x48000001,0x1BC0000,0x1BC0000,0x1BC0000,0x48000001,0x48000001,0x69C0000,0x940000,0x940000,0xAC0000,0xB80000,0xC80000,0xC80000,0xF80000,0xAC0000,0xB80000,0x1380000,0x1BC0000, -0x1380000,0x1180000,0x1180000,0x1180000,0x1180000,0x1A00000,0x1A00000,0x1A00000,0x55F80000,0x55F80000,0x8A000001,0x1A00000,0x1A00000,0x1A00000,0x55F80000,0x55F80000,0x8A000001,0x55F80000,0x55F80000,0x8A000001,0x8A000001,0x1A00000,0x1A00000,0x1A00000,0x55F80000,0x55F80000,0x8A000001,0x55F80000,0x55F80000,0x8A000001,0x8A000001,0x55F80000, -0x55F80000,0x8A000001,0x8A000001,0x8A000001,0x3440000,0xD280000,0x1180000,0x17C0000,0x3D40000,0x27FC0000,0x3FF80000,0x63F80000,0x1600000,0x1A00000,0x27FC0000,0x8A000001,0x27FC0000,0x1A80001,0x81FC0000,0xC1FC0000,0xD4000000,0x81FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xD4000000,0xD4000000,0x81FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xD4000000, -0xD4000000,0xC1FC0000,0xD4000000,0xD4000000,0xD4000000,0x81FC0000,0xC1FC0000,0xD4000000,0xC1FC0000,0xD4000000,0xD4000000,0xC1FC0000,0xD4000000,0xD4000000,0xD4000000,0xC1FC0000,0xD4000000,0xD4000000,0xD4000000,0xD4000000,0x33FC0000,0xBC40000,0xBC40000,0x99FC0000,0xB9FC0000,0xCBF40000,0xD4000000,0xD4000000,0x63FC0000,0xA7FC0000,0xD3D80000,0xD4000000, -0xB1FC0000,0xD4604E,0xFEB03C09,0xFE9426F6,0xD49025C5,0xFE8C2D3E,0xFE641023,0xDE641046,0xF24C14D2,0xCE440B09,0xB04C14D2,0xFE68364F,0xFE2C0F61,0xE02C0D53,0xFC040B0A,0xD2040012,0xB20C0B07,0xD20025C6,0xB8000DD5,0xA60010C2,0x8E0025C7,0x33C604A,0xFE042E6B,0xD40425C6,0xE800235E,0xC6001139,0xA80017A6,0xC0003691,0xAE001B58,0x9E001B37,0x86002D9F,0x23FC604A, -0x96003EDE,0x8A0037DE,0x7E004416,0x6A00604A,0xFEA0430C,0xFECC55D2,0xFECC5866,0xFE702686,0xFE3C0E17,0xFE08006C,0xDA1400D9,0xCA0000F6,0xFE904155,0xFE50227D,0xD60009ED,0x9E001B37,0x1C8604A,0x11C25C5,0xFEF412E5,0xFED808A8,0xD4D40884,0xFED011E1,0xFEA0010E,0xD8A80104,0xE2940A69,0xC88801D5,0xB0940A69,0x3A425C5,0xFE580BA3,0xD46C0884,0xFC040B09,0xD2040011, -0xB0300A69,0x57FC25C5,0xB8000DD4,0xA60010C1,0x8E0025C6,0x3A425C5,0xFE580BA3,0xD46C0884,0xFC040B09,0xD2040011,0xB0300A69,0x57FC25C5,0xB8000DD4,0xA60010C1,0x8E0025C6,0x57FC25C5,0xB8000DD4,0xA60010C1,0x8E0025C6,0x8E0025C6,0xFEF81C2C,0xFF0C21C5,0xFF0C21E5,0xFEC01224,0xFE6807CD,0xFE18003B,0xD634000C,0xCA0000F5,0xFEDC1C41,0xFE9810B6,0xD600095D,0xA60010C1, -0x2BFC25C5,0x9025C5,0x9025C5,0x9025C5,0x9025C5,0xFE5C0D6A,0xFE5C0D6A,0xFE5C0D6A,0xBC4C0A6A,0xBC4C0A6A,0x8A4C0A6A,0xFE240A95,0xFE240A95,0xFE240A95,0xD204000E,0xD204000E,0x901801D6,0x9E000883,0x9E000883,0x84000113,0x6A000883,0xD425C5,0xD425C5,0xD425C5,0xAE000F45,0xAE000F45,0x86000AD6,0x8C00118B,0x8C00118B,0x7A000763,0x62000CC3,0x1B025C5, -0x1B025C5,0x66001782,0x56001853,0x460025C6,0xFE681813,0xFC881FBD,0x9025C5,0xFE4C0D02,0xFE2C03E3,0xFE08001B,0xEC08004D,0xBC00000B,0xFE5016E0,0xFE300BA2,0xC60008FD,0x7A000763,0x13025C5,0xD40884,0xD40884,0xD40884,0xD40884,0xFE98001D,0xFE98001D,0xFE98001D,0xAA940001,0xAA940001,0x8A940001,0x33C0882,0x33C0882,0x33C0882,0xC8180001,0xC8180001, -0x8A540001,0x23FC0882,0x23FC0882,0x84000112,0x6A000882,0x33C0882,0x33C0882,0x33C0882,0xC8180001,0xC8180001,0x8A540001,0x23FC0882,0x23FC0882,0x84000112,0x6A000882,0x23FC0882,0x23FC0882,0x84000112,0x6A000882,0x6A000882,0xFAB4057D,0xFECC0620,0xD40884,0xFE880374,0xFE5400F2,0xFE100001,0xDA2C0001,0xBC00000A,0xFE9405BA,0xFE680322,0x1C80882,0x84000112, -0x1C80882,0x1600A69,0xFF3C0355,0xF9180000,0xD5180000,0xFFC0A69,0xFEB80092,0xD4D00000,0x8BF80A69,0xD2000008,0xB0000A69,0xFFC0A69,0xFEB80092,0xD4D00000,0x8BF80A69,0xD2000008,0xB0000A69,0x8BF80A69,0xD2000008,0xB0000A69,0xB0000A69,0xFFC0A69,0xFEB80092,0xD4D00000,0x8BF80A69,0xD2000008,0xB0000A69,0x8BF80A69,0xD2000008,0xB0000A69,0xB0000A69,0x8BF80A69, -0xD2000008,0xB0000A69,0xB0000A69,0xB0000A69,0xFF3408B4,0x1780A69,0xF75C0908,0xFF040611,0xFEBC037A,0xFE180032,0xD4440000,0xCE0000B4,0xFF2C088A,0xFEF005C4,0xDE5C0001,0xB0000A69,0x6BFC0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0x4C0A69,0xDC040001,0xDC040001,0xDC040001,0xDC040001,0xDC040001, -0xDC040001,0x6C040001,0x6C040001,0x6C040001,0x48000002,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x580003D4,0x580003D4,0x580003D4,0x400001E1,0xDC0A69,0xDC0A69,0xDC0A69,0x32000652,0x24000A69,0xF63C06CD,0x4C0A69,0x4C0A69,0xFE200321,0xFE140115,0xFE080012,0xFE080012,0xB2040001,0xFE1005A5,0xFE0002C5,0x86000035,0x580003D4, -0x9C0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table206[] = { -0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x2180000,0x300000, -0x300000,0x300000,0x300000,0x8000000,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x100001,0x140000,0x140000,0x140000,0x2180000,0x240000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xA40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000, -0xF40000,0x1F00000,0x1F00000,0x1F00000,0x50000001,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0xF40000,0x1F00000,0x1F00000,0x1F00000,0x50000001,0x1F00000,0x1F00000,0x1F00000,0x50000001,0x50000001,0xEAC0000,0xA40000,0xA40000,0x6BC0000,0x2CC0000,0x2DC0000,0x2DC0000,0x1140000,0x6BC0000,0x2CC0000,0x15C0000,0x1F00000, -0x15C0000,0x1280000,0x1280000,0x1280000,0x1280000,0x1B80000,0x1B80000,0x1B80000,0x61F80000,0x61F80000,0x92000001,0x1B80000,0x1B80000,0x1B80000,0x61F80000,0x61F80000,0x92000001,0x61F80000,0x61F80000,0x92000001,0x92000001,0x1B80000,0x1B80000,0x1B80000,0x61F80000,0x61F80000,0x92000001,0x61F80000,0x61F80000,0x92000001,0x92000001,0x61F80000, -0x61F80000,0x92000001,0x92000001,0x92000001,0x1580000,0x13C0000,0x1280000,0x3900000,0x1F00000,0x37FC0000,0x4BFC0000,0x6DFC0000,0x1740000,0x1B80000,0x37FC0000,0x92000001,0x37FC0000,0x1B80001,0x99FC0000,0xCDFC0000,0xDC000000,0x99FC0000,0xCDFC0000,0xDC000000,0xCDFC0000,0xDC000000,0xDC000000,0x99FC0000,0xCDFC0000,0xDC000000,0xCDFC0000,0xDC000000, -0xDC000000,0xCDFC0000,0xDC000000,0xDC000000,0xDC000000,0x99FC0000,0xCDFC0000,0xDC000000,0xCDFC0000,0xDC000000,0xDC000000,0xCDFC0000,0xDC000000,0xDC000000,0xDC000000,0xCDFC0000,0xDC000000,0xDC000000,0xDC000000,0xDC000000,0x5BFC0000,0x1D80000,0x1D80000,0xADFC0000,0xC7F80000,0xD5F40000,0xDC000000,0xDC000000,0x81FC0000,0xB7FC0000,0xDBE80000,0xDC000000, -0xBFFC0000,0xE4604E,0xFEC43D66,0xFEA027D6,0xDCA025C5,0xFE982F3A,0xFE7811D1,0xE6741046,0xFA5C14D2,0xD6540B09,0xB85C14D2,0xFE7437E7,0xFE40114C,0xE83C0D53,0xFE140B5E,0xDA140012,0xBA1C0B07,0xDA1025C6,0xC4000D55,0xAC001046,0x961025C7,0x154604A,0xFE082F3D,0xDC1425C6,0xF4002116,0xD2000F41,0xB4001656,0xD20034A5,0xBA001888,0xA800188D,0x92002BD7,0x2FFC604A, -0xA0003D3D,0x960035A6,0x8400424A,0x7200604A,0xFEBC4462,0xFECC56D2,0xF8E05915,0xFE8028C4,0xFE501077,0xFE240126,0xE22400D9,0xD40800DB,0xFE9842FE,0xFE5C2552,0xE4000921,0xA800188D,0x1E8604A,0x12C25C5,0xFF0C13F5,0xFEEC090D,0xDCE40884,0xFEE812E1,0xFEB801D6,0xE0B80104,0xEAA40A69,0xD09801D5,0xB8A40A69,0x3BC25C5,0xFE700C8B,0xDC7C0884,0xFE140B5D,0xDA140011, -0xB8400A69,0x63FC25C5,0xC4000D04,0xAC000FF5,0x960025C6,0x3BC25C5,0xFE700C8B,0xDC7C0884,0xFE140B5D,0xDA140011,0xB8400A69,0x63FC25C5,0xC4000D04,0xAC000FF5,0x960025C6,0x63FC25C5,0xC4000D04,0xAC000FF5,0x960025C6,0x960025C6,0xFF001CF1,0xFB242229,0xFD282208,0xFED41351,0xFE90091D,0xFE2C00B1,0xDE44000C,0xD4000099,0xFEF81D1D,0xFEC01208,0xE4000908,0xAC000FF5, -0x3BFC25C5,0xA025C5,0xA025C5,0xA025C5,0xA025C5,0xFE680E06,0xFE680E06,0xFE680E06,0xC45C0A6A,0xC45C0A6A,0x925C0A6A,0xFE300B25,0xFE300B25,0xFE300B25,0xDA14000E,0xDA14000E,0x982801D6,0xA6100883,0xA6100883,0x8A0C0103,0x72100883,0xEC25C5,0xEC25C5,0xEC25C5,0xC0000E11,0xC0000E11,0x92000A76,0x98001033,0x98001033,0x800005C3,0x6E000B7B,0x1E425C5, -0x1E425C5,0x6C001642,0x5C001743,0x4E0025C6,0xFE841892,0xFE8C201D,0xA025C5,0xFE5C0DCE,0xFE40049B,0xFE1C004D,0xF418004D,0xC410000B,0xFE6417AD,0xFE3C0C7D,0xD6000894,0x800005C3,0x15425C5,0xE40884,0xE40884,0xE40884,0xE40884,0xFEB0003D,0xFEB0003D,0xFEB0003D,0xB2A40001,0xB2A40001,0x92A40001,0x1540882,0x1540882,0x1540882,0xD0280001,0xD0280001, -0x92640001,0x2FFC0882,0x2FFC0882,0x8A0000CA,0x72000882,0x1540882,0x1540882,0x1540882,0xD0280001,0xD0280001,0x92640001,0x2FFC0882,0x2FFC0882,0x8A0000CA,0x72000882,0x2FFC0882,0x2FFC0882,0x8A0000CA,0x72000882,0x72000882,0xFEBC05A5,0xF6DC0659,0xE40884,0xFE9803A9,0xFC6C0139,0xFE2C0008,0xE23C0001,0xC8000000,0xFAAC05E9,0xFE740371,0x1E80882,0x8A0000CA, -0x1E80882,0x1700A69,0xFF5003D0,0xFF280001,0xDD280000,0x29FC0A69,0xFED80104,0xDCE00000,0x97F80A69,0xDC000000,0xB8000A69,0x29FC0A69,0xFED80104,0xDCE00000,0x97F80A69,0xDC000000,0xB8000A69,0x97F80A69,0xDC000000,0xB8000A69,0xB8000A69,0x29FC0A69,0xFED80104,0xDCE00000,0x97F80A69,0xDC000000,0xB8000A69,0x97F80A69,0xDC000000,0xB8000A69,0xB8000A69,0x97F80A69, -0xDC000000,0xB8000A69,0xB8000A69,0xB8000A69,0xFF5008C9,0x1880A69,0xFF6C0908,0xFF200671,0xFED003FA,0xFE480074,0xDC540000,0xD4000080,0xF74C08C5,0xFF080601,0xE66C0001,0xB8000A69,0x7BFC0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0x5C0A69,0xE4140001,0xE4140001,0xE4140001,0xE4140001,0xE4140001, -0xE4140001,0x74140001,0x74140001,0x74140001,0x50100002,0x840A69,0x840A69,0x840A69,0x840A69,0x840A69,0x840A69,0x680002EA,0x680002EA,0x680002EA,0x4C000119,0x10C0A69,0x10C0A69,0x10C0A69,0x3E0005AA,0x2C000A69,0xFE4C06CD,0x5C0A69,0x5C0A69,0xFE3C0349,0xFE28013D,0xFE180022,0xFE180022,0xBA140001,0xFE2005E4,0xFE1402E4,0x9A000001,0x680002EA, -0xBC0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table207[] = { -0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x2300000,0x640000, -0x640000,0x640000,0x640000,0x10000000,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x200001,0x240000,0x240000,0x240000,0x2300000,0x480000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0xB40000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000, -0x10C0000,0xBF80000,0xBF80000,0xBF80000,0x58000001,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0xBF80000,0xBF80000,0xBF80000,0x58000001,0xBF80000,0xBF80000,0xBF80000,0x58000001,0x58000001,0xC00000,0xB40000,0xB40000,0xD00000,0x2E00000,0xF40000,0xF40000,0x32C0000,0xD00000,0x2E00000,0x17C0000,0xBF80000, -0x17C0000,0x1380000,0x1380000,0x1380000,0x1380000,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x9A000001,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x9A000001,0x6DF80000,0x6DF80000,0x9A000001,0x9A000001,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x9A000001,0x6DF80000,0x6DF80000,0x9A000001,0x9A000001,0x6DF80000, -0x6DF80000,0x9A000001,0x9A000001,0x9A000001,0x7680000,0x14C0000,0x1380000,0x1A80000,0xDFC0000,0x45FC0000,0x59FC0000,0x79F80000,0x1880000,0x1D00000,0x45FC0000,0x9A000001,0x45FC0000,0x1C80001,0xB1FC0000,0xD9FC0000,0xE4000000,0xB1FC0000,0xD9FC0000,0xE4000000,0xD9FC0000,0xE4000000,0xE4000000,0xB1FC0000,0xD9FC0000,0xE4000000,0xD9FC0000,0xE4000000, -0xE4000000,0xD9FC0000,0xE4000000,0xE4000000,0xE4000000,0xB1FC0000,0xD9FC0000,0xE4000000,0xD9FC0000,0xE4000000,0xE4000000,0xD9FC0000,0xE4000000,0xE4000000,0xE4000000,0xD9FC0000,0xE4000000,0xE4000000,0xE4000000,0xE4000000,0x81FC0000,0x1E80000,0x1E80000,0xC1FC0000,0xD5F80000,0xDFF80000,0xE4000000,0xE4000000,0x9FFC0000,0xC9FC0000,0xE3F80000,0xE4000000, -0xCFFC0000,0xF4604E,0xFED03EDE,0xFEB828D6,0xE4B025C5,0xFEB03142,0xFE8813AF,0xEE841046,0xFE6C14EA,0xDE640B09,0xC06C14D2,0xFE8C397F,0xFE58135C,0xF04C0D53,0xFE2C0C2E,0xE2240012,0xC22C0B07,0xE22025C6,0xCE080D51,0xB4101046,0x9E2025C7,0x16C604A,0xFE2C30C9,0xE42425C6,0xFA001F7A,0xDE000DA9,0xBC00157A,0xE20032CB,0xC60015F8,0xAE00161D,0x98002A5B,0x3BFC604A, -0xAC003B6D,0x9C003362,0x8A0040B6,0x7A00604A,0xFED045E7,0xFEEC56D6,0xFEEC5931,0xFE982B12,0xFE6812F6,0xFE38023E,0xEA3400D9,0xDC1800DB,0xFEB4443B,0xFE802784,0xEE0008BF,0xAE00161D,0x7FC604A,0x13C25C5,0xFF1814ED,0xFEFC0994,0xE4F40884,0xFEF413E5,0xFECC02E0,0xE8C80104,0xF2B40A69,0xD8A801D5,0xC0B40A69,0x1D425C5,0xFE940D8B,0xE48C0884,0xFE380C01,0xE2240011, -0xC0500A69,0x6FFC25C5,0xD0000C54,0xB2000F59,0x9E0025C6,0x1D425C5,0xFE940D8B,0xE48C0884,0xFE380C01,0xE2240011,0xC0500A69,0x6FFC25C5,0xD0000C54,0xB2000F59,0x9E0025C6,0x6FFC25C5,0xD0000C54,0xB2000F59,0x9E0025C6,0x9E0025C6,0xFF141D5E,0xFF2C2251,0xFF2C2274,0xFEEC1476,0xFEA40A79,0xFE480164,0xE654000C,0xE0000062,0xFF081D9A,0xFEC812EA,0xEE0008BB,0xB2000F59, -0x49FC25C5,0xB025C5,0xB025C5,0xB025C5,0xB025C5,0xFE800E8E,0xFE800E8E,0xFE800E8E,0xCC6C0A6A,0xCC6C0A6A,0x9A6C0A6A,0xFE440BA3,0xFE440BA3,0xFE440BA3,0xE224000E,0xE224000E,0xA03801D6,0xAE200883,0xAE200883,0x921C0103,0x7A200883,0x10425C5,0x10425C5,0x10425C5,0xD2000D0D,0xD2000D0D,0x9A080A6A,0xA8000EC8,0xA8000EC8,0x8C000443,0x74000A83,0x7FC25C5, -0x7FC25C5,0x7800151A,0x66001632,0x560025C6,0xFE901926,0xFCA82036,0xB025C5,0xFE700EB6,0xFE540563,0xFE34009A,0xFC28004D,0xCC20000B,0xFE78181A,0xFE500D1D,0xE4040883,0x8C000443,0x17425C5,0xF40884,0xF40884,0xF40884,0xF40884,0xFEBC0061,0xFEBC0061,0xFEBC0061,0xBAB40001,0xBAB40001,0x9AB40001,0x16C0882,0x16C0882,0x16C0882,0xD8380001,0xD8380001, -0x9A740001,0x3BFC0882,0x3BFC0882,0x96000092,0x7A000882,0x16C0882,0x16C0882,0x16C0882,0xD8380001,0xD8380001,0x9A740001,0x3BFC0882,0x3BFC0882,0x96000092,0x7A000882,0x3BFC0882,0x3BFC0882,0x96000092,0x7A000882,0x7A000882,0xFED805B2,0xFEEC0659,0xF40884,0xFEB403F5,0xFE80016D,0xFE480020,0xEA4C0001,0xD0100000,0xFEB40611,0xFE98039D,0x7FC0882,0x96000092, -0x7FC0882,0x1800A69,0xFF5C0454,0xFF380024,0xE5380000,0x41FC0A69,0xFEF00184,0xE4F00000,0xA1FC0A69,0xE40C0000,0xC0000A69,0x41FC0A69,0xFEF00184,0xE4F00000,0xA1FC0A69,0xE40C0000,0xC0000A69,0xA1FC0A69,0xE40C0000,0xC0000A69,0xC0000A69,0x41FC0A69,0xFEF00184,0xE4F00000,0xA1FC0A69,0xE40C0000,0xC0000A69,0xA1FC0A69,0xE40C0000,0xC0000A69,0xC0000A69,0xA1FC0A69, -0xE40C0000,0xC0000A69,0xC0000A69,0xC0000A69,0xFD680908,0x19C0A69,0xF77C094D,0xFF4006CD,0xFEE80484,0xFE7000E8,0xE4640000,0xE0000061,0xFF5C08C5,0xFF200681,0xEE7C0001,0xC0000A69,0x89FC0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0x6C0A69,0xEC240001,0xEC240001,0xEC240001,0xEC240001,0xEC240001, -0xEC240001,0x7C240001,0x7C240001,0x7C240001,0x58200002,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x74000232,0x74000232,0x74000232,0x58000091,0x13C0A69,0x13C0A69,0x13C0A69,0x44000502,0x34000A69,0xF65C070A,0x6C0A69,0x6C0A69,0xFE4C0372,0xFE3C016D,0xFE2C003D,0xFE2C003D,0xC2240001,0xFE3C05E9,0xFA2C0322,0xA2100001,0x74000232, -0xE00A69,}; -static const uint32_t g_etc1_to_bc7_m6_table208[] = { -0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x4C0000,0x980000, -0x980000,0x980000,0x980000,0x18000001,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x340000,0x380000,0x380000,0x380000,0x4C0000,0x6C0000,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0xC40001,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000, -0x3240000,0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x3240000,0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0x17FC0000,0x17FC0000,0x17FC0000,0x62000000,0x62000000,0xD40000,0xC40001,0xC40001,0x4E40000,0xF80000,0x10C0000,0x10C0000,0x14C0000,0x4E40000,0xF80000,0x1A40000,0x17FC0000, -0x1A40000,0x1480001,0x1480001,0x1480001,0x1480001,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0xA4000000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0xA4000000,0x79FC0000,0x79FC0000,0xA4000000,0xA4000000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0xA4000000,0x79FC0000,0x79FC0000,0xA4000000,0xA4000000,0x79FC0000, -0x79FC0000,0xA4000000,0xA4000000,0xA4000000,0x1800000,0x1600000,0x1480001,0x1C00000,0x23FC0000,0x57FC0000,0x69F80000,0x85F80000,0x39C0000,0x3E80000,0x57FC0000,0xA4000000,0x57FC0000,0x1DC0000,0xCDFC0000,0xE7F80000,0xEC000001,0xCDFC0000,0xE7F80000,0xEC000001,0xE7F80000,0xEC000001,0xEC000001,0xCDFC0000,0xE7F80000,0xEC000001,0xE7F80000,0xEC000001, -0xEC000001,0xE7F80000,0xEC000001,0xEC000001,0xEC000001,0xCDFC0000,0xE7F80000,0xEC000001,0xE7F80000,0xEC000001,0xEC000001,0xE7F80000,0xEC000001,0xEC000001,0xEC000001,0xE7F80000,0xEC000001,0xEC000001,0xEC000001,0xEC000001,0xADFC0000,0x1FC0000,0x1FC0000,0xD7FC0000,0xE3FC0000,0xEBF00000,0xEC000001,0xEC000001,0xC1FC0000,0xDBFC0000,0xEDEC0000,0xEC000001, -0xDFFC0000,0x108604A,0xFEE840B6,0xFECC2A5B,0xECC025C7,0xFEC43362,0xFEA0161D,0xF6941046,0xFE84157A,0xE8780B07,0xC87C14D2,0xFEA43B6D,0xFE7015F8,0xFA600D51,0xFE400DA9,0xEC380012,0xCC400B09,0xEC3425C6,0xD81C0D53,0xBC201046,0xA63425C5,0x188604A,0xFE3832CB,0xEC3825C6,0xFE081F7A,0xE8000C2E,0xC60014EA,0xE80030C9,0xD200135C,0xBA0013AF,0xA20028D6,0x49F8604A, -0xB800397F,0xA6003142,0x96003EDE,0x8200604E,0xFEE44782,0xF90057DA,0xFB0459DA,0xFEAC2DF6,0xFE7C160E,0xFE4803FF,0xF24400DB,0xE42800D9,0xFED0463D,0xFE902A56,0xF81008BB,0xBA0013AF,0x19FC604A,0x15025C6,0xFF301632,0xFF140A83,0xED080883,0xFF0C151A,0xFEE40443,0xF0D80103,0xFAC40A6A,0xE2BC01D6,0xC8C40A6A,0x1F025C5,0xFEAC0EC8,0xECA00883,0xFE580D0D,0xEC38000E, -0xC8600A6A,0x7DF825C5,0xDC000BA3,0xBE000E8E,0xA60025C5,0x1F025C5,0xFEAC0EC8,0xECA00883,0xFE580D0D,0xEC38000E,0xC8600A6A,0x7DF825C5,0xDC000BA3,0xBE000E8E,0xA60025C5,0x7DF825C5,0xDC000BA3,0xBE000E8E,0xA60025C5,0xA60025C5,0xFF341E13,0xFD4822AA,0xFF4C228E,0xFF0015CE,0xFEBC0C34,0xFE70027E,0xEE64000B,0xE800004D,0xFF181E3E,0xFEDC1481,0xFC00088B,0xBE000E8E, -0x5BFC25C5,0xC025C6,0xC025C6,0xC025C6,0xC025C6,0xFE980F59,0xFE980F59,0xFE980F59,0xD67C0A69,0xD67C0A69,0xA47C0A69,0xFE5C0C54,0xFE5C0C54,0xFE5C0C54,0xEA340011,0xEA340011,0xAA4C01D5,0xB6340884,0xB6340884,0x9A2C0104,0x82340884,0x12025C5,0x12025C5,0x12025C5,0xE2000C01,0xE2000C01,0xA4180A69,0xB4000D8B,0xB4000D8B,0x980002E0,0x80000994,0x15F825C5, -0x15F825C5,0x840013E5,0x720014ED,0x600025C5,0xFEA019B4,0xF6BC20B2,0xC025C6,0xFE880F9E,0xFE6C0669,0xFE480126,0xFE3C0062,0xD430000C,0xFE9018F2,0xFE680E59,0xEC180882,0x980002E0,0x19C25C5,0x1080882,0x1080882,0x1080882,0x1080882,0xFED00092,0xFED00092,0xFED00092,0xC4C40001,0xC4C40001,0xA4C40001,0x1880882,0x1880882,0x1880882,0xE04C0001,0xE04C0001, -0xA4840001,0x49F80882,0x49F80882,0xA0000061,0x82000884,0x1880882,0x1880882,0x1880882,0xE04C0001,0xE04C0001,0xA4840001,0x49F80882,0x49F80882,0xA0000061,0x82000884,0x49F80882,0x49F80882,0xA0000061,0x82000884,0x82000884,0xFAEC05E9,0xF9000692,0x1080882,0xFEC4042A,0xFE9801A9,0xFE64003D,0xF65C0000,0xD8240001,0xFED00628,0xFEA403E8,0x19FC0882,0xA0000061, -0x19FC0882,0x1940A69,0xFF740502,0xFF4C0091,0xED480002,0x5BFC0A69,0xFF140232,0xED040001,0xAFFC0A69,0xEC240001,0xC8000A69,0x5BFC0A69,0xFF140232,0xED040001,0xAFFC0A69,0xEC240001,0xC8000A69,0xAFFC0A69,0xEC240001,0xC8000A69,0xC8000A69,0x5BFC0A69,0xFF140232,0xED040001,0xAFFC0A69,0xEC240001,0xC8000A69,0xAFFC0A69,0xEC240001,0xC8000A69,0xC8000A69,0xAFFC0A69, -0xEC240001,0xC8000A69,0xC8000A69,0xC8000A69,0xFF780910,0x1AC0A69,0xFF8C0951,0xFF580749,0xFF10052A,0xFE98018D,0xEC780001,0xE800003D,0xFF70090A,0xFF4006E5,0xF88C0000,0xC8000A69,0x9BFC0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0x7C0A69,0xF6340000,0xF6340000,0xF6340000,0xF6340000,0xF6340000, -0xF6340000,0x86340000,0x86340000,0x86340000,0x62340000,0xB80A69,0xB80A69,0xB80A69,0xB80A69,0xB80A69,0xB80A69,0x86000184,0x86000184,0x86000184,0x62000024,0x1740A69,0x1740A69,0x1740A69,0x50000454,0x3E000A69,0xFE6C0710,0x7C0A69,0x7C0A69,0xFE5803B5,0xFE5001A5,0xFE3C0061,0xFE3C0061,0xCC340000,0xFA500622,0xFE3C0355,0xAC200000,0x86000184, -0x1080A69,}; -static const uint32_t g_etc1_to_bc7_m6_table209[] = { -0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0xCC0000, -0xCC0000,0xCC0000,0xCC0000,0x20000001,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x480000,0x480000,0x480000,0x640000,0x900000,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0xD40001,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000, -0x33C0000,0x23FC0000,0x23FC0000,0x23FC0000,0x6A000000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x33C0000,0x23FC0000,0x23FC0000,0x23FC0000,0x6A000000,0x23FC0000,0x23FC0000,0x23FC0000,0x6A000000,0x6A000000,0xE40000,0xD40001,0xD40001,0xF80000,0x10C0000,0x1240000,0x1240000,0x1680000,0xF80000,0x10C0000,0x1C80000,0x23FC0000, -0x1C80000,0x1580001,0x1580001,0x1580001,0x1580001,0x7FC0000,0x7FC0000,0x7FC0000,0x85FC0000,0x85FC0000,0xAC000000,0x7FC0000,0x7FC0000,0x7FC0000,0x85FC0000,0x85FC0000,0xAC000000,0x85FC0000,0x85FC0000,0xAC000000,0xAC000000,0x7FC0000,0x7FC0000,0x7FC0000,0x85FC0000,0x85FC0000,0xAC000000,0x85FC0000,0x85FC0000,0xAC000000,0xAC000000,0x85FC0000, -0x85FC0000,0xAC000000,0xAC000000,0xAC000000,0x5900000,0x1700000,0x1580001,0x3D40000,0x37FC0000,0x65FC0000,0x75FC0000,0x91F40000,0x3B00000,0x7FC0000,0x65FC0000,0xAC000000,0x65FC0000,0x1EC0000,0xE5FC0000,0xF3F80000,0xF4000001,0xE5FC0000,0xF3F80000,0xF4000001,0xF3F80000,0xF4000001,0xF4000001,0xE5FC0000,0xF3F80000,0xF4000001,0xF3F80000,0xF4000001, -0xF4000001,0xF3F80000,0xF4000001,0xF4000001,0xF4000001,0xE5FC0000,0xF3F80000,0xF4000001,0xF3F80000,0xF4000001,0xF4000001,0xF3F80000,0xF4000001,0xF4000001,0xF4000001,0xF3F80000,0xF4000001,0xF4000001,0xF4000001,0xF4000001,0xD5FC0000,0x77FC0000,0x77FC0000,0xEBFC0000,0xF1FC0000,0xF5F00000,0xF4000001,0xF4000001,0xDFFC0000,0xEDFC0000,0xF5FC0000,0xF4000001, -0xEFFC0000,0x118604A,0xFEF4424A,0xFED82BD7,0xF4D025C7,0xFED035A6,0xFEAC188D,0xFEA41046,0xFE941656,0xF0880B07,0xD08C14D2,0xFEBC3D3D,0xFE881888,0xFE740D55,0xFE580F41,0xF4480012,0xD4500B09,0xF44425C6,0xE02C0D53,0xC4301046,0xAE4425C5,0x1A0604A,0xFE5834A5,0xF44825C6,0xFE142116,0xF4000B5E,0xD00414D2,0xFA002F3D,0xDE00114C,0xC20011D1,0xAE0027D6,0x55F8604A, -0xC40037E7,0xB2002F3A,0x9C003D66,0x8A00604E,0xFEF44912,0xFF0C57EA,0xFF0C5A1A,0xFEC4307A,0xFE901902,0xFE5C05EA,0xFA5400DB,0xEC3800D9,0xFEDC478F,0xFEB02D65,0xFE2008C1,0xC20011D1,0x27FC604A,0x16025C6,0xFF441743,0xFF200B7B,0xF5180883,0xFF241642,0xFEF005C3,0xF8E80103,0xFED80A76,0xEACC01D6,0xD0D40A6A,0xDFC25C5,0xFECC1033,0xF4B00883,0xFE7C0E11,0xF448000E, -0xD0700A6A,0x89F825C5,0xE6000B25,0xCA000E06,0xAE0025C5,0xDFC25C5,0xFECC1033,0xF4B00883,0xFE7C0E11,0xF448000E,0xD0700A6A,0x89F825C5,0xE6000B25,0xCA000E06,0xAE0025C5,0x89F825C5,0xE6000B25,0xCA000E06,0xAE0025C5,0xAE0025C5,0xFF3C1EC2,0xFF4C22FE,0xF75C2313,0xFF1416C1,0xFEDC0DD4,0xFE8403CB,0xF674000B,0xF010004D,0xFF341F0E,0xFF0415B5,0xFE1408A6,0xCA000E06, -0x69FC25C5,0xD025C6,0xD025C6,0xD025C6,0xD025C6,0xFEA40FF5,0xFEA40FF5,0xFEA40FF5,0xDE8C0A69,0xDE8C0A69,0xAC8C0A69,0xFE740D04,0xFE740D04,0xFE740D04,0xF2440011,0xF2440011,0xB25C01D5,0xBE440884,0xBE440884,0xA23C0104,0x8A440884,0x13825C5,0x13825C5,0x13825C5,0xF4000B5D,0xF4000B5D,0xAC280A69,0xC6000C8B,0xC6000C8B,0xA20001D6,0x8800090D,0x21F825C5, -0x21F825C5,0x8A0012E1,0x780013F5,0x680025C5,0xFEBC1A59,0xFECC20B2,0xD025C6,0xFE981069,0xFE800755,0xFE5C01A9,0xFE540099,0xDC40000C,0xFEA01966,0xFE740F25,0xF4280882,0xA20001D6,0x1BC25C5,0x1180882,0x1180882,0x1180882,0x1180882,0xFEE800CA,0xFEE800CA,0xFEE800CA,0xCCD40001,0xCCD40001,0xACD40001,0x1A00882,0x1A00882,0x1A00882,0xE85C0001,0xE85C0001, -0xAC940001,0x55F80882,0x55F80882,0xA600003D,0x8A000884,0x1A00882,0x1A00882,0x1A00882,0xE85C0001,0xE85C0001,0xAC940001,0x55F80882,0x55F80882,0xA600003D,0x8A000884,0x55F80882,0x55F80882,0xA600003D,0x8A000884,0x8A000884,0xFEF40611,0xFF0C069A,0x1180882,0xFEDC0451,0xFEAC01ED,0xFE7C0064,0xFE6C0000,0xE0340001,0xF8EC0659,0xFEC40422,0x27FC0882,0xA600003D, -0x27FC0882,0x1A40A69,0xFF8005AA,0xFF640119,0xF5580002,0x75FC0A69,0xFF2C02EA,0xF5140001,0xBBFC0A69,0xF4340001,0xD0000A69,0x75FC0A69,0xFF2C02EA,0xF5140001,0xBBFC0A69,0xF4340001,0xD0000A69,0xBBFC0A69,0xF4340001,0xD0000A69,0xD0000A69,0x75FC0A69,0xFF2C02EA,0xF5140001,0xBBFC0A69,0xF4340001,0xD0000A69,0xBBFC0A69,0xF4340001,0xD0000A69,0xD0000A69,0xBBFC0A69, -0xF4340001,0xD0000A69,0xD0000A69,0xD0000A69,0xFD90094D,0x1C00A69,0xF9A00992,0xFF6C0794,0xFF3C05C4,0xFEC80235,0xF4880001,0xF2000022,0xF98C094D,0xFF58076D,0xFEA00004,0xD0000A69,0xA9FC0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0x8C0A69,0xFE440000,0xFE440000,0xFE440000,0xFE440000,0xFE440000, -0xFE440000,0x8E440000,0x8E440000,0x8E440000,0x6A440000,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0x92000104,0x92000104,0x92000104,0x6A000001,0x1A40A69,0x1A40A69,0x1A40A69,0x560003D0,0x46000A69,0xF8800745,0x8C0A69,0x8C0A69,0xFE6803E8,0xFE6401E1,0xFE540080,0xFE540080,0xD4440000,0xFE580652,0xFE50037A,0xB4300000,0x92000104, -0x1280A69,}; -static const uint32_t g_etc1_to_bc7_m6_table210[] = { -0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0x7C0000,0xFC0000, -0xFC0000,0xFC0000,0xFC0000,0x28000001,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x540000,0x4580000,0x4580000,0x4580000,0x7C0000,0xB00000,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0xE40001,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000, -0x1540000,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x72000000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x1540000,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x72000000,0x2FFC0000,0x2FFC0000,0x2FFC0000,0x72000000,0x72000000,0x2F40000,0xE40001,0xE40001,0x10C0000,0x1200000,0x1380000,0x1380000,0x3800000,0x10C0000,0x1200000,0x1E80000,0x2FFC0000, -0x1E80000,0x1680001,0x1680001,0x1680001,0x1680001,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x91FC0000,0x91FC0000,0xB4000000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x91FC0000,0x91FC0000,0xB4000000,0x91FC0000,0x91FC0000,0xB4000000,0xB4000000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x91FC0000,0x91FC0000,0xB4000000,0x91FC0000,0x91FC0000,0xB4000000,0xB4000000,0x91FC0000, -0x91FC0000,0xB4000000,0xB4000000,0xB4000000,0x1A40000,0x9800000,0x1680001,0x1EC0000,0x4BFC0000,0x75FC0000,0x83FC0000,0x9BF80000,0x3C40000,0x1FFC0000,0x75FC0000,0xB4000000,0x75FC0000,0x1FC0000,0xFDFC0000,0xFFF80000,0xFC000001,0xFDFC0000,0xFFF80000,0xFC000001,0xFFF80000,0xFC000001,0xFC000001,0xFDFC0000,0xFFF80000,0xFC000001,0xFFF80000,0xFC000001, -0xFC000001,0xFFF80000,0xFC000001,0xFC000001,0xFC000001,0xFDFC0000,0xFFF80000,0xFC000001,0xFFF80000,0xFC000001,0xFC000001,0xFFF80000,0xFC000001,0xFC000001,0xFC000001,0xFFF80000,0xFC000001,0xFC000001,0xFC000001,0xFC000001,0xFDFC0000,0xF7FC0000,0xF7FC0000,0xFDFC0000,0xFFF80000,0xFFF00000,0xFC000001,0xFC000001,0xFDFC0000,0xFDFC0000,0xFFD00000,0xFC000001, -0xFFF80000,0x128604A,0xFF004416,0xFEF02D9F,0xFCE025C7,0xFEE837DE,0xFEC01B37,0xFEB010C2,0xFEAC17A6,0xF8980B07,0xD89C14D2,0xFED03EDE,0xFEA01B58,0xFE8C0DD5,0xFE701139,0xFC580012,0xDC600B09,0xFC5425C6,0xE83C0D53,0xCC401046,0xB65425C5,0x1B8604A,0xFE7C3691,0xFC5825C6,0xFE2C235E,0xFC040B0A,0xD81414D2,0xFC002E6B,0xE8000F61,0xCC001023,0xB40026F6,0x61F8604A, -0xCA00364F,0xB8002D3E,0xA6003C09,0x9200604E,0xFF004A76,0xF92058E2,0xFB245AA3,0xFED432EA,0xFEA81C14,0xFE7C0838,0xFE6800F6,0xF44800D9,0xFEF0492C,0xFEC02FD3,0xFE38092D,0xCC001023,0x37FC604A,0x17025C6,0xFF501853,0xFF380CC3,0xFD280883,0xFF301782,0xFF080763,0xFEF40113,0xFEF00AD6,0xF2DC01D6,0xD8E40A6A,0x25FC25C5,0xFEE4118B,0xFCC00883,0xFEA00F45,0xFC58000E, -0xD8800A6A,0x95F825C5,0xEC000A95,0xD0000D6A,0xB60025C5,0x25FC25C5,0xFEE4118B,0xFCC00883,0xFEA00F45,0xFC58000E,0xD8800A6A,0x95F825C5,0xEC000A95,0xD0000D6A,0xB60025C5,0x95F825C5,0xEC000A95,0xD0000D6A,0xB60025C5,0xB60025C5,0xFF501F35,0xFD68232A,0xFF6C2313,0xFF30183D,0xFEE80F6B,0xFEAC055A,0xFE84000B,0xF820004D,0xFF441F99,0xFF101711,0xFE34092B,0xD0000D6A, -0x79FC25C5,0xE025C6,0xE025C6,0xE025C6,0xE025C6,0xFEB010C1,0xFEB010C1,0xFEB010C1,0xE69C0A69,0xE69C0A69,0xB49C0A69,0xFE8C0DD4,0xFE8C0DD4,0xFE8C0DD4,0xFA540011,0xFA540011,0xBA6C01D5,0xC6540884,0xC6540884,0xAA4C0104,0x92540884,0x15025C5,0x15025C5,0x15025C5,0xFC040B09,0xFC040B09,0xB4380A69,0xD2000BA3,0xD2000BA3,0xAE00010E,0x920008A8,0x2DF825C5, -0x2DF825C5,0x960011E1,0x840012E5,0x700025C5,0xFECC1ACA,0xF6DC212D,0xE025C6,0xFEAC116D,0xFE88084E,0xFE700251,0xFE6800F5,0xE450000C,0xFEB419D9,0xFE901016,0xFC380882,0xAE00010E,0x1E025C5,0x1280882,0x1280882,0x1280882,0x1280882,0xFEF40112,0xFEF40112,0xFEF40112,0xD4E40001,0xD4E40001,0xB4E40001,0x1B80882,0x1B80882,0x1B80882,0xF06C0001,0xF06C0001, -0xB4A40001,0x61F80882,0x61F80882,0xB200001D,0x92000884,0x1B80882,0x1B80882,0x1B80882,0xF06C0001,0xF06C0001,0xB4A40001,0x61F80882,0x61F80882,0xB200001D,0x92000884,0x61F80882,0x61F80882,0xB200001D,0x92000884,0x92000884,0xFF100620,0xF92006CD,0x1280882,0xFCF404B1,0xFEC00239,0xFE9400AA,0xFE84000A,0xE8440001,0xFEF8065D,0xFEDC0451,0x37FC0882,0xB200001D, -0x37FC0882,0x1B40A69,0xFF980652,0xFF7C01E1,0xFD680002,0x8DFC0A69,0xFF4C03D4,0xFD240001,0xC7FC0A69,0xFC440001,0xD8000A69,0x8DFC0A69,0xFF4C03D4,0xFD240001,0xC7FC0A69,0xFC440001,0xD8000A69,0xC7FC0A69,0xFC440001,0xD8000A69,0xD8000A69,0x8DFC0A69,0xFF4C03D4,0xFD240001,0xC7FC0A69,0xFC440001,0xD8000A69,0xC7FC0A69,0xFC440001,0xD8000A69,0xD8000A69,0xC7FC0A69, -0xFC440001,0xD8000A69,0xD8000A69,0xD8000A69,0xFDA40992,0x1D00A69,0xFFAC099A,0xFF840812,0xFF500668,0xFEF0031D,0xFC980001,0xFA000012,0xFF980951,0xFF7007FD,0xFED00049,0xD8000A69,0xB9FC0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0x9C0A69,0xFC580008,0xFC580008,0xFC580008,0xFC580008,0xFC580008, -0xFC580008,0x96540000,0x96540000,0x96540000,0x72540000,0xE80A69,0xE80A69,0xE80A69,0xE80A69,0xE80A69,0xE80A69,0xA2000092,0xA2000092,0xA2000092,0x720C0000,0x1D80A69,0x1D80A69,0x1D80A69,0x60000355,0x4E000A69,0xFE8C0751,0x9C0A69,0x9C0A69,0xFC800424,0xFE6C0220,0xFE6000B4,0xFE6000B4,0xDC540000,0xFE740659,0xFE6403CA,0xBC400000,0xA2000092, -0x14C0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table211[] = { -0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x940000,0x12C0000, -0x12C0000,0x12C0000,0x12C0000,0x30000001,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0x640000,0xC680000,0xC680000,0xC680000,0x940000,0xD40000,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0xF40001,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000, -0x16C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x16C0000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x3BFC0000,0x3BFC0000,0x3BFC0000,0x7A000000,0x7A000000,0xB040000,0xF40001,0xF40001,0x51C0000,0x1340000,0x34C0000,0x34C0000,0x19C0000,0x51C0000,0x1340000,0x7FC0000,0x3BFC0000, -0x7FC0000,0x1780001,0x1780001,0x1780001,0x1780001,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0x9DFC0000,0xBC000000,0xBC000000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0x9DFC0000,0xBC000000,0xBC000000,0x9DFC0000, -0x9DFC0000,0xBC000000,0xBC000000,0xBC000000,0x1B80000,0x1940000,0x1780001,0x9FC0000,0x5DFC0000,0x83FC0000,0x91FC0000,0xA5FC0000,0x3D80000,0x37FC0000,0x83FC0000,0xBC000000,0x83FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1345AA3,0xFF18414F,0xFEFC2CFE,0xFEF025C6,0xFEF43543,0xFED41B46,0xFEC41162,0xFEB81647,0xFAA80AB6,0xDCAC12ED,0xFEDC3B5B,0xFEAC1B25,0xFE980E98,0xFE88100A,0xFE680015,0xDE700938,0xFE64220B,0xEA4C0C2A,0xD24C0DC5,0xBC642208,0x1C85AA3,0xFE88351E,0xFE7025C5,0xFE3821ED,0xFE140B45,0xDC2812ED,0xFE082A18,0xEE000CC4,0xD4000C44,0xBA00228D,0x69FC5AA3, -0xD6003266,0xBE0027E1,0xAC00360C,0x9A005AA5,0xFF0C469D,0xFF2C536D,0xFF2C555E,0xFEEC313E,0xFEBC1BA5,0xFE9008D1,0xFE7C0184,0xF85C0071,0xFEF84551,0xFED02DDA,0xFE50093C,0xD4000C44,0x41FC5AA3,0x178220B,0xFF5C166E,0xFF400C63,0xFF380882,0xFF441505,0xFF20071A,0xFF0C015A,0xFEFC092D,0xF4EC0141,0xDCF40885,0x37FC2208,0xFEFC1086,0xFED80882,0xFEB80D86,0xFE740001, -0xDE940884,0x9DFC2208,0xF2000974,0xD6000A9D,0xBC002208,0x37FC2208,0xFEFC1086,0xFED80882,0xFEB80D86,0xFE740001,0xDE940884,0x9DFC2208,0xF2000974,0xD6000A9D,0xBC002208,0x9DFC2208,0xF2000974,0xD6000A9D,0xBC002208,0xBC002208,0xFF581C81,0xFF6C1FAD,0xFF6C1FDA,0xFF401614,0xFEFC0E52,0xFEC00532,0xFEA00022,0xFC40001E,0xFD581C9A,0xFF24153E,0xFE58092B,0xD6000A9D, -0x83FC2208,0xF025C6,0xF025C6,0xF025C6,0xF025C6,0xFEC41162,0xFEC41162,0xFEC41162,0xEEAC0A69,0xEEAC0A69,0xBCAC0A69,0xFE980E98,0xFE980E98,0xFE980E98,0xFE680015,0xFE680015,0xC27C01D5,0xCE640884,0xCE640884,0xB25C0104,0x9A640884,0x16825C5,0x16825C5,0x16825C5,0xFE140B45,0xFE140B45,0xBC480A69,0xE2000AD4,0xE2000AD4,0xBA000086,0x9A000885,0x39F825C5, -0x39F825C5,0x9C001115,0x8A0011FD,0x780025C5,0xFED81B58,0xFEEC212D,0xF025C6,0xFEBC1244,0xFE9C0952,0xFE7C0334,0xFE7C0184,0xEC60000C,0xFEBC1A8C,0xFEA41121,0xFE500893,0xBA000086,0x3FC25C5,0x1380882,0x1380882,0x1380882,0x1380882,0xFF0C015A,0xFF0C015A,0xFF0C015A,0xDCF40001,0xDCF40001,0xBCF40001,0x1D00882,0x1D00882,0x1D00882,0xF87C0001,0xF87C0001, -0xBCB40001,0x6DF80882,0x6DF80882,0xBA00000D,0x9A000884,0x1D00882,0x1D00882,0x1D00882,0xF87C0001,0xF87C0001,0xBCB40001,0x6DF80882,0x6DF80882,0xBA00000D,0x9A000884,0x6DF80882,0x6DF80882,0xBA00000D,0x9A000884,0x9A000884,0xFF200659,0xFF2C06D9,0x1380882,0xFD0404E4,0xFED4028D,0xFEB000E1,0xFEA00022,0xF0540001,0xFD100692,0xFEE80492,0x45FC0882,0xBA00000D, -0x45FC0882,0x1BC0882,0xFFA4053D,0xFF88019A,0xFF780001,0x9BFC0882,0xFF640335,0xFF380001,0xCFF80882,0xFE6C0000,0xDC000884,0x9BFC0882,0xFF640335,0xFF380001,0xCFF80882,0xFE6C0000,0xDC000884,0xCFF80882,0xFE6C0000,0xDC000884,0xDC000884,0x9BFC0882,0xFF640335,0xFF380001,0xCFF80882,0xFE6C0000,0xDC000884,0xCFF80882,0xFE6C0000,0xDC000884,0xDC000884,0xCFF80882, -0xFE6C0000,0xDC000884,0xDC000884,0xDC000884,0xFDB007C1,0x5D80882,0xF5B80802,0xFF94069A,0xFF68053D,0xFF1802A1,0xFEB80000,0xFE0C0000,0xFDA807C1,0xFF880665,0xFEF00049,0xDC000884,0xC1FC0882,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xAC0A69,0xFE680014,0xFE680014,0xFE680014,0xFE680014,0xFE680014, -0xFE680014,0x9E640000,0x9E640000,0x9E640000,0x7A640000,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0xAE00004A,0xAE00004A,0xAE00004A,0x7A1C0000,0x5F80A69,0x5F80A69,0x5F80A69,0x6C0002D5,0x56000A69,0xF8A00784,0xAC0A69,0xAC0A69,0xFE900455,0xFE800254,0xFE7800DD,0xFE7800DD,0xE4640000,0xFA880692,0xFE7803F5,0xC4500000,0xAE00004A, -0x16C0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table212[] = { -0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0xB00000,0x1640000, -0x1640000,0x1640000,0x1640000,0x3A000000,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x740001,0x67C0000,0x67C0000,0x67C0000,0xB00000,0xF80000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1080000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000, -0x1880000,0x49F80000,0x49F80000,0x49F80000,0x82000001,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x1880000,0x49F80000,0x49F80000,0x49F80000,0x82000001,0x49F80000,0x49F80000,0x49F80000,0x82000001,0x82000001,0x5180000,0x1080000,0x1080000,0x1340000,0x14C0000,0x1680000,0x1680000,0x1BC0000,0x1340000,0x14C0000,0x19FC0000,0x49F80000, -0x19FC0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xC4000001,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xC4000001,0xABF80000,0xABF80000,0xC4000001,0xC4000001,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xC4000001,0xABF80000,0xABF80000,0xC4000001,0xC4000001,0xABF80000, -0xABF80000,0xC4000001,0xC4000001,0xC4000001,0x1CC0000,0xBA40000,0x18C0000,0x2BFC0000,0x73FC0000,0x95FC0000,0x9FFC0000,0xB3F40000,0x1F00000,0x53FC0000,0x95FC0000,0xC4000001,0x95FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x14053CE,0xFF243DB6,0xFF082C15,0xFF0425C5,0xFF0C31DE,0xFEE41AFD,0xFEDC124D,0xFECC14B2,0xFCC00A76,0xE2C010C2,0xFEF436F6,0xFEC01AAD,0xFEB00F93,0xFE940E7F,0xFE800056,0xE488072F,0xFE7C1DBE,0xEC680AE9,0xD8640AE2,0xC2741D9B,0x3DC53CA,0xFEA03317,0xFE8825C6,0xFE641FF2,0xFE2C0BDE,0xE23C10C2,0xFE14260B,0xF4000A9F,0xDA000853,0xC2001DB3,0x73FC53CA, -0xDC002E39,0xCA0021E2,0xB2002F23,0xA00053CA,0xFF204186,0xFF2C4DB2,0xF9404F7E,0xFF002E66,0xFED01ABC,0xFEA4092A,0xFE900243,0xFA700023,0xFF10407E,0xFEDC2BAA,0xFE700975,0xDA000853,0x4FFC53CA,0x1841D9D,0xFF681408,0xFF580BBD,0xFF480884,0xFF501235,0xFF2C068C,0xFF2401C4,0xFF140731,0xF90000AD,0xE3080659,0x49FC1D9A,0xFF140F18,0xFEF00884,0xFECC0B85,0xFE940005, -0xE2AC0659,0xA7F81D9A,0xF80008AE,0xDC000795,0xC2001D9A,0x49FC1D9A,0xFF140F18,0xFEF00884,0xFECC0B85,0xFE940005,0xE2AC0659,0xA7F81D9A,0xF80008AE,0xDC000795,0xC2001D9A,0xA7F81D9A,0xF80008AE,0xDC000795,0xC2001D9A,0xC2001D9A,0xFF7418F9,0xF9801BAD,0xFB841BC5,0xFF40137A,0xFF100CD4,0xFEDC04E3,0xFEBC0059,0xFE5C0002,0xFF5C18D2,0xFF401296,0xFE7C0912,0xDC000795, -0x8FFC1D9A,0x10425C5,0x10425C5,0x10425C5,0x10425C5,0xFEDC124D,0xFEDC124D,0xFEDC124D,0xF6C00A6A,0xF6C00A6A,0xC4C00A6A,0xFEB00F93,0xFEB00F93,0xFEB00F93,0xFE800056,0xFE800056,0xCA8C01D6,0xD8740883,0xD8740883,0xBC700103,0xA4740883,0x38025C5,0x38025C5,0x38025C5,0xFE2C0BDE,0xFE2C0BDE,0xC45C0A6A,0xEE000A0B,0xEE000A0B,0xC2000023,0xA40C0883,0x45FC25C5, -0x45FC25C5,0xAC001022,0x960010EE,0x800025C6,0xFEE81BE6,0xF6FC21AD,0x10425C5,0xFECC134A,0xFEB00A91,0xFE98043A,0xFE900243,0xF674000B,0xFED01B16,0xFEB01226,0xFE6808D8,0xC2000023,0x15FC25C5,0x1480884,0x1480884,0x1480884,0x1480884,0xFF2401C4,0xFF2401C4,0xFF2401C4,0xE5080001,0xE5080001,0xC5080001,0x3E80882,0x3E80882,0x3E80882,0xFE940005,0xFE940005, -0xC4C80001,0x79FC0882,0x79FC0882,0xC4000001,0xA4000882,0x3E80882,0x3E80882,0x3E80882,0xFE940005,0xFE940005,0xC4C80001,0x79FC0882,0x79FC0882,0xC4000001,0xA4000882,0x79FC0882,0x79FC0882,0xC4000001,0xA4000882,0xA4000882,0xFB340694,0xFB440708,0x1480884,0xFF180519,0xFEF002F2,0xFEC80151,0xFEBC0059,0xFA640000,0xF92806CD,0xFF0404EA,0x57FC0882,0xC4000001, -0x57FC0882,0x1C40659,0xFFB003E8,0xFF940131,0xFF8C0000,0xA9FC0659,0xFF700262,0xFF540000,0xD5F80659,0xFEA00000,0xE2000659,0xA9FC0659,0xFF700262,0xFF540000,0xD5F80659,0xFEA00000,0xE2000659,0xD5F80659,0xFEA00000,0xE2000659,0xE2000659,0xA9FC0659,0xFF700262,0xFF540000,0xD5F80659,0xFEA00000,0xE2000659,0xD5F80659,0xFEA00000,0xE2000659,0xE2000659,0xD5F80659, -0xFEA00000,0xE2000659,0xE2000659,0xE2000659,0xFFB405BA,0x1E40659,0xFBC405E9,0xFFA004F4,0xFF7C03E8,0xFF2C01F9,0xFEE40000,0xFE500000,0xFFAC05C4,0xFF9404C9,0xFF100032,0xE2000659,0xC9FC0659,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xC00A69,0xFE800032,0xFE800032,0xFE800032,0xFE800032,0xFE800032, -0xFE800032,0xA6780001,0xA6780001,0xA6780001,0x82740002,0x11C0A69,0x11C0A69,0x11C0A69,0x11C0A69,0x11C0A69,0x11C0A69,0xC0000014,0xC0000014,0xC0000014,0x82300001,0x11FC0A69,0x11FC0A69,0x11FC0A69,0x7800025D,0x5E000A69,0xFEAC07A2,0xC00A69,0xC00A69,0xFEA00492,0xFE94029A,0xFE880115,0xFE880115,0xEC780001,0xFE9006C4,0xFC900451,0xCC640001,0xC0000014, -0x1940A69,}; -static const uint32_t g_etc1_to_bc7_m6_table213[] = { -0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x2C40000,0x1940000, -0x1940000,0x1940000,0x1940000,0x42000000,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0x840001,0xE8C0000,0xE8C0000,0xE8C0000,0x2C40000,0x11C0000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1180000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000, -0x1A00000,0x55F80000,0x55F80000,0x55F80000,0x8A000001,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x55F80000,0x55F80000,0x55F80000,0x8A000001,0x55F80000,0x55F80000,0x55F80000,0x8A000001,0x8A000001,0xD280000,0x1180000,0x1180000,0x3440000,0x1600000,0x17C0000,0x17C0000,0x3D40000,0x3440000,0x1600000,0x27FC0000,0x55F80000, -0x27FC0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0xB7F80000,0xB7F80000,0xCC000001,0x6BFC0000,0x6BFC0000,0x6BFC0000,0xB7F80000,0xB7F80000,0xCC000001,0xB7F80000,0xB7F80000,0xCC000001,0xCC000001,0x6BFC0000,0x6BFC0000,0x6BFC0000,0xB7F80000,0xB7F80000,0xCC000001,0xB7F80000,0xB7F80000,0xCC000001,0xCC000001,0xB7F80000, -0xB7F80000,0xCC000001,0xCC000001,0xCC000001,0x1E00000,0x1B80000,0x19C0000,0x49FC0000,0x87FC0000,0xA3FC0000,0xADFC0000,0xBDF80000,0x11FC0000,0x6BFC0000,0xA3FC0000,0xCC000001,0xA3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x14C4E1E,0xFF303AC6,0xFF202B4D,0xFF1425C5,0xFF182F0A,0xFEF81ADB,0xFEE81331,0xFED81392,0xFED00A6A,0xE6D00F1A,0xFF003356,0xFED81A55,0xFEC41065,0xFEAC0D5F,0xFE9400C6,0xE898059E,0xFE941A6E,0xF07C0A01,0xDC7808A6,0xC8841A07,0x1F04E1A,0xFEB8315F,0xFEA025C6,0xFE7C1E4A,0xFE4C0C89,0xE6500F1A,0xFE2C231B,0xFA000993,0xE0000597,0xC8041A06,0x7DF84E1A, -0xE0002B5E,0xD0001D1E,0xB8002977,0xA6004E1A,0xFF343DC5,0xFD48484A,0xFF4C4A16,0xFF082C34,0xFEE01A1E,0xFEB809A4,0xFEA4032A,0xFE84000B,0xFF183CC9,0xFEF429C3,0xFE7C09C3,0xE0000597,0x5BFC4E1A,0x1901A05,0xFF741218,0xFF640B25,0xFF580884,0xFF5C1001,0xFF400632,0xFF300220,0xFF2005C9,0xFB100051,0xE71804B1,0x59FC1A05,0xFF200DF4,0xFF080884,0xFEE409DD,0xFEAC0025, -0xE6C004B1,0xADFC1A05,0xFE080882,0xE0000566,0xC8001A06,0x59FC1A05,0xFF200DF4,0xFF080884,0xFEE409DD,0xFEAC0025,0xE6C004B1,0xADFC1A05,0xFE080882,0xE0000566,0xC8001A06,0xADFC1A05,0xFE080882,0xE0000566,0xC8001A06,0xC8001A06,0xFF7815DA,0xFF8C182D,0xFF8C185D,0xFF5C114D,0xFF280B9E,0xFEF0049D,0xFED000A0,0xFE7C0004,0xFD74161E,0xFF481074,0xFEA008FB,0xE0000566, -0x99FC1A05,0x11425C5,0x11425C5,0x11425C5,0x11425C5,0xFEE81331,0xFEE81331,0xFEE81331,0xFED00A6A,0xFED00A6A,0xCCD00A6A,0xFEC41065,0xFEC41065,0xFEC41065,0xFE9400C6,0xFE9400C6,0xD29C01D6,0xE0840883,0xE0840883,0xC4800103,0xAC840883,0x39825C5,0x39825C5,0x39825C5,0xFE4C0C89,0xFE4C0C89,0xCC6C0A6A,0xFA000983,0xFA000983,0xCE000003,0xAC1C0883,0x51FC25C5, -0x51FC25C5,0xB2000F62,0x9C001026,0x880025C6,0xFD001CA9,0xFF0C21AD,0x11425C5,0xFEDC144E,0xFEC40BB5,0xFEAC053E,0xFEA4032A,0xFE84000B,0xFEE41BE5,0xFED012ED,0xFE7C0933,0xCE000003,0x23FC25C5,0x1580884,0x1580884,0x1580884,0x1580884,0xFF300220,0xFF300220,0xFF300220,0xED180001,0xED180001,0xCD180001,0x7FC0882,0x7FC0882,0x7FC0882,0xFEAC0025,0xFEAC0025, -0xCCD80001,0x85FC0882,0x85FC0882,0xCC0C0001,0xAC000882,0x7FC0882,0x7FC0882,0x7FC0882,0xFEAC0025,0xFEAC0025,0xCCD80001,0x85FC0882,0x85FC0882,0xCC0C0001,0xAC000882,0x85FC0882,0x85FC0882,0xCC0C0001,0xAC000882,0xAC000882,0xFF3C06C4,0xFF4C0728,0x1580884,0xFF24057A,0xFF040352,0xFEE4019A,0xFED000A0,0xFE7C0004,0xFF3406D1,0xFF180521,0x65FC0882,0xCC0C0001, -0x65FC0882,0x1CC04B1,0xFFBC02E4,0xFFA000E5,0xFF9C0000,0xB5FC04B1,0xFF8801BA,0xFF6C0000,0xDBF804B1,0xFED40000,0xE60004B1,0xB5FC04B1,0xFF8801BA,0xFF6C0000,0xDBF804B1,0xFED40000,0xE60004B1,0xDBF804B1,0xFED40000,0xE60004B1,0xE60004B1,0xB5FC04B1,0xFF8801BA,0xFF6C0000,0xDBF804B1,0xFED40000,0xE60004B1,0xDBF804B1,0xFED40000,0xE60004B1,0xE60004B1,0xDBF804B1, -0xFED40000,0xE60004B1,0xE60004B1,0xE60004B1,0xF9C40451,0x1EC04B1,0xFFCC0451,0xFFB003A1,0xFF9002E4,0xFF480168,0xFF0C0000,0xFE8C0000,0xF3C40451,0xFFA00392,0xFF300025,0xE60004B1,0xD1FC04B1,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xD00A69,0xFE900059,0xFE900059,0xFE900059,0xFE900059,0xFE900059, -0xFE900059,0xAE880001,0xAE880001,0xAE880001,0x8A840002,0x1340A69,0x1340A69,0x1340A69,0x1340A69,0x1340A69,0x1340A69,0xCE000002,0xCE000002,0xCE000002,0x8A400001,0x1DFC0A69,0x1DFC0A69,0x1DFC0A69,0x7E0001ED,0x66000A69,0xFAC407C1,0xD00A69,0xD00A69,0xFCB804E2,0xFEA802DA,0xFE980151,0xFE980151,0xF4880001,0xFEAC0708,0xFEA00480,0xD4740001,0xCE000002, -0x1B40A69,}; -static const uint32_t g_etc1_to_bc7_m6_table214[] = { -0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x2DC0000,0x1C40000, -0x1C40000,0x1C40000,0x1C40000,0x4A000000,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0x940001,0xA00000,0xA00000,0xA00000,0x2DC0000,0x13C0000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000, -0x1B80000,0x61F80000,0x61F80000,0x61F80000,0x92000001,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x61F80000,0x61F80000,0x61F80000,0x92000001,0x61F80000,0x61F80000,0x61F80000,0x92000001,0x92000001,0x13C0000,0x1280000,0x1280000,0x1580000,0x1740000,0x3900000,0x3900000,0x1F00000,0x1580000,0x1740000,0x37FC0000,0x61F80000, -0x37FC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x83FC0000,0x83FC0000,0x83FC0000,0xC3F80000,0xC3F80000,0xD4000001,0x83FC0000,0x83FC0000,0x83FC0000,0xC3F80000,0xC3F80000,0xD4000001,0xC3F80000,0xC3F80000,0xD4000001,0xD4000001,0x83FC0000,0x83FC0000,0x83FC0000,0xC3F80000,0xC3F80000,0xD4000001,0xC3F80000,0xC3F80000,0xD4000001,0xD4000001,0xC3F80000, -0xC3F80000,0xD4000001,0xD4000001,0xD4000001,0x3F00000,0x1C80000,0x1AC0000,0x67FC0000,0x9BFC0000,0xB3FC0000,0xBBFC0000,0xC7FC0000,0x37FC0000,0x83FC0000,0xB3FC0000,0xD4000001,0xB3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x15848CE,0xFF4437E9,0xFF2C2A81,0xFF2425C5,0xFF242C86,0xFF081AD5,0xFF001419,0xFEF01292,0xFEE00A8D,0xEAE00DB2,0xFF0C301E,0xFEE41A29,0xFEDC116D,0xFEB80CB3,0xFEAC016E,0xEAA8045E,0xFEA0178E,0xF4900951,0xE088069F,0xCC9416C7,0x7FC48CA,0xFECC3001,0xFEB825C6,0xFE941CE2,0xFE640D49,0xEA640DB2,0xFE3820BF,0xFE08097B,0xE6000393,0xCC1816C6,0x85FC48CA, -0xE600290A,0xD60018EA,0xBE002443,0xAC0048CA,0xFF3C39F6,0xFF4C436A,0xFF4C4586,0xFF182A0A,0xFEF419AA,0xFEC80A63,0xFEBC0422,0xFE9C0031,0xFF34391E,0xFF0027C6,0xFE940A51,0xE6000393,0x65FC48CA,0x19C16C5,0xFF801058,0xFF700AA5,0xFF680884,0xFF740E01,0xFF5405F4,0xFF44027D,0xFF380491,0xFD240018,0xEB280349,0x6BFC16C5,0xFF380CE4,0xFF200884,0xFEFC0875,0xFECC0061, -0xEAD40349,0xB7F816C5,0xFE380882,0xE6000392,0xCC0016C6,0x6BFC16C5,0xFF380CE4,0xFF200884,0xFEFC0875,0xFECC0061,0xEAD40349,0xB7F816C5,0xFE380882,0xE6000392,0xCC0016C6,0xB7F816C5,0xFE380882,0xE6000392,0xCC0016C6,0xCC0016C6,0xFF80136D,0xFF8C154D,0xF59815A4,0xFF6C0F72,0xFF480A86,0xFF040496,0xFEE800FA,0xFEA00022,0xFF781352,0xFF5C0EC6,0xFED008E5,0xE6000392, -0xA3FC16C5,0x12425C5,0x12425C5,0x12425C5,0x12425C5,0xFF001419,0xFF001419,0xFF001419,0xFEE00A8D,0xFEE00A8D,0xD4E00A6A,0xFEDC116D,0xFEDC116D,0xFEDC116D,0xFEAC016E,0xFEAC016E,0xDAAC01D6,0xE8940883,0xE8940883,0xCC900103,0xB4940883,0x3B025C5,0x3B025C5,0x3B025C5,0xFE640D49,0xFE640D49,0xD47C0A6A,0xFE08097B,0xFE08097B,0xD6100003,0xB42C0883,0x5DFC25C5, -0x5DFC25C5,0xBE000E9A,0xA6000F63,0x900025C6,0xFF101D2B,0xF920222A,0x12425C5,0xFEF81556,0xFED80CE9,0xFEBC0666,0xFEBC0422,0xFE9C0031,0xFEF81C56,0xFEDC1421,0xFE9409D8,0xD6100003,0x33FC25C5,0x1680884,0x1680884,0x1680884,0x1680884,0xFF44027D,0xFF44027D,0xFF44027D,0xF5280001,0xF5280001,0xD5280001,0x1FFC0882,0x1FFC0882,0x1FFC0882,0xFECC0061,0xFECC0061, -0xD4E80001,0x91FC0882,0x91FC0882,0xD41C0001,0xB4000882,0x1FFC0882,0x1FFC0882,0x1FFC0882,0xFECC0061,0xFECC0061,0xD4E80001,0x91FC0882,0x91FC0882,0xD41C0001,0xB4000882,0x91FC0882,0x91FC0882,0xD41C0001,0xB4000882,0xB4000882,0xFF5806CD,0xFB640745,0x1680884,0xFF4405B2,0xFF1803BA,0xFEFC01F9,0xFEE800FA,0xFEA00022,0xFD4C0708,0xFF300581,0x75FC0882,0xD41C0001, -0x75FC0882,0x1D40349,0xFFC401F9,0xFFB8009D,0xFFAC0000,0xC1FC0349,0xFFA00132,0xFF840000,0xE1F80349,0xFF040000,0xEA000349,0xC1FC0349,0xFFA00132,0xFF840000,0xE1F80349,0xFF040000,0xEA000349,0xE1F80349,0xFF040000,0xEA000349,0xEA000349,0xC1FC0349,0xFFA00132,0xFF840000,0xE1F80349,0xFF040000,0xEA000349,0xE1F80349,0xFF040000,0xEA000349,0xEA000349,0xE1F80349, -0xFF040000,0xEA000349,0xEA000349,0xEA000349,0xFDCC02F9,0x1F40349,0xFFCC0311,0xFFC00288,0xFFA80202,0xFF700104,0xFF340000,0xFEC80000,0xF7CC02F9,0xFDBC0288,0xFF500019,0xEA000349,0xD9FC0349,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xE00A69,0xFEA40082,0xFEA40082,0xFEA40082,0xFEA40082,0xFEA40082, -0xFEA40082,0xB6980001,0xB6980001,0xB6980001,0x92940002,0x1480A69,0x1480A69,0x1480A69,0x1480A69,0x1480A69,0x1480A69,0xD80C0000,0xD80C0000,0xD80C0000,0x92500001,0x29FC0A69,0x29FC0A69,0x29FC0A69,0x8A000195,0x6E000A69,0xFECC07E9,0xE00A69,0xE00A69,0xFCC80515,0xFEB40321,0xFEAC019A,0xFEAC019A,0xFC980001,0xFEBC070A,0xFEB404B1,0xDC840001,0xD80C0000, -0x1D80A69,}; -static const uint32_t g_etc1_to_bc7_m6_table215[] = { -0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x2F40000,0x1F40000, -0x1F40000,0x1F40000,0x1F40000,0x52000000,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xA40001,0xB00000,0xB00000,0xB00000,0x2F40000,0x1600000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1380000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000, -0x1D00000,0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x1D00000,0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x6DF80000,0x6DF80000,0x6DF80000,0x9A000001,0x9A000001,0x14C0000,0x1380000,0x1380000,0x7680000,0x1880000,0x1A80000,0x1A80000,0xDFC0000,0x7680000,0x1880000,0x45FC0000,0x6DF80000, -0x45FC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xDC000001,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xDC000001,0xCFF80000,0xCFF80000,0xDC000001,0xDC000001,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xDC000001,0xCFF80000,0xCFF80000,0xDC000001,0xDC000001,0xCFF80000, -0xCFF80000,0xDC000001,0xDC000001,0xDC000001,0x17FC0000,0x5D80000,0x1BC0000,0x85FC0000,0xAFFC0000,0xC1FC0000,0xC9F80000,0xD3F80000,0x5FFC0000,0x9BFC0000,0xC1FC0000,0xDC000001,0xC1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x16443DE,0xFF503551,0xFF3829D5,0xFF3425C5,0xFF302A52,0xFF181AE5,0xFF0C150D,0xFEFC11F2,0xFEF00AE2,0xEEF00C8A,0xFF242D36,0xFEFC1A11,0xFEE81275,0xFED80C52,0xFEC00252,0xEEBC035E,0xFEB8151E,0xF8A408D9,0xE69404F3,0xD2A413DB,0x19FC43DA,0xFEE42E81,0xFED025C5,0xFEAC1BBA,0xFE7C0E29,0xEE780C8A,0xFE641E91,0xFE200A1B,0xEC0C023F,0xD22813DA,0x8FF843DA, -0xF0002759,0xDC001546,0xC4001F87,0xB20043DA,0xFF4C36D3,0xF9603F4E,0xFB6440F5,0xFF2C286E,0xFF04195C,0xFEDC0B1C,0xFED00559,0xFEB00092,0xFF3435BE,0xFF102642,0xFEB00AE7,0xEC0C023F,0x71FC43DA,0x1A413DD,0xFF8C0EC8,0xFF7C0A3D,0xFF780884,0xFF800C45,0xFF6405BE,0xFF5C02FD,0xFF4C03BD,0xFF380001,0xEF380221,0x7BFC13DA,0xFF4C0C13,0xFF380884,0xFF14074D,0xFEE400B9, -0xEEE80221,0xBFF813DA,0xFE6C0882,0xEC00022E,0xD20013DA,0x7BFC13DA,0xFF4C0C13,0xFF380884,0xFF14074D,0xFEE400B9,0xEEE80221,0xBFF813DA,0xFE6C0882,0xEC00022E,0xD20013DA,0xBFF813DA,0xFE6C0882,0xEC00022E,0xD20013DA,0xD20013DA,0xFF9410EE,0xF9A01299,0xFBA412D8,0xFF780DD1,0xFF5C09A6,0xFF240491,0xFF0C0164,0xFEB80068,0xFF8810FD,0xFF680D52,0xFEE408D3,0xEC00022E, -0xADFC13DA,0x13425C5,0x13425C5,0x13425C5,0x13425C5,0xFF0C150D,0xFF0C150D,0xFF0C150D,0xFEF00AE2,0xFEF00AE2,0xDCF00A6A,0xFEE81275,0xFEE81275,0xFEE81275,0xFEC00252,0xFEC00252,0xE2BC01D6,0xF0A40883,0xF0A40883,0xD4A00103,0xBCA40883,0x1C825C5,0x1C825C5,0x1C825C5,0xFE7C0E29,0xFE7C0E29,0xDC8C0A6A,0xFE200A1B,0xFE200A1B,0xDE200003,0xBC3C0883,0x69FC25C5, -0x69FC25C5,0xCA000DF2,0xB2000EA3,0x980025C6,0xFF201DA6,0xFF2C222E,0x13425C5,0xFF081643,0xFEEC0E2D,0xFED407CE,0xFED00559,0xFEB00092,0xFF141D18,0xFEF41512,0xFEB00A83,0xDE200003,0x41FC25C5,0x1780884,0x1780884,0x1780884,0x1780884,0xFF5C02FD,0xFF5C02FD,0xFF5C02FD,0xFD380001,0xFD380001,0xDD380001,0x37FC0882,0x37FC0882,0x37FC0882,0xFEE400B9,0xFEE400B9, -0xDCF80001,0x9DFC0882,0x9DFC0882,0xDC2C0001,0xBC000882,0x37FC0882,0x37FC0882,0x37FC0882,0xFEE400B9,0xFEE400B9,0xDCF80001,0x9DFC0882,0x9DFC0882,0xDC2C0001,0xBC000882,0x9DFC0882,0x9DFC0882,0xDC2C0001,0xBC000882,0xBC000882,0xFB6C0708,0xFF6C076D,0x1780884,0xFD5805E9,0xFF400422,0xFF20028A,0xFF0C0164,0xFEB80068,0xF5640745,0xFF4805B4,0x83FC0882,0xDC2C0001, -0x83FC0882,0x1DC0221,0xFFD00145,0xFFC00068,0xFFBC0000,0xCDFC0221,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xFF340000,0xEE000221,0xCDFC0221,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xFF340000,0xEE000221,0xE7F80221,0xFF340000,0xEE000221,0xEE000221,0xCDFC0221,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xFF340000,0xEE000221,0xE7F80221,0xFF340000,0xEE000221,0xEE000221,0xE7F80221, -0xFF340000,0xEE000221,0xEE000221,0xEE000221,0xFFD001ED,0x1FC0221,0xF7DC0200,0xFFC001A8,0xFFA80152,0xFF8400A0,0xFF5C0000,0xFF040000,0xFBD401E1,0xFFC00190,0xFF700010,0xEE000221,0xDFFC0221,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xF00A69,0xFEB400C1,0xFEB400C1,0xFEB400C1,0xFEB400C1,0xFEB400C1, -0xFEB400C1,0xBEA80001,0xBEA80001,0xBEA80001,0x9AA40002,0x1600A69,0x1600A69,0x1600A69,0x1600A69,0x1600A69,0x1600A69,0xE01C0000,0xE01C0000,0xE01C0000,0x9A600001,0x35FC0A69,0x35FC0A69,0x35FC0A69,0x9000013D,0x76000A69,0xFAE40802,0xF00A69,0xF00A69,0xFED8054A,0xFED00372,0xFEC401E1,0xFEC401E1,0xFCA8000A,0xFAD00745,0xFEBC0502,0xE4940001,0xE01C0000, -0x1F80A69,}; -static const uint32_t g_etc1_to_bc7_m6_table216[] = { -0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0x1100000,0xDFC0000, -0xDFC0000,0xDFC0000,0xDFC0000,0x5A000001,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xB80000,0xC40000,0xC40000,0xC40000,0x1100000,0x1880000,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x1480001,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000, -0x3E80000,0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x3E80000,0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0x79FC0000,0x79FC0000,0x79FC0000,0xA4000000,0xA4000000,0x1600000,0x1480001,0x1480001,0x1800000,0x39C0000,0x1C00000,0x1C00000,0x23FC0000,0x1800000,0x39C0000,0x57FC0000,0x79FC0000, -0x57FC0000,0x1CC0001,0x1CC0001,0x1CC0001,0x1CC0001,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xE6000000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xE6000000,0xDDF40000,0xDDF40000,0xE6000000,0xE6000000,0xB7FC0000,0xB7FC0000,0xB7FC0000,0xDDF40000,0xDDF40000,0xE6000000,0xDDF40000,0xDDF40000,0xE6000000,0xE6000000,0xDDF40000, -0xDDF40000,0xE6000000,0xE6000000,0xE6000000,0x57FC0000,0x1EC0000,0x1CC0001,0xA7FC0000,0xC5FC0000,0xD3FC0000,0xD7FC0000,0xDFF80000,0x8BFC0000,0xB7FC0000,0xD3FC0000,0xE6000000,0xD3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1743EBF,0xFF5C32AA,0xFF4C292F,0xFF4425C6,0xFF4427D5,0xFF301B26,0xFF24162E,0xFF141185,0xFF080B81,0xF3000B89,0xFF302A4B,0xFF101A25,0xFF0013B2,0xFEE40C25,0xFED80391,0xF6CC0285,0xFECC130C,0xFCB8088E,0xEAA80370,0xD8B810F8,0x2FFC3EBF,0xFEFC2CFE,0xFEEC25C5,0xFEB81ACB,0xFEA00F3A,0xF2900B89,0xFE881C84,0xFE380B28,0xF0200130,0xD83C10F9,0x99FC3EBF, -0xF8002621,0xE0001205,0xD0001AB8,0xB8003EC1,0xFF583335,0xFF6C3A65,0xFF6C3C36,0xFF4026D4,0xFF181935,0xFEF00C2B,0xFEE806D0,0xFEC00164,0xFF50326B,0xFF3024FE,0xFEBC0BFE,0xF0200130,0x7FF83EBF,0x1B010FB,0xFFA40D26,0xFF9409C3,0xFF8C0882,0xFF8C0AA9,0xFF7405AE,0xFF680385,0xFF580313,0xFF4C001A,0xF3480121,0x8DFC10F8,0xFF640B25,0xFF540882,0xFF2C0651,0xFF080131, -0xF2FC0121,0xC7FC10F8,0xFEA00882,0xF2080120,0xD80010F8,0x8DFC10F8,0xFF640B25,0xFF540882,0xFF2C0651,0xFF080131,0xF2FC0121,0xC7FC10F8,0xFEA00882,0xF2080120,0xD80010F8,0xC7FC10F8,0xFEA00882,0xF2080120,0xD80010F8,0xD80010F8,0xFFA00E9D,0xFFAC0FC9,0xFFAC1026,0xFF880C23,0xFF6808C1,0xFF380495,0xFF2801E2,0xFEE400CD,0xFF980EBE,0xFF840BB2,0xFF1008B5,0xF2080120, -0xB9FC10F8,0x14425C6,0x14425C6,0x14425C6,0x14425C6,0xFF24162E,0xFF24162E,0xFF24162E,0xFF080B81,0xFF080B81,0xE7000A69,0xFF0013B2,0xFF0013B2,0xFF0013B2,0xFED80391,0xFED80391,0xECD001D5,0xF8B80884,0xF8B80884,0xDCB00104,0xC4B80884,0x1E425C5,0x1E425C5,0x1E425C5,0xFEA00F3A,0xFEA00F3A,0xE69C0A69,0xFE380B28,0xFE380B28,0xE6340002,0xC4500884,0x77F825C5, -0x77F825C5,0xD6000D49,0xB8000DD4,0xA20025C5,0xFF2C1E68,0xF94022AA,0x14425C6,0xFF181761,0xFF000FB2,0xFEF00952,0xFEE806D0,0xFEC00164,0xFF201DB2,0xFF101682,0xFEBC0B9A,0xE6340002,0x53FC25C5,0x18C0882,0x18C0882,0x18C0882,0x18C0882,0xFF680385,0xFF680385,0xFF680385,0xFF4C001A,0xFF4C001A,0xE7480001,0x53FC0882,0x53FC0882,0x53FC0882,0xFF080131,0xFF080131, -0xE7080001,0xABF80882,0xABF80882,0xE63C0000,0xC4000884,0x53FC0882,0x53FC0882,0x53FC0882,0xFF080131,0xFF080131,0xE7080001,0xABF80882,0xABF80882,0xE63C0000,0xC4000884,0xABF80882,0xABF80882,0xE63C0000,0xC4000884,0xC4000884,0xFF74073A,0xFD880782,0x18C0882,0xFB700659,0xFF540488,0xFF300304,0xFF2801E2,0xFEE400CD,0xFD740745,0xFF5C05F5,0x95FC0882,0xE63C0000, -0x95FC0882,0x1E40122,0xFFDC00AA,0xFFD4003A,0xFFCC0001,0xDBFC0120,0xFFC0006D,0xFFB80000,0xEDFC0120,0xFF6C0000,0xF2000120,0xDBFC0120,0xFFC0006D,0xFFB80000,0xEDFC0120,0xFF6C0000,0xF2000120,0xEDFC0120,0xFF6C0000,0xF2000120,0xF2000120,0xDBFC0120,0xFFC0006D,0xFFB80000,0xEDFC0120,0xFF6C0000,0xF2000120,0xEDFC0120,0xFF6C0000,0xF2000120,0xF2000120,0xEDFC0120, -0xFF6C0000,0xF2000120,0xF2000120,0xF2000120,0xF5E40109,0x47FC0120,0xFBE40109,0xFFD800DD,0xFFCC00B5,0xFFAC0059,0xFF880000,0xFF480000,0xFFDC00F4,0xFFCC00DA,0xFF980009,0xF2000120,0xE9FC0120,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0x1000A69,0xFEC80104,0xFEC80104,0xFEC80104,0xFEC80104,0xFEC80104, -0xFEC80104,0xC8B80000,0xC8B80000,0xC8B80000,0xA4B80000,0x17C0A69,0x17C0A69,0x17C0A69,0x17C0A69,0x17C0A69,0x17C0A69,0xEA2C0000,0xEA2C0000,0xEA2C0000,0xA4700000,0x43F80A69,0x43F80A69,0x43F80A69,0x9C0000E9,0x80000A69,0xF4F80841,0x1000A69,0x1000A69,0xFEE80589,0xFEE403C8,0xFED4022D,0xFED4022D,0xFEC00020,0xFED80781,0xFED0053D,0xEEA40000,0xEA2C0000, -0x11FC0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table217[] = { -0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x1280000,0x19FC0000, -0x19FC0000,0x19FC0000,0x19FC0000,0x62000001,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0x2D40000,0x2D40000,0x2D40000,0x1280000,0x1A80000,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x1580001,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000, -0x7FC0000,0x85FC0000,0x85FC0000,0x85FC0000,0xAC000000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x7FC0000,0x85FC0000,0x85FC0000,0x85FC0000,0xAC000000,0x85FC0000,0x85FC0000,0x85FC0000,0xAC000000,0xAC000000,0x1700000,0x1580001,0x1580001,0x5900000,0x3B00000,0x3D40000,0x3D40000,0x37FC0000,0x5900000,0x3B00000,0x65FC0000,0x85FC0000, -0x65FC0000,0x1DC0001,0x1DC0001,0x1DC0001,0x1DC0001,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xE7FC0000,0xE7FC0000,0xEE000000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xE7FC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xE7FC0000,0xEE000000,0xEE000000,0xCFFC0000,0xCFFC0000,0xCFFC0000,0xE7FC0000,0xE7FC0000,0xEE000000,0xE7FC0000,0xE7FC0000,0xEE000000,0xEE000000,0xE7FC0000, -0xE7FC0000,0xEE000000,0xEE000000,0xEE000000,0x91FC0000,0x7FC0000,0x1DC0001,0xC5FC0000,0xD9FC0000,0xE1FC0000,0xE5FC0000,0xE9FC0000,0xB3FC0000,0xCFFC0000,0xE1FC0000,0xEE000000,0xE1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1803A9B,0xFF68308E,0xFF5828A3,0xFF5425C6,0xFF502631,0xFF3C1B66,0xFF301742,0xFF201179,0xFF140C51,0xF7100AE9,0xFF4427DC,0xFF241A56,0xFF1814E2,0xFEFC0C45,0xFEE404F1,0xF8DC020D,0xFEE4119C,0xFECC0888,0xEEBC0268,0xDEC80EC4,0x41FC3A9B,0xFF142BBE,0xFF0425C5,0xFED81A61,0xFEB81052,0xF6A40AE9,0xFEA01B0C,0xFE640C2B,0xF6340082,0xDE500EC4,0xA1FC3A9B, -0xFE0025C5,0xE6000F95,0xD60016F4,0xBE003A9D,0xFF643075,0xFF6C3735,0xF77C38AF,0xFF442569,0xFF281942,0xFEFC0D81,0xFEF8085D,0xFED8026A,0xFF542FDE,0xFF4023EC,0xFEDC0D0E,0xF6340082,0x89FC3A9B,0x1BC0EC3,0xFFB00BEE,0xFFA0096B,0xFF9C0882,0xFF980989,0xFF8405BA,0xFF80040D,0xFF7002BB,0xFF64006A,0xF7580081,0x9BFC0EC3,0xFF7C0A6D,0xFF6C0882,0xFF4C05D6,0xFF2C01BD, -0xF7100081,0xCFF80EC3,0xFED40882,0xF6280080,0xDE000EC4,0x9BFC0EC3,0xFF7C0A6D,0xFF6C0882,0xFF4C05D6,0xFF2C01BD,0xF7100081,0xCFF80EC3,0xFED40882,0xF6280080,0xDE000EC4,0xCFF80EC3,0xFED40882,0xF6280080,0xDE000EC4,0xDE000EC4,0xFFB40CEA,0xF5B80E03,0xF7BC0E43,0xFF980AE1,0xFF7C0839,0xFF5804EB,0xFF3C028A,0xFEFC015D,0xFFA40D0A,0xFF900AA6,0xFF3008A8,0xF6280080, -0xC1FC0EC3,0x15425C6,0x15425C6,0x15425C6,0x15425C6,0xFF301742,0xFF301742,0xFF301742,0xFF140C51,0xFF140C51,0xEF100A69,0xFF1814E2,0xFF1814E2,0xFF1814E2,0xFEE404F1,0xFEE404F1,0xF4E001D5,0xFECC0888,0xFECC0888,0xE4C00104,0xCCC80884,0x1FC25C5,0x1FC25C5,0x1FC25C5,0xFEB81052,0xFEB81052,0xEEAC0A69,0xFE640C2B,0xFE640C2B,0xEE440002,0xCC600884,0x83F825C5, -0x83F825C5,0xDC000CB5,0xC4000D24,0xAA0025C5,0xFF3C1EFE,0xFF4C22BE,0x15425C6,0xFF2C188D,0xFF141116,0xFEFC0ADD,0xFEF8085D,0xFED8026A,0xFF341E29,0xFF18178E,0xFEDC0CBD,0xEE440002,0x61FC25C5,0x19C0882,0x19C0882,0x19C0882,0x19C0882,0xFF80040D,0xFF80040D,0xFF80040D,0xFF64006A,0xFF64006A,0xEF580001,0x6BFC0882,0x6BFC0882,0x6BFC0882,0xFF2C01BD,0xFF2C01BD, -0xEF180001,0xB7F80882,0xB7F80882,0xEE4C0000,0xCC000884,0x6BFC0882,0x6BFC0882,0x6BFC0882,0xFF2C01BD,0xFF2C01BD,0xEF180001,0xB7F80882,0xB7F80882,0xEE4C0000,0xCC000884,0xB7F80882,0xB7F80882,0xEE4C0000,0xCC000884,0xCC000884,0xFF900745,0xF59807C1,0x19C0882,0xFF780681,0xFF700515,0xFF5003B5,0xFF3C028A,0xFEFC015D,0xFD880782,0xFF74065D,0xA3FC0882,0xEE4C0000, -0xA3FC0882,0x1EC0082,0xFFE8004A,0xFFE40019,0xFFDC0001,0xE9FC0080,0xFFD8002D,0xFFCC0001,0xF3FC0080,0xFF9C0000,0xF6000080,0xE9FC0080,0xFFD8002D,0xFFCC0001,0xF3FC0080,0xFF9C0000,0xF6000080,0xF3FC0080,0xFF9C0000,0xF6000080,0xF6000080,0xE9FC0080,0xFFD8002D,0xFFCC0001,0xF3FC0080,0xFF9C0000,0xF6000080,0xF3FC0080,0xFF9C0000,0xF6000080,0xF6000080,0xF3FC0080, -0xFF9C0000,0xF6000080,0xF6000080,0xF6000080,0xF9EC0071,0x87FC0080,0xFFEC0071,0xFDE80062,0xFDE00055,0xFFC80022,0xFFB00000,0xFF840000,0xFDEC0071,0xFFE40062,0xFFB80004,0xF6000080,0xF1FC0080,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0x1100A69,0xFEDC0145,0xFEDC0145,0xFEDC0145,0xFEDC0145,0xFEDC0145, -0xFEDC0145,0xD0C80000,0xD0C80000,0xD0C80000,0xACC80000,0x1940A69,0x1940A69,0x1940A69,0x1940A69,0x1940A69,0x1940A69,0xF23C0000,0xF23C0000,0xF23C0000,0xAC800000,0x4FF80A69,0x4FF80A69,0x4FF80A69,0xA60000B4,0x88000A69,0xFD080841,0x1100A69,0x1100A69,0xFEF405E4,0xFEEC041D,0xFEE80290,0xFEE80290,0xFED40048,0xFEF40784,0xFEE40595,0xF6B40000,0xF23C0000, -0x1FFC0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table218[] = { -0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x1400000,0x25F80000, -0x25F80000,0x25F80000,0x25F80000,0x6A000001,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xD80000,0xAE40000,0xAE40000,0xAE40000,0x1400000,0x1CC0000,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1680001,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000, -0x1FFC0000,0x91FC0000,0x91FC0000,0x91FC0000,0xB4000000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x1FFC0000,0x91FC0000,0x91FC0000,0x91FC0000,0xB4000000,0x91FC0000,0x91FC0000,0x91FC0000,0xB4000000,0xB4000000,0x9800000,0x1680001,0x1680001,0x1A40000,0x3C40000,0x1EC0000,0x1EC0000,0x4BFC0000,0x1A40000,0x3C40000,0x75FC0000,0x91FC0000, -0x75FC0000,0x1EC0001,0x1EC0001,0x1EC0001,0x1EC0001,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xF3FC0000,0xF3FC0000,0xF6000000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xF3FC0000,0xF3FC0000,0xF6000000,0xF3FC0000,0xF3FC0000,0xF6000000,0xF6000000,0xE9FC0000,0xE9FC0000,0xE9FC0000,0xF3FC0000,0xF3FC0000,0xF6000000,0xF3FC0000,0xF3FC0000,0xF6000000,0xF6000000,0xF3FC0000, -0xF3FC0000,0xF6000000,0xF6000000,0xF6000000,0xC9FC0000,0x87FC0000,0x1EC0001,0xE3FC0000,0xEDFC0000,0xF1FC0000,0xF3F80000,0xF5F80000,0xDBFC0000,0xE9FC0000,0xF1FC0000,0xF6000000,0xF1FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x18C36D7,0xFF742EAA,0xFF6C2826,0xFF6425C6,0xFF5C24DD,0xFF501BC6,0xFF441831,0xFF3811B1,0xFF2C0D49,0xFB200A89,0xFF5025F8,0xFF301AB6,0xFF241626,0xFF140CC5,0xFEFC0671,0xFCF001D9,0xFEFC10AC,0xFEE408D8,0xF4C801A8,0xE2D80CE4,0x53FC36D7,0xFF2C2A9E,0xFF1C25C5,0xFEF01A01,0xFED811B1,0xFAB80A89,0xFEB819F4,0xFE880D5B,0xFA4C0020,0xE2640CE4,0xABF836D7, -0xFE2C25C5,0xEC000DB5,0xDC0013A8,0xC40036D9,0xFF742E0F,0xFD883393,0xFD88351B,0xFF54242D,0xFF381982,0xFF180EB9,0xFF1009F9,0xFEEC03A5,0xFF702D9A,0xFF5022A2,0xFEF40E18,0xFA4C0020,0x95FC36D7,0x1C80CE3,0xFFBC0AE6,0xFFAC092B,0xFFAC0882,0xFFA408B1,0xFF9805D6,0xFF8C04A5,0xFF7C02B3,0xFF7000EA,0xFB680021,0xAFFC0CE3,0xFF9409D5,0xFF840882,0xFF640576,0xFF380271, -0xFB240021,0xD7FC0CE3,0xFF040882,0xFA4C0020,0xE2000CE4,0xAFFC0CE3,0xFF9409D5,0xFF840882,0xFF640576,0xFF380271,0xFB240021,0xD7FC0CE3,0xFF040882,0xFA4C0020,0xE2000CE4,0xD7FC0CE3,0xFF040882,0xFA4C0020,0xE2000CE4,0xE2000CE4,0xFFBC0B6D,0xFBC40C2B,0xFBC40C7B,0xFFB00A0A,0xFF9007D9,0xFF6C0515,0xFF640332,0xFF2801F4,0xFFB40B76,0xFF9809BE,0xFF50089B,0xFA4C0020, -0xCDFC0CE3,0x16425C6,0x16425C6,0x16425C6,0x16425C6,0xFF441831,0xFF441831,0xFF441831,0xFF2C0D49,0xFF2C0D49,0xF7200A69,0xFF241626,0xFF241626,0xFF241626,0xFEFC0671,0xFEFC0671,0xFCF001D5,0xFEE408D8,0xFEE408D8,0xECD00104,0xD4D80884,0x19FC25C5,0x19FC25C5,0x19FC25C5,0xFED811B1,0xFED811B1,0xF6BC0A69,0xFE880D5B,0xFE880D5B,0xF6540002,0xD4700884,0x8FF825C5, -0x8FF825C5,0xE6000C45,0xCA000C84,0xB20025C5,0xFF4C1F85,0xF960232D,0x16425C6,0xFF3C1996,0xFF2C12AC,0xFF180CA8,0xFF1009F9,0xFEEC03A5,0xFD4C1F0B,0xFF30188D,0xFEF40DD8,0xF6540002,0x71FC25C5,0x1AC0882,0x1AC0882,0x1AC0882,0x1AC0882,0xFF8C04A5,0xFF8C04A5,0xFF8C04A5,0xFF7000EA,0xFF7000EA,0xF7680001,0x83FC0882,0x83FC0882,0x83FC0882,0xFF380271,0xFF380271, -0xF7280001,0xC3F80882,0xC3F80882,0xF65C0000,0xD4000884,0x83FC0882,0x83FC0882,0x83FC0882,0xFF380271,0xFF380271,0xF7280001,0xC3F80882,0xC3F80882,0xF65C0000,0xD4000884,0xC3F80882,0xC3F80882,0xF65C0000,0xD4000884,0xD4000884,0xFFA00784,0xFDA807C1,0x1AC0882,0xFF9806CD,0xFF840581,0xFF640431,0xFF640332,0xFF2801F4,0xFF940794,0xFF8406B2,0xB3FC0882,0xF65C0000, -0xB3FC0882,0x1F40022,0xFFF40012,0xFFF00005,0xFFEC0001,0xF5FC0020,0xFFEC000D,0xFFE40001,0xF9FC0020,0xFFCC0000,0xFA000020,0xF5FC0020,0xFFEC000D,0xFFE40001,0xF9FC0020,0xFFCC0000,0xFA000020,0xF9FC0020,0xFFCC0000,0xFA000020,0xFA000020,0xF5FC0020,0xFFEC000D,0xFFE40001,0xF9FC0020,0xFFCC0000,0xFA000020,0xF9FC0020,0xFFCC0000,0xFA000020,0xFA000020,0xF9FC0020, -0xFFCC0000,0xFA000020,0xFA000020,0xFA000020,0xFDF40019,0xC7FC0020,0xF3F40022,0xFFF00019,0xFFE80011,0xFFDC0009,0xFFD80000,0xFFC00000,0xFFF0001D,0xFFF00019,0xFFDC0001,0xFA000020,0xF9FC0020,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0x1200A69,0xFEF401A5,0xFEF401A5,0xFEF401A5,0xFEF401A5,0xFEF401A5, -0xFEF401A5,0xD8D80000,0xD8D80000,0xD8D80000,0xB4D80000,0x1AC0A69,0x1AC0A69,0x1AC0A69,0x1AC0A69,0x1AC0A69,0x1AC0A69,0xFA4C0000,0xFA4C0000,0xFA4C0000,0xB4900000,0x5BF80A69,0x5BF80A69,0x5BF80A69,0xAC000080,0x90000A69,0xF5180884,0x1200A69,0x1200A69,0xFF100620,0xFF000469,0xFEF802E4,0xFEF802E4,0xFEE40088,0xFB0807C1,0xFEF805C4,0xFEC40000,0xFA4C0000, -0x2FFC0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table219[] = { -0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x1580000,0x31F80000, -0x31F80000,0x31F80000,0x31F80000,0x72000001,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xE80000,0xF80000,0xF80000,0xF80000,0x1580000,0x1EC0000,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x1780001,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000, -0x37FC0000,0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x37FC0000,0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0x9DFC0000,0x9DFC0000,0x9DFC0000,0xBC000000,0xBC000000,0x1940000,0x1780001,0x1780001,0x1B80000,0x3D80000,0x9FC0000,0x9FC0000,0x5DFC0000,0x1B80000,0x3D80000,0x83FC0000,0x9DFC0000, -0x83FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1983373,0xFF8C2CEA,0xFF7C279F,0xFF7425C6,0xFF7423B5,0xFF5C1C42,0xFF5C1969,0xFF40125B,0xFF380E81,0xFF300A69,0xFF5C247C,0xFF441B19,0xFF441758,0xFF200D81,0xFF140831,0xFF0001F9,0xFF081024,0xFEF00984,0xF8DC0130,0xE8E80B58,0x65FC3373,0xFF4029BE,0xFF3425C5,0xFF0819E1,0xFEF01301,0xFECC0A69,0xFED81984,0xFEA00EBB,0xFE640002,0xE8740B59,0xB3FC3373, -0xFE6025C5,0xF6000C55,0xE00010E1,0xCA003375,0xFF802BE9,0xFF8C3087,0xFF8C323F,0xFF70239D,0xFF4C19E6,0xFF2C1017,0xFF240BD8,0xFF040545,0xFF782B2A,0xFF5C221F,0xFF100FAE,0xFE640002,0x9FFC3373,0x1D00B5B,0xFFC40A0B,0xFFC008EE,0xFFBC0882,0xFFBC0809,0xFFA4061E,0xFFA4053D,0xFF9402EB,0xFF88019A,0xFF780001,0xBDFC0B58,0xFFAC095D,0xFF9C0882,0xFF7C0556,0xFF640335, -0xFF380001,0xDFF80B58,0xFF340882,0xFE6C0000,0xE8000B58,0xBDFC0B58,0xFFAC095D,0xFF9C0882,0xFF7C0556,0xFF640335,0xFF380001,0xDFF80B58,0xFF340882,0xFE6C0000,0xE8000B58,0xDFF80B58,0xFF340882,0xFE6C0000,0xE8000B58,0xE8000B58,0xFDCC0A58,0xFFCC0AB3,0xFFCC0B13,0xFFB40941,0xFFA407A1,0xFF7C0571,0xFF7C03E8,0xFF4802D0,0xFFC40A5B,0xFFB00926,0xFF700892,0xFE6C0000, -0xD7FC0B58,0x17425C6,0x17425C6,0x17425C6,0x17425C6,0xFF5C1969,0xFF5C1969,0xFF5C1969,0xFF380E81,0xFF380E81,0xFF300A69,0xFF441758,0xFF441758,0xFF441758,0xFF140831,0xFF140831,0xFF0001F9,0xFEF00984,0xFEF00984,0xF4E00104,0xDCE80884,0x31FC25C5,0x31FC25C5,0x31FC25C5,0xFEF01301,0xFEF01301,0xFECC0A69,0xFEA00EBB,0xFEA00EBB,0xFE640002,0xDC800884,0x9BF825C5, -0x9BF825C5,0xF2000BD5,0xD6000BF4,0xBA0025C5,0xFF682010,0xFF6C2345,0x17425C6,0xFF541A99,0xFF401434,0xFF2C0E5E,0xFF240BD8,0xFF040545,0xFF501F9B,0xFF4419D4,0xFEFC0F78,0xFE640002,0x7FFC25C5,0x1BC0882,0x1BC0882,0x1BC0882,0x1BC0882,0xFFA4053D,0xFFA4053D,0xFFA4053D,0xFF88019A,0xFF88019A,0xFF780001,0x9BFC0882,0x9BFC0882,0x9BFC0882,0xFF640335,0xFF640335, -0xFF380001,0xCFF80882,0xCFF80882,0xFE6C0000,0xDC000884,0x9BFC0882,0x9BFC0882,0x9BFC0882,0xFF640335,0xFF640335,0xFF380001,0xCFF80882,0xCFF80882,0xFE6C0000,0xDC000884,0xCFF80882,0xCFF80882,0xFE6C0000,0xDC000884,0xDC000884,0xFBB407C1,0xF5B80802,0x1BC0882,0xFFA4071A,0xFF9805F5,0xFF7C04C8,0xFF7C03E8,0xFF4802D0,0xFDB007C1,0xFF9806F5,0xC1FC0882,0xFE6C0000, -0xC1FC0882,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0x1300A69,0xFF0001F9,0xFF0001F9,0xFF0001F9,0xFF0001F9,0xFF0001F9, -0xFF0001F9,0xE0E80000,0xE0E80000,0xE0E80000,0xBCE80000,0x1C40A69,0x1C40A69,0x1C40A69,0x1C40A69,0x1C40A69,0x1C40A69,0xFE640002,0xFE640002,0xFE640002,0xBCA00000,0x67F80A69,0x67F80A69,0x67F80A69,0xB8000050,0x98000A69,0xFD280884,0x1300A69,0x1300A69,0xFF200659,0xFF1404BD,0xFF080340,0xFF080340,0xFEF400CD,0xFF1007E9,0xFD100622,0xFED8000A,0xFE640002, -0x3FF80A69,}; -static const uint32_t g_etc1_to_bc7_m6_table220[] = { -0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0x1740000,0x1740000,0x1740000,0x1740000,0x1740000,0x1740000,0x1740000,0x1740000,0x1740000,0x1740000,0x3FF80000, -0x3FF80000,0x3FF80000,0x3FF80000,0x7C000000,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xF80001,0xD080000,0xD080000,0xD080000,0x1740000,0xBFC0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000, -0x53FC0000,0xABF80000,0xABF80000,0xABF80000,0xC4000001,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0x53FC0000,0xABF80000,0xABF80000,0xABF80000,0xC4000001,0xABF80000,0xABF80000,0xABF80000,0xC4000001,0xC4000001,0xBA40000,0x18C0000,0x18C0000,0x1CC0000,0x1F00000,0x2BFC0000,0x2BFC0000,0x73FC0000,0x1CC0000,0x1F00000,0x95FC0000,0xABF80000, -0x95FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1A02C8F,0xFF8C2726,0xFF8822B7,0xFF80212E,0xFF801F75,0xFF68194A,0xFF6816D9,0xFF5810F7,0xFF4C0DDA,0xFF440A69,0xFF681F8A,0xFF5017A7,0xFF50145E,0xFF380C25,0xFF2C07B9,0xFF180271,0xFF200D42,0xFF080792,0xFAF8009A,0xEAF8087A,0x71FC2C8F,0xFF58247E,0xFF44212D,0xFF201735,0xFF081185,0xFEE80A69,0xFEF015BA,0xFEB80C99,0xFE880020,0xEA900879,0xB9FC2C8F, -0xFE84212D,0xF8000AED,0xE6000C45,0xD0002C91,0xFF8C262B,0xF79C2A69,0xF9A02B7E,0xFF701F1D,0xFF5816D7,0xFF3C0E7E,0xFF3C0AF6,0xFF1404FA,0xFF8025C2,0xFF681E1D,0xFF1C0D7A,0xFE880020,0xA7FC2C8F,0x1D80876,0xFFD00776,0xFFC806AE,0xFFC40659,0xFFC405E2,0xFFB00491,0xFFB003E8,0xFFA00226,0xFF940131,0xFF8C0000,0xC7FC0876,0xFFB806FA,0xFFA80659,0xFF8803F3,0xFF700262, -0xFF540000,0xE3FC0876,0xFF4C0659,0xFEA00000,0xEA000879,0xC7FC0876,0xFFB806FA,0xFFA80659,0xFF8803F3,0xFF700262,0xFF540000,0xE3FC0876,0xFF4C0659,0xFEA00000,0xEA000879,0xE3FC0876,0xFF4C0659,0xFEA00000,0xEA000879,0xEA000879,0xFFD0079D,0xFFCC0822,0xF5D80851,0xFFC006D9,0xFFAC05A1,0xFFA0042C,0xFF8C02F2,0xFF640209,0xFFD007BE,0xFFC006B9,0xFF840669,0xFEA00000, -0xDDF80876,0x180212E,0x180212E,0x180212E,0x180212E,0xFF6816D9,0xFF6816D9,0xFF6816D9,0xFF4C0DDA,0xFF4C0DDA,0xFF440A69,0xFF50145E,0xFF50145E,0xFF50145E,0xFF2C07B9,0xFF2C07B9,0xFF180271,0xFF080792,0xFF080792,0xF8F4007E,0xE2F8065A,0x43FC212D,0x43FC212D,0x43FC212D,0xFF081185,0xFF081185,0xFEE80A69,0xFEB80C99,0xFEB80C99,0xFE880020,0xE294065A,0xA3FC212D, -0xA3FC212D,0xF8000AC9,0xDC0008A6,0xC000212D,0xFF741C62,0xF9801F46,0x180212E,0xFF5C17A3,0xFF541226,0xFF3C0CEE,0xFF3C0AF6,0xFF1404FA,0xFF641BFE,0xFF5016B4,0xFF1C0D49,0xFE880020,0x8BFC212D,0x1C40659,0x1C40659,0x1C40659,0x1C40659,0xFFB003E8,0xFFB003E8,0xFFB003E8,0xFF940131,0xFF940131,0xFF8C0000,0xA9FC0659,0xA9FC0659,0xA9FC0659,0xFF700262,0xFF700262, -0xFF540000,0xD5F80659,0xD5F80659,0xFEA00000,0xE2000659,0xA9FC0659,0xA9FC0659,0xA9FC0659,0xFF700262,0xFF700262,0xFF540000,0xD5F80659,0xD5F80659,0xFEA00000,0xE2000659,0xD5F80659,0xD5F80659,0xFEA00000,0xE2000659,0xE2000659,0xFFBC05B4,0xFBC405E9,0x1C40659,0xFFB4054A,0xFFAC0480,0xFF9403B5,0xFF8C02F2,0xFF640209,0xFFB405BA,0xFFA4053D,0xC9FC0659,0xFEA00000, -0xC9FC0659,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0x1440A69,0xFF180271,0xFF180271,0xFF180271,0xFF180271,0xFF180271, -0xFF180271,0xE8FC0001,0xE8FC0001,0xE8FC0001,0xC4F80002,0x1E00A69,0x1E00A69,0x1E00A69,0x1E00A69,0x1E00A69,0x1E00A69,0xFE880020,0xFE880020,0xFE880020,0xC4B40001,0x73FC0A69,0x73FC0A69,0x73FC0A69,0xC200002D,0xA0000A69,0xF73C08C5,0x1440A69,0x1440A69,0xFF2C06B2,0xFF280521,0xFF1803BA,0xFF1803BA,0xFF080131,0xF9300841,0xFF200665,0xFEF0002D,0xFE880020, -0x4FFC0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table221[] = { -0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x18C0000,0x4BF80000, -0x4BF80000,0x4BF80000,0x4BF80000,0x84000000,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x1080001,0x11C0000,0x11C0000,0x11C0000,0x18C0000,0x1BFC0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x19C0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000, -0x6BFC0000,0xB7F80000,0xB7F80000,0xB7F80000,0xCC000001,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0x6BFC0000,0xB7F80000,0xB7F80000,0xB7F80000,0xCC000001,0xB7F80000,0xB7F80000,0xB7F80000,0xCC000001,0xCC000001,0x1B80000,0x19C0000,0x19C0000,0x1E00000,0x11FC0000,0x49FC0000,0x49FC0000,0x87FC0000,0x1E00000,0x11FC0000,0xA3FC0000,0xB7F80000, -0xA3FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1A826F7,0xFF982262,0xFF901EC2,0xFF8C1D72,0xFF8C1C09,0xFF7416DE,0xFF7414CD,0xFF640FDB,0xFF580D4E,0xFF540A69,0xFF741B8A,0xFF6814D7,0xFF5C120E,0xFF400B3C,0xFF380755,0xFF2402E9,0xFF2C0B0A,0xFF200632,0xFD08003A,0xEF080642,0x7DFC26F7,0xFF64202E,0xFF541D72,0xFF2C1515,0xFF201055,0xFF000A69,0xFEFC12C2,0xFED80B12,0xFEA00050,0xEEA00642,0xBFFC26F7, -0xFEA01D72,0xFE000A6D,0xE60008D5,0xD40026F9,0xFFA021AB,0xFBA42501,0xFDA825FE,0xFF801B4F,0xFF6C146F,0xFF540D63,0xFF3C0A46,0xFF2804D2,0xFF9020F8,0xFF741A72,0xFF300B96,0xFEA00050,0xAFFC26F7,0x1DC0642,0xFFD4058D,0xFFCC04F1,0xFFCC04B1,0xFFC40462,0xFFBC035D,0xFFBC02E4,0xFFAC0192,0xFFA000E5,0xFF9C0000,0xCFFC0641,0xFFB8052A,0xFFB404B1,0xFFA002E3,0xFF8801BA, -0xFF6C0000,0xE7FC0641,0xFF6404B1,0xFED40000,0xEE000641,0xCFFC0641,0xFFB8052A,0xFFB404B1,0xFFA002E3,0xFF8801BA,0xFF6C0000,0xE7FC0641,0xFF6404B1,0xFED40000,0xEE000641,0xE7FC0641,0xFF6404B1,0xFED40000,0xEE000641,0xEE000641,0xFFD805A2,0xF7DC0600,0xF7DC0621,0xFFC4051A,0xFFC0042D,0xFFA80302,0xFFA0022D,0xFF740184,0xFFD005AE,0xFFC804E1,0xFF9404BA,0xFED40000, -0xE1FC0641,0x18C1D72,0x18C1D72,0x18C1D72,0x18C1D72,0xFF7414CD,0xFF7414CD,0xFF7414CD,0xFF580D4E,0xFF580D4E,0xFF540A69,0xFF5C120E,0xFF5C120E,0xFF5C120E,0xFF380755,0xFF380755,0xFF2402E9,0xFF200632,0xFF200632,0xFB040032,0xE70804B2,0x53FC1D72,0x53FC1D72,0x53FC1D72,0xFF201055,0xFF201055,0xFF000A69,0xFED80B12,0xFED80B12,0xFEA00050,0xE6A804B2,0xABF81D72, -0xABF81D72,0xFC000A6D,0xE0000631,0xC4001D75,0xFF841959,0xFD881BB6,0x18C1D72,0xFF701549,0xFF581073,0xFF4C0C25,0xFF3C0A46,0xFF2804D2,0xFF7818E3,0xFF5C14A8,0xFF300B72,0xFEA00050,0x95FC1D72,0x1CC04B1,0x1CC04B1,0x1CC04B1,0x1CC04B1,0xFFBC02E4,0xFFBC02E4,0xFFBC02E4,0xFFA000E5,0xFFA000E5,0xFF9C0000,0xB5FC04B1,0xB5FC04B1,0xB5FC04B1,0xFF8801BA,0xFF8801BA, -0xFF6C0000,0xDBF804B1,0xDBF804B1,0xFED40000,0xE60004B1,0xB5FC04B1,0xB5FC04B1,0xB5FC04B1,0xFF8801BA,0xFF8801BA,0xFF6C0000,0xDBF804B1,0xDBF804B1,0xFED40000,0xE60004B1,0xDBF804B1,0xDBF804B1,0xFED40000,0xE60004B1,0xE60004B1,0xF7C80451,0xFFCC0451,0x1CC04B1,0xFDC003F5,0xFFAC0340,0xFFA002A8,0xFFA0022D,0xFF740184,0xF9C40451,0xFFB003E8,0xD1FC04B1,0xFED40000, -0xD1FC04B1,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0x1540A69,0xFF2402E9,0xFF2402E9,0xFF2402E9,0xFF2402E9,0xFF2402E9, -0xFF2402E9,0xF10C0001,0xF10C0001,0xF10C0001,0xCD080002,0x1F80A69,0x1F80A69,0x1F80A69,0x1F80A69,0x1F80A69,0x1F80A69,0xFEA00050,0xFEA00050,0xFEA00050,0xCCC40001,0x7FFC0A69,0x7FFC0A69,0x7FFC0A69,0xCA000012,0xA8000A69,0xFF4C08C5,0x1540A69,0x1540A69,0xFF3C06F5,0xFF34057A,0xFF34042A,0xFF34042A,0xFF1C0195,0xFF3C0845,0xFF34069A,0xFEFC007A,0xFEA00050, -0x5FF80A69,}; -static const uint32_t g_etc1_to_bc7_m6_table222[] = { -0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x1A40000,0x57F80000, -0x57F80000,0x57F80000,0x57F80000,0x8C000000,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x1180001,0x12C0000,0x12C0000,0x12C0000,0x1A40000,0x29FC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x1AC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000, -0x83FC0000,0xC3F80000,0xC3F80000,0xC3F80000,0xD4000001,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0x83FC0000,0xC3F80000,0xC3F80000,0xC3F80000,0xD4000001,0xC3F80000,0xC3F80000,0xC3F80000,0xD4000001,0xD4000001,0x1C80000,0x1AC0000,0x1AC0000,0x3F00000,0x37FC0000,0x67FC0000,0x67FC0000,0x9BFC0000,0x3F00000,0x37FC0000,0xB3FC0000,0xC3F80000, -0xB3FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1B021DF,0xFFA41E16,0xFF9C1B22,0xFF981A0E,0xFF9818ED,0xFF8014AA,0xFF8012F1,0xFF700EDF,0xFF700CCE,0xFF640A69,0xFF8017F2,0xFF741257,0xFF681006,0xFF580A54,0xFF4C0723,0xFF3C0361,0xFF380932,0xFF2C0506,0xFF18000A,0xF1180462,0x89FC21DF,0xFF701C46,0xFF641A0D,0xFF381345,0xFF2C0F41,0xFF180A69,0xFF14101A,0xFEF009AA,0xFEB800A0,0xF0B80461,0xC5FC21DF, -0xFEC01A0D,0xFE200A69,0xEC0005DD,0xD80021E1,0xFFA01D2B,0xFFAC2019,0xFFAC2106,0xFF881837,0xFF78124A,0xFF5C0C29,0xFF5009B3,0xFF4004BB,0xFF941CEE,0xFF801747,0xFF3C0A2E,0xFEB800A0,0xB7FC21DF,0x1E40462,0xFFDC03DA,0xFFD80371,0xFFD40349,0xFFD00306,0xFFC80259,0xFFC401F9,0xFFB80116,0xFFB8009D,0xFFAC0000,0xD5FC0461,0xFFCC03A1,0xFFC00349,0xFFAC01FB,0xFFA00132, -0xFF840000,0xEBF80461,0xFF7C0349,0xFF040000,0xF0000461,0xD5FC0461,0xFFCC03A1,0xFFC00349,0xFFAC01FB,0xFFA00132,0xFF840000,0xEBF80461,0xFF7C0349,0xFF040000,0xF0000461,0xEBF80461,0xFF7C0349,0xFF040000,0xF0000461,0xF0000461,0xFFD803F2,0xF9E0042C,0xF9E00449,0xFFD40382,0xFFC802E2,0xFFB80212,0xFFA80172,0xFF8C0112,0xFFDC03F8,0xFFC80371,0xFFA40352,0xFF040000, -0xE5FC0461,0x1981A0E,0x1981A0E,0x1981A0E,0x1981A0E,0xFF8012F1,0xFF8012F1,0xFF8012F1,0xFF700CCE,0xFF700CCE,0xFF640A69,0xFF681006,0xFF681006,0xFF681006,0xFF4C0723,0xFF4C0723,0xFF3C0361,0xFF2C0506,0xFF2C0506,0xFD180009,0xEB18034A,0x63FC1A0D,0x63FC1A0D,0x63FC1A0D,0xFF2C0F41,0xFF2C0F41,0xFF180A69,0xFEF009AA,0xFEF009AA,0xFEB800A0,0xEABC034A,0xB3F81A0D, -0xB3F81A0D,0xFE200A69,0xE6000425,0xCA001A0D,0xFF841689,0xFF8C18AA,0x1981A0E,0xFF801316,0xFF6C0EEB,0xFF5C0B29,0xFF5009B3,0xFF4004BB,0xFF80163E,0xFF741269,0xFF3C0A0A,0xFEB800A0,0x9FF81A0D,0x1D40349,0x1D40349,0x1D40349,0x1D40349,0xFFC401F9,0xFFC401F9,0xFFC401F9,0xFFB8009D,0xFFB8009D,0xFFAC0000,0xC1FC0349,0xC1FC0349,0xC1FC0349,0xFFA00132,0xFFA00132, -0xFF840000,0xE1F80349,0xE1F80349,0xFF040000,0xEA000349,0xC1FC0349,0xC1FC0349,0xC1FC0349,0xFFA00132,0xFFA00132,0xFF840000,0xE1F80349,0xE1F80349,0xFF040000,0xEA000349,0xE1F80349,0xE1F80349,0xFF040000,0xEA000349,0xEA000349,0xFBD002F9,0xFFCC0311,0x1D40349,0xFFC402B9,0xFFC00244,0xFFB001D4,0xFFA80172,0xFF8C0112,0xFDCC02F9,0xFFC402AD,0xD9FC0349,0xFF040000, -0xD9FC0349,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0x1640A69,0xFF3C0361,0xFF3C0361,0xFF3C0361,0xFF3C0361,0xFF3C0361, -0xFF3C0361,0xF91C0001,0xF91C0001,0xF91C0001,0xD5180002,0x13FC0A69,0x13FC0A69,0x13FC0A69,0x13FC0A69,0x13FC0A69,0x13FC0A69,0xFEB800A0,0xFEB800A0,0xFEB800A0,0xD4D40001,0x8BFC0A69,0x8BFC0A69,0x8BFC0A69,0xD4000005,0xB0000A69,0xF75C090A,0x1640A69,0x1640A69,0xFF4C073A,0xFF5005E9,0xFF440492,0xFF440492,0xFF300209,0xFB500884,0xFD4C0708,0xFF1800CD,0xFEB800A0, -0x6DFC0A69,}; -static const uint32_t g_etc1_to_bc7_m6_table223[] = { -0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x61FC0000, -0x61FC0000,0x61FC0000,0x61FC0000,0x94000000,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x1280001,0x73C0000,0x73C0000,0x73C0000,0x1BC0000,0x39FC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x1BC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000, -0x9BFC0000,0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0x9BFC0000,0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0xCFF80000,0xCFF80000,0xCFF80000,0xDC000001,0xDC000001,0x5D80000,0x1BC0000,0x1BC0000,0x17FC0000,0x5FFC0000,0x85FC0000,0x85FC0000,0xAFFC0000,0x17FC0000,0x5FFC0000,0xC1FC0000,0xCFF80000, -0xC1FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1B81D47,0xFFB01A42,0xFFA017DF,0xFFA016FE,0xFF98161D,0xFF9012AA,0xFF8C1145,0xFF7C0E03,0xFF7C0C4A,0xFF740A69,0xFF8C14C2,0xFF801027,0xFF740E46,0xFF640994,0xFF5806FB,0xFF5003DA,0xFF4C07C3,0xFF380432,0xFF2C0002,0xF32802DA,0x95FC1D47,0xFF7C18C6,0xFF7416FD,0xFF581187,0xFF400E66,0xFF300A69,0xFF2C0DD2,0xFF080882,0xFED80112,0xF4CC02D9,0xCBFC1D47, -0xFEE416FD,0xFE540A69,0xF000039A,0xDC001D49,0xFFA8198F,0xFFAC1C09,0xF5B81CBF,0xFF981527,0xFF801057,0xFF700B37,0xFF680929,0xFF4804DA,0xFFA01957,0xFF90147F,0xFF50092A,0xFED80112,0xBFF81D47,0x1E802D6,0xFFE00289,0xFFE0023E,0xFFDC0221,0xFFDC01F2,0xFFD00185,0xFFD00145,0xFFCC00C0,0xFFC00068,0xFFBC0000,0xDFFC02D6,0xFFD8025D,0xFFCC0221,0xFFB80143,0xFFAC00C2, -0xFF9C0000,0xEFFC02D6,0xFF940221,0xFF340000,0xF20002D9,0xDFFC02D6,0xFFD8025D,0xFFCC0221,0xFFB80143,0xFFAC00C2,0xFF9C0000,0xEFFC02D6,0xFF940221,0xFF340000,0xF20002D9,0xEFFC02D6,0xFF940221,0xFF340000,0xF20002D9,0xF20002D9,0xFFE40296,0xFDE802AC,0xFDE802C1,0xFFDC0238,0xFFD401E1,0xFFB80162,0xFFB800F5,0xFFA000A9,0xFFDC0298,0xFFDC0233,0xFFB40225,0xFF340000, -0xEBFC02D6,0x1A016FE,0x1A016FE,0x1A016FE,0x1A016FE,0xFF8C1145,0xFF8C1145,0xFF8C1145,0xFF7C0C4A,0xFF7C0C4A,0xFF740A69,0xFF740E46,0xFF740E46,0xFF740E46,0xFF5806FB,0xFF5806FB,0xFF5003DA,0xFF380432,0xFF380432,0xFF2C0002,0xEF280222,0x75FC16FD,0x75FC16FD,0x75FC16FD,0xFF400E66,0xFF400E66,0xFF300A69,0xFF080882,0xFF080882,0xFED80112,0xEED00222,0xBBFC16FD, -0xBBFC16FD,0xFE540A69,0xEC000289,0xD00016FD,0xFF901433,0xF9A015ED,0x1A016FE,0xFF88111E,0xFF800DB3,0xFF680A6E,0xFF680929,0xFF4804DA,0xFF9013B8,0xFF8010A6,0xFF500911,0xFED80112,0xA9FC16FD,0x1DC0221,0x1DC0221,0x1DC0221,0x1DC0221,0xFFD00145,0xFFD00145,0xFFD00145,0xFFC00068,0xFFC00068,0xFFBC0000,0xCDFC0221,0xCDFC0221,0xCDFC0221,0xFFAC00C2,0xFFAC00C2, -0xFF9C0000,0xE7F80221,0xE7F80221,0xFF340000,0xEE000221,0xCDFC0221,0xCDFC0221,0xCDFC0221,0xFFAC00C2,0xFFAC00C2,0xFF9C0000,0xE7F80221,0xE7F80221,0xFF340000,0xEE000221,0xE7F80221,0xE7F80221,0xFF340000,0xEE000221,0xEE000221,0xFFD801E1,0xF7DC0200,0x1DC0221,0xFFD401C2,0xFFC80179,0xFFB80131,0xFFB800F5,0xFFA000A9,0xFFD001ED,0xFFC801BD,0xDFFC0221,0xFF340000, -0xDFFC0221,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0x1740A69,0xFF5003DA,0xFF5003DA,0xFF5003DA,0xFF5003DA,0xFF5003DA, -0xFF5003DA,0xFF2C0002,0xFF2C0002,0xFF2C0002,0xDD280002,0x2BFC0A69,0x2BFC0A69,0x2BFC0A69,0x2BFC0A69,0x2BFC0A69,0x2BFC0A69,0xFED80112,0xFED80112,0xFED80112,0xDCE40001,0x97FC0A69,0x97FC0A69,0x97FC0A69,0xDC040001,0xB8000A69,0xFF6C090A,0x1740A69,0x1740A69,0xFF680782,0xFF580652,0xFF540502,0xFF540502,0xFF40028A,0xFF5808B4,0xFF500750,0xFF300132,0xFED80112, -0x7DF80A69,}; -static const uint32_t g_etc1_to_bc7_m6_table224[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x5C0001,0x5C0001,0x5C0001,0x5C0001,0x8C0000,0x8C0000,0x8C0000,0x1180000,0x1180000,0x2E000000,0x8C0000,0x8C0000,0x8C0000,0x1180000,0x1180000,0x2E000000,0x1180000,0x1180000,0x2E000000,0x2E000000,0x8C0000,0x8C0000,0x8C0000,0x1180000,0x1180000,0x2E000000,0x1180000,0x1180000,0x2E000000,0x2E000000,0x1180000, -0x1180000,0x2E000000,0x2E000000,0x2E000000,0x6C0000,0x640000,0x5C0001,0x800000,0x9C0000,0xC80000,0xE40000,0x15C0000,0x2740000,0x8C0000,0xC80000,0x2E000000,0xC80000,0x16C0001,0x25FC0000,0x95F80000,0xB6000000,0x25FC0000,0x95F80000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0x25FC0000,0x95F80000,0xB6000000,0x95F80000,0xB6000000, -0xB6000000,0x95F80000,0xB6000000,0xB6000000,0xB6000000,0x25FC0000,0x95F80000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0xB6000000,0x95F80000,0xB6000000,0xB6000000,0xB6000000,0xB6000000,0x1CC0000,0xB840000,0xB840000,0x4FFC0000,0x87F80000,0xA5F80000,0xB6000000,0xB6000000,0x3F00000,0x67FC0000,0xB3D80000,0xB6000000, -0x79FC0000,0x7457CA,0xFE442041,0xFE2005E5,0xB62005C1,0xFE182962,0xF4000585,0xB4000094,0xAA002420,0x96000F64,0x72002420,0xEA004691,0xCC001B02,0xA2000CAD,0x98002F04,0x88001816,0x6E002950,0x72004691,0x72002CBD,0x5C0036EC,0x4C004691,0xA857CA,0xAE002A47,0x92001711,0x860037C0,0x80001F6C,0x68002DF0,0x68004B75,0x6C0031E9,0x56003AAD,0x480048F9,0x15857CA, -0x5C003E8D,0x500043F0,0x44004F85,0x380057CA,0xFE3438A1,0xFA644CC8,0xFE6C4B89,0xFE001F2A,0xE200192D,0xAC001816,0x92001141,0x82001EEA,0xFE1837C5,0xFC002039,0x86002331,0x56003AAD,0xF057CA,0x984692,0xFE5C1A55,0xFE300461,0xB6300451,0xFE302822,0xF4000585,0xB4000094,0xAA002420,0x96000F64,0x72002420,0xE44691,0xCC001B02,0xA2000CAD,0x98002F04,0x88001816, -0x6E002950,0x1D04691,0x72002CBD,0x5C0036EC,0x4C004691,0xE44691,0xCC001B02,0xA2000CAD,0x98002F04,0x88001816,0x6E002950,0x1D04691,0x72002CBD,0x5C0036EC,0x4C004691,0x1D04691,0x72002CBD,0x5C0036EC,0x4C004691,0x4C004691,0xFE503353,0xFC883FC2,0xFE8C3D0E,0xFE001F2A,0xE200192D,0xAC001816,0x92001141,0x82001EEA,0xFE3433C8,0xFC001F39,0x860022F1,0x5C0036EC, -0x1484691,0x2005C1,0x2005C1,0x2005C1,0x2005C1,0x8E000000,0x8E000000,0x8E000000,0x44000001,0x44000001,0x2E000000,0x44000451,0x44000451,0x44000451,0x3400019A,0x3400019A,0x280000CD,0x22000451,0x22000451,0x200002A8,0x16000451,0x3005C1,0x3005C1,0x3005C1,0x2E0002A3,0x2E0002A3,0x28000176,0x1E0004C1,0x1E0004C1,0x1C000311,0x1400048E,0x5C05C1, -0x5C05C1,0x16000402,0x1000051E,0xE0005C2,0xF6000171,0xF21402AC,0x2005C1,0x900001D0,0x5A0001A8,0x440001A8,0x40000161,0x2E0001F9,0x9600030A,0x66000286,0x2A00045A,0x1C000311,0x4005C1,0x300451,0x300451,0x300451,0x300451,0x8E000000,0x8E000000,0x8E000000,0x44000001,0x44000001,0x2E000000,0x440451,0x440451,0x440451,0x3400019A,0x3400019A, -0x280000CD,0x880451,0x880451,0x200002A8,0x16000451,0x440451,0x440451,0x440451,0x3400019A,0x3400019A,0x280000CD,0x880451,0x880451,0x200002A8,0x16000451,0x880451,0x880451,0x200002A8,0x16000451,0x16000451,0xF6000171,0xF8200200,0x300451,0x900001D0,0x5A0001A8,0x440001A8,0x40000161,0x2E0001F9,0x960002B9,0x6C00025D,0x600451,0x200002A8, -0x600451,0xE42422,0xFE980C49,0xF8600001,0xB65C0001,0x1542420,0xF4000585,0xB4000094,0x2FFC2420,0x96000F64,0x72002420,0x1542420,0xF4000585,0xB4000094,0x2FFC2420,0x96000F64,0x72002420,0x2FFC2420,0x96000F64,0x72002420,0x72002420,0x1542420,0xF4000585,0xB4000094,0x2FFC2420,0x96000F64,0x72002420,0x2FFC2420,0x96000F64,0x72002420,0x72002420,0x2FFC2420, -0x96000F64,0x72002420,0x72002420,0x72002420,0xFAAC1E85,0x2F42420,0xF6DC1F81,0xFE4015A0,0xF6001009,0xBC000EF9,0x9E000AE1,0x9000133D,0xFE901E08,0xFE14145D,0xA6000988,0x72002420,0x1E82420,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table225[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x6C0001,0x6C0001,0x6C0001,0x6C0001,0xA40000,0xA40000,0xA40000,0x14C0000,0x14C0000,0x36000000,0xA40000,0xA40000,0xA40000,0x14C0000,0x14C0000,0x36000000,0x14C0000,0x14C0000,0x36000000,0x36000000,0xA40000,0xA40000,0xA40000,0x14C0000,0x14C0000,0x36000000,0x14C0000,0x14C0000,0x36000000,0x36000000,0x14C0000, -0x14C0000,0x36000000,0x36000000,0x36000000,0x800000,0x2740000,0x6C0001,0x940000,0xB80000,0xE80000,0x10C0000,0x1980000,0x2880000,0xA40000,0xE80000,0x36000000,0xE80000,0x17C0001,0x3DFC0000,0xA1F80000,0xBE000000,0x3DFC0000,0xA1F80000,0xBE000000,0xA1F80000,0xBE000000,0xBE000000,0x3DFC0000,0xA1F80000,0xBE000000,0xA1F80000,0xBE000000, -0xBE000000,0xA1F80000,0xBE000000,0xBE000000,0xBE000000,0x3DFC0000,0xA1F80000,0xBE000000,0xA1F80000,0xBE000000,0xBE000000,0xA1F80000,0xBE000000,0xBE000000,0xBE000000,0xA1F80000,0xBE000000,0xBE000000,0xBE000000,0xBE000000,0x1E00000,0x1980000,0x1980000,0x63FC0000,0x95F80000,0xAFF80000,0xBE000000,0xBE000000,0xFFC0000,0x77FC0000,0xBBE80000,0xBE000000, -0x87FC0000,0x7C5F3A,0xFE442651,0xFE280892,0xBE2407E1,0xFE242C5A,0xFA0004BD,0xBA000034,0xB6002420,0x9C000E48,0x7A002420,0xF8004B86,0xE2001C71,0xA8000DB5,0xA200306B,0x92001820,0x74002A0C,0x78004B89,0x78002F69,0x660039D9,0x50004B89,0xB45F3A,0xBA002DB7,0xA2001906,0x92003AA0,0x86002070,0x6E002F60,0x6E005129,0x72003541,0x60003DE9,0x4C004E42,0x1705F3A, -0x5C0043BD,0x56004888,0x440055C5,0x3C005F3A,0xFE503ED3,0xFE6C5368,0xFE6C52D9,0xFE142325,0xF600193D,0xBC0017ED,0x9E0010D5,0x8A001F71,0xFE183E05,0xFC002279,0x9400251B,0x60003DE9,0x1005F3A,0xA44B86,0xFE681ED9,0xFE38064D,0xBE3805E9,0xFE3C2A76,0xFA0004BD,0xBA000034,0xB6002420,0x9C000E48,0x7A002420,0xF44B86,0xE2001C71,0xA8000DB5,0xA200306B,0x92001820, -0x74002A0C,0x1F04B86,0x78002F69,0x660039D9,0x50004B89,0xF44B86,0xE2001C71,0xA8000DB5,0xA200306B,0x92001820,0x74002A0C,0x1F04B86,0x78002F69,0x660039D9,0x50004B89,0x1F04B86,0x78002F69,0x660039D9,0x50004B89,0x50004B89,0xFE64380A,0xFE8C4466,0xFE8C426E,0xFE1422C1,0xF600193D,0xBC0017ED,0x9E0010D5,0x8A001F71,0xFE443845,0xFC002179,0x940024CA,0x660039D9, -0x15C4B86,0x2407E1,0x2407E1,0x2407E1,0x2407E1,0xA6000000,0xA6000000,0xA6000000,0x50000001,0x50000001,0x36000000,0x500005E9,0x500005E9,0x500005E9,0x40000232,0x40000232,0x2E000121,0x280005E9,0x280005E9,0x260003A4,0x1A0005E9,0x3407E1,0x3407E1,0x3407E1,0x340003AB,0x340003AB,0x2E000202,0x2200067A,0x2200067A,0x20000441,0x18000635,0x6807E1, -0x6807E1,0x1C000576,0x160006F2,0x120007E2,0xF8040269,0xF61C042C,0x2407E1,0xA400028A,0x68000249,0x4C000254,0x4C0001E1,0x380002BA,0xA400042E,0x72000363,0x320005F2,0x20000441,0x4C07E1,0x3805E9,0x3805E9,0x3805E9,0x3805E9,0xA6000000,0xA6000000,0xA6000000,0x50000001,0x50000001,0x36000000,0x5005E9,0x5005E9,0x5005E9,0x40000232,0x40000232, -0x2E000121,0xA005E9,0xA005E9,0x260003A4,0x1A0005E9,0x5005E9,0x5005E9,0x5005E9,0x40000232,0x40000232,0x2E000121,0xA005E9,0xA005E9,0x260003A4,0x1A0005E9,0xA005E9,0xA005E9,0x260003A4,0x1A0005E9,0x1A0005E9,0xF8040265,0xFC280320,0x3805E9,0xA400028A,0x68000249,0x4C000254,0x4C0001E1,0x380002BA,0xB40003B5,0x72000332,0x7005E9,0x260003A4, -0x7005E9,0xF42422,0xFEB00D41,0xFE700002,0xBE6C0001,0x16C2420,0xFA0004BD,0xBA000034,0x3BFC2420,0x9C000E48,0x7A002420,0x16C2420,0xFA0004BD,0xBA000034,0x3BFC2420,0x9C000E48,0x7A002420,0x3BFC2420,0x9C000E48,0x7A002420,0x7A002420,0x16C2420,0xFA0004BD,0xBA000034,0x3BFC2420,0x9C000E48,0x7A002420,0x3BFC2420,0x9C000E48,0x7A002420,0x7A002420,0x3BFC2420, -0x9C000E48,0x7A002420,0x7A002420,0x7A002420,0xFEB41EAD,0xB042420,0xFEEC1F81,0xFE6C1661,0xF6000F79,0xBC000E29,0xA80009CD,0x94001248,0xFE981E3A,0xFE401528,0xB400084A,0x7A002420,0x7FC2420,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table226[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x7C0001,0x7C0001,0x7C0001,0x7C0001,0xBC0000,0xBC0000,0xBC0000,0x17C0000,0x17C0000,0x3E000000,0xBC0000,0xBC0000,0xBC0000,0x17C0000,0x17C0000,0x3E000000,0x17C0000,0x17C0000,0x3E000000,0x3E000000,0xBC0000,0xBC0000,0xBC0000,0x17C0000,0x17C0000,0x3E000000,0x17C0000,0x17C0000,0x3E000000,0x3E000000,0x17C0000, -0x17C0000,0x3E000000,0x3E000000,0x3E000000,0x4900000,0xA840000,0x7C0001,0xAC0000,0xD40000,0x10C0000,0x1340000,0x1D40000,0x29C0000,0xBC0000,0x10C0000,0x3E000000,0x10C0000,0x18C0001,0x55FC0000,0xADF80000,0xC6000000,0x55FC0000,0xADF80000,0xC6000000,0xADF80000,0xC6000000,0xC6000000,0x55FC0000,0xADF80000,0xC6000000,0xADF80000,0xC6000000, -0xC6000000,0xADF80000,0xC6000000,0xC6000000,0xC6000000,0x55FC0000,0xADF80000,0xC6000000,0xADF80000,0xC6000000,0xC6000000,0xADF80000,0xC6000000,0xC6000000,0xC6000000,0xADF80000,0xC6000000,0xC6000000,0xC6000000,0xC6000000,0x1F40000,0x1A80000,0x1A80000,0x77FC0000,0xA1FC0000,0xB9F80000,0xC6000000,0xC6000000,0x2FFC0000,0x89FC0000,0xC3F80000,0xC6000000, -0x97FC0000,0x84672A,0xFE502CD1,0xFE2C0BE9,0xC6280A59,0xFE303022,0xFE0404C5,0xC6000004,0xC2002420,0xA6000D61,0x82002420,0xFE0450E5,0xE8001E1D,0xB4000EED,0xAE003223,0x98001848,0x7A002AE0,0x800050D1,0x7E00325D,0x6C003CB9,0x560050D1,0xC0672A,0xC6003187,0xA2001B66,0xA2003D84,0x920021A0,0x740030F0,0x74005745,0x780038E9,0x6600415D,0x520053E2,0x188672A, -0x66004956,0x5C004D70,0x4A005C5D,0x4000672A,0xFE504513,0xFE6C5AE8,0xF67C5B12,0xFE142775,0xF60019AD,0xBC00181D,0xA60010A0,0x90002036,0xFE3444C8,0xFE0025AA,0x96002723,0x6600415D,0x114672A,0xB050D2,0xFE7423CD,0xFE4008E2,0xC64007C1,0xFE442D24,0xFE0404C1,0xC6000004,0xC2002420,0xA6000D61,0x82002420,0x30050D1,0xE8001E1D,0xB4000EED,0xAE003223,0x98001848, -0x7A002AE0,0x5FC50D1,0x7E00325D,0x6C003CB9,0x560050D1,0x30050D1,0xE8001E1D,0xB4000EED,0xAE003223,0x98001848,0x7A002AE0,0x5FC50D1,0x7E00325D,0x6C003CB9,0x560050D1,0x5FC50D1,0x7E00325D,0x6C003CB9,0x560050D1,0x560050D1,0xFE783CC9,0xF8A049A0,0xFCA84789,0xFE142711,0xF60019AD,0xBC00181D,0xA60010A0,0x90002036,0xFC583D2B,0xFE0424A1,0x960026D2,0x6C003CB9, -0x17050D1,0x280A59,0x280A59,0x280A59,0x280A59,0xBE000000,0xBE000000,0xBE000000,0x5C000001,0x5C000001,0x3E000000,0x5C0007C1,0x5C0007C1,0x5C0007C1,0x4C0002EA,0x4C0002EA,0x3A000179,0x2E0007C1,0x2E0007C1,0x2C0004C8,0x1E0007C1,0x23C0A56,0x23C0A56,0x23C0A56,0x400004DB,0x400004DB,0x340002A6,0x28000882,0x28000882,0x2600058D,0x1E000825,0x7C0A56, -0x7C0A56,0x2000074C,0x1A00092D,0x14000A56,0xFA0803B9,0xF8200600,0x280A59,0xBA00034D,0x7C000301,0x62000308,0x5600028A,0x40000385,0xC200057A,0x9000047B,0x3A0007D1,0x2600058D,0x580A56,0x4007C1,0x4007C1,0x4007C1,0x4007C1,0xBE000000,0xBE000000,0xBE000000,0x5C000001,0x5C000001,0x3E000000,0x5C07C1,0x5C07C1,0x5C07C1,0x4C0002EA,0x4C0002EA, -0x3A000179,0xB807C1,0xB807C1,0x2C0004C8,0x1E0007C1,0x5C07C1,0x5C07C1,0x5C07C1,0x4C0002EA,0x4C0002EA,0x3A000179,0xB807C1,0xB807C1,0x2C0004C8,0x1E0007C1,0xB807C1,0xB807C1,0x2C0004C8,0x1E0007C1,0x1E0007C1,0xFC0C039D,0xFE2C0488,0x4007C1,0xBA00034D,0x7C000301,0x62000308,0x5600028A,0x40000385,0xC20004EA,0x9000042A,0x8007C1,0x2C0004C8, -0x8007C1,0x1042422,0xFEC40E2A,0xFE84002D,0xC67C0001,0x1842420,0xFE0804B5,0xC6000004,0x47FC2420,0xA6000D61,0x82002420,0x1842420,0xFE0804B5,0xC6000004,0x47FC2420,0xA6000D61,0x82002420,0x47FC2420,0xA6000D61,0x82002420,0x82002420,0x1842420,0xFE0804B5,0xC6000004,0x47FC2420,0xA6000D61,0x82002420,0x47FC2420,0xA6000D61,0x82002420,0x82002420,0x47FC2420, -0xA6000D61,0x82002420,0x82002420,0x82002420,0xFED01F04,0x1182420,0xF6FC2002,0xFE6C1711,0xFE0C0F79,0xD0000D00,0xB00008C8,0x9C001174,0xFEAC1EC1,0xFE4C15DD,0xBE000734,0x82002420,0x17FC2420,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table227[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0x8C0001,0x8C0001,0x8C0001,0x8C0001,0x2D00000,0x2D00000,0x2D00000,0x1AC0000,0x1AC0000,0x46000000,0x2D00000,0x2D00000,0x2D00000,0x1AC0000,0x1AC0000,0x46000000,0x1AC0000,0x1AC0000,0x46000000,0x46000000,0x2D00000,0x2D00000,0x2D00000,0x1AC0000,0x1AC0000,0x46000000,0x1AC0000,0x1AC0000,0x46000000,0x46000000,0x1AC0000, -0x1AC0000,0x46000000,0x46000000,0x46000000,0xA40000,0x980000,0x8C0001,0xC00000,0x2EC0000,0x12C0000,0x15C0000,0x5F80000,0x2B00000,0x2D00000,0x12C0000,0x46000000,0x12C0000,0x19C0001,0x6FFC0000,0xB9F80000,0xCE000000,0x6FFC0000,0xB9F80000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0x6FFC0000,0xB9F80000,0xCE000000,0xB9F80000,0xCE000000, -0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0x6FFC0000,0xB9F80000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0xB9F80000,0xCE000000,0xCE000000,0xCE000000,0xCE000000,0x15FC0000,0x5B80000,0x5B80000,0x8BFC0000,0xAFFC0000,0xC3F80000,0xCE000000,0xCE000000,0x4DFC0000,0x99FC0000,0xCDCC0000,0xCE000000, -0xA5FC0000,0x8C6F9A,0xFE5C3409,0xFE380FFD,0xCE300D21,0xFE303482,0xFE080585,0xD0000008,0xCE002420,0xAC000C6D,0x8A002420,0xFE0C56D6,0xF4002035,0xBA00107D,0xB40033FB,0xA200187A,0x80002BCC,0x8A005671,0x84003599,0x72003FC9,0x5C005671,0xCC6F9A,0xD20035B7,0xAE001E26,0xA20040A4,0x980022EC,0x7A0032A0,0x80005DB5,0x7E003CE1,0x6C004509,0x580059E2,0x1A06F9A, -0x6C004F2A,0x5C0052B0,0x5000636D,0x44006F9A,0xFE504C53,0xF8806322,0xFA846322,0xFE142CC5,0xFC001ADA,0xCC001898,0xAE001085,0x980020F9,0xFE344BA8,0xFE0029CA,0x9E0029C1,0x6C004509,0x1246F9A,0xB85672,0xFE802931,0xFE4C0BF6,0xCE4809D9,0xFE503080,0xFE080575,0xCE040008,0xCE002420,0xAC000C6D,0x8A002420,0x1145671,0xF4002035,0xBA00107D,0xB40033FB,0xA200187A, -0x80002BCC,0xFF85671,0x84003599,0x72003FC9,0x5C005671,0x1145671,0xF4002035,0xBA00107D,0xB40033FB,0xA200187A,0x80002BCC,0xFF85671,0x84003599,0x72003FC9,0x5C005671,0xFF85671,0x84003599,0x72003FC9,0x5C005671,0x5C005671,0xFE7841F9,0xFEAC4ECC,0xFEAC4D09,0xFE302BF5,0xFC001ADA,0xCC001898,0xAE001085,0x980020F9,0xFE5C422B,0xFE0428A1,0x9E00295D,0x72003FC9, -0x18C5671,0x300D21,0x300D21,0x300D21,0x300D21,0xD6000000,0xD6000000,0xD6000000,0x68000000,0x68000000,0x46000000,0x6A0009D9,0x6A0009D9,0x6A0009D9,0x520003BA,0x520003BA,0x400001DD,0x340009D9,0x340009D9,0x32000614,0x220009D9,0x2440D21,0x2440D21,0x2440D21,0x46000633,0x46000633,0x3A000362,0x2E000AD2,0x2E000AD2,0x2C000709,0x22000A52,0x8C0D21, -0x8C0D21,0x26000948,0x1C000B96,0x16000D22,0xFC0C0561,0xFA24082C,0x300D21,0xD000042A,0x860003D9,0x6A0003D4,0x62000334,0x4A000484,0xE4000704,0x900005AB,0x400009E9,0x2C000709,0x640D21,0x4809D9,0x4809D9,0x4809D9,0x4809D9,0xD6000000,0xD6000000,0xD6000000,0x68000000,0x68000000,0x46000000,0x6809D9,0x6809D9,0x6809D9,0x520003BA,0x520003BA, -0x400001DD,0xD009D9,0xD009D9,0x32000614,0x220009D9,0x6809D9,0x6809D9,0x6809D9,0x520003BA,0x520003BA,0x400001DD,0xD009D9,0xD009D9,0x32000614,0x220009D9,0xD009D9,0xD009D9,0x32000614,0x220009D9,0x220009D9,0xFE100521,0xF4380659,0x4809D9,0xD000042A,0x860003D9,0x6A0003D4,0x62000334,0x4A000484,0xF400063D,0x9A000551,0x9409D9,0x32000614, -0x9409D9,0x1142422,0xFED00F3A,0xFE940082,0xCE8C0001,0x19C2420,0xFE080565,0xCE080000,0x53FC2420,0xAC000C6D,0x8A002420,0x19C2420,0xFE080565,0xCE080000,0x53FC2420,0xAC000C6D,0x8A002420,0x53FC2420,0xAC000C6D,0x8A002420,0x8A002420,0x19C2420,0xFE080565,0xCE080000,0x53FC2420,0xAC000C6D,0x8A002420,0x53FC2420,0xAC000C6D,0x8A002420,0x8A002420,0x53FC2420, -0xAC000C6D,0x8A002420,0x8A002420,0x8A002420,0xF6E81F81,0x1282420,0xFF0C2002,0xFE9417C2,0xFE100FE9,0xDA000BE9,0xB40007B4,0xA800109D,0xFCD01F02,0xFE6416CD,0xC6000659,0x8A002420,0x25FC2420,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table228[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0xA00000,0xA00000,0xA00000,0xA00000,0xEC0000,0xEC0000,0xEC0000,0x1E40000,0x1E40000,0x4E000001,0xEC0000,0xEC0000,0xEC0000,0x1E40000,0x1E40000,0x4E000001,0x1E40000,0x1E40000,0x4E000001,0x4E000001,0xEC0000,0xEC0000,0xEC0000,0x1E40000,0x1E40000,0x4E000001,0x1E40000,0x1E40000,0x4E000001,0x4E000001,0x1E40000, -0x1E40000,0x4E000001,0x4E000001,0x4E000001,0x4B80000,0xCA80000,0xA00000,0xD80000,0x10C0000,0x1540000,0x1880000,0x11FC0000,0xC80000,0xEC0000,0x1540000,0x4E000001,0x1540000,0x1B00000,0x89FC0000,0xC5FC0000,0xD6000001,0x89FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0x89FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xD6000001, -0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0x89FC0000,0xC5FC0000,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0xC5FC0000,0xD6000001,0xD6000001,0xD6000001,0xD6000001,0x41FC0000,0x1CC0000,0x1CC0000,0xA1FC0000,0xBFF80000,0xCFF40000,0xD6000001,0xD6000001,0x6FFC0000,0xADFC0000,0xD5FC0000,0xD6000001, -0xB7FC0000,0x9479B0,0xFE5C3CE5,0xFE381579,0xD63410AD,0xFE3C3A48,0xFE08077D,0xD804003A,0xDC002420,0xB8000B51,0x92002422,0xFE185E40,0xFA0022E9,0xC6001283,0xC0003629,0xAE0018E6,0x8C002CC6,0x92005D2B,0x90003993,0x78004373,0x62005D2B,0xDC79B0,0xE2003A6D,0xB40021B4,0xAE00448A,0xA200246F,0x86003492,0x8600656B,0x840041BB,0x7200496F,0x5E006114,0x1BC79B0, -0x72005634,0x66005924,0x56006BE7,0x480079B4,0xFE645556,0xFC886C60,0xFE8C6CD0,0xFE1833EE,0xFC001D3E,0xDA0018C5,0xBC00109D,0xA00021F6,0xFE345496,0xFE042F97,0xA6002D3F,0x7200496F,0x13879B0,0xC45D2C,0xFE8C2FC1,0xFE581024,0xD6500C81,0xFE5C34E6,0xFE140759,0xDA080032,0xDC002420,0xB8000B51,0x92002422,0x3245D2B,0xFA0022E9,0xC6001283,0xC0003629,0xAE0018E6, -0x8C002CC6,0x17FC5D2B,0x90003993,0x78004373,0x62005D2B,0x3245D2B,0xFA0022E9,0xC6001283,0xC0003629,0xAE0018E6,0x8C002CC6,0x17FC5D2B,0x90003993,0x78004373,0x62005D2B,0x17FC5D2B,0x90003993,0x78004373,0x62005D2B,0x62005D2B,0xFE9448B3,0xFEAC5556,0xF8C05444,0xFE4031C3,0xFC001D3E,0xDA0018C5,0xBC00109D,0xA00021F6,0xFE5C48FD,0xFE042E53,0xA6002CDB,0x78004373, -0x1A45D2B,0x3410AC,0x3410AC,0x3410AC,0x3410AC,0xF2000000,0xF2000000,0xF2000000,0x76000000,0x76000000,0x4E000001,0x78000C80,0x78000C80,0x78000C80,0x620004A9,0x620004A9,0x46000262,0x3A000C80,0x3A000C80,0x380007B5,0x26000C82,0x5010AB,0x5010AB,0x5010AB,0x4C0007F2,0x4C0007F2,0x40000453,0x34000DC1,0x34000DC1,0x320008EE,0x24000D22,0xA010AB, -0xA010AB,0x2C000BD1,0x20000EC6,0x1A0010AB,0xFE1007AA,0xFE2C0B01,0x3410AC,0xEC000562,0x9E0004E1,0x800004FD,0x6E00040D,0x560005C9,0xF40008E9,0xB2000742,0x46000C98,0x320008EE,0x7010AB,0x500C80,0x500C80,0x500C80,0x500C80,0xF2000000,0xF2000000,0xF2000000,0x76000000,0x76000000,0x4E000001,0x2740C80,0x2740C80,0x2740C80,0x620004A9,0x620004A9, -0x46000262,0xF00C80,0xF00C80,0x380007B5,0x26000C82,0x2740C80,0x2740C80,0x2740C80,0x620004A9,0x620004A9,0x46000262,0xF00C80,0xF00C80,0x380007B5,0x26000C82,0xF00C80,0xF00C80,0x380007B5,0x26000C82,0x26000C82,0xFE200745,0xF8400884,0x500C80,0xEC000562,0x9E0004E1,0x800004FD,0x6E00040D,0x560005C9,0xF4000808,0xB20006C9,0xA80C80,0x380007B5, -0xA80C80,0x1282420,0xFEE81074,0xFEAC0124,0xD6A00001,0x1B82420,0xFE2C069D,0xD61C0001,0x61F82420,0xB8000B51,0x92002422,0x1B82420,0xFE2C069D,0xD61C0001,0x61F82420,0xB8000B51,0x92002422,0x61F82420,0xB8000B51,0x92002422,0x92002422,0x1B82420,0xFE2C069D,0xD61C0001,0x61F82420,0xB8000B51,0x92002422,0x61F82420,0xB8000B51,0x92002422,0x92002422,0x61F82420, -0xB8000B51,0x92002422,0x92002422,0x92002422,0xFEF81F85,0x13C2420,0xF9202081,0xFEB018A0,0xFE281109,0xE8000B14,0xBE0006B2,0xB2000F82,0xFEDC1F22,0xFE881771,0xD4000541,0x92002422,0x37FC2420,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table229[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x1,0xB00000,0xB00000,0xB00000,0xB00000,0x1040000,0x1040000,0x1040000,0x7FC0000,0x7FC0000,0x56000001,0x1040000,0x1040000,0x1040000,0x7FC0000,0x7FC0000,0x56000001,0x7FC0000,0x7FC0000,0x56000001,0x56000001,0x1040000,0x1040000,0x1040000,0x7FC0000,0x7FC0000,0x56000001,0x7FC0000,0x7FC0000,0x56000001,0x56000001,0x7FC0000, -0x7FC0000,0x56000001,0x56000001,0x56000001,0xCC0000,0xBC0000,0xB00000,0xF00000,0x1280000,0x1740000,0x1AC0000,0x1DF40000,0xDC0000,0x1040000,0x1740000,0x56000001,0x1740000,0x1C00000,0xA3FC0000,0xD1FC0000,0xDE000001,0xA3FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xDE000001,0xDE000001,0xA3FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xDE000001, -0xDE000001,0xD1FC0000,0xDE000001,0xDE000001,0xDE000001,0xA3FC0000,0xD1FC0000,0xDE000001,0xD1FC0000,0xDE000001,0xDE000001,0xD1FC0000,0xDE000001,0xDE000001,0xDE000001,0xD1FC0000,0xDE000001,0xDE000001,0xDE000001,0xDE000001,0x69FC0000,0x7DC0000,0x7DC0000,0xB3FC0000,0xCBFC0000,0xD9F40000,0xDE000001,0xDE000001,0x8DFC0000,0xBDFC0000,0xDFD00000,0xDE000001, -0xC5FC0000,0x9C8330,0xFE684541,0xFE401B24,0xDE3C142D,0xFE443FEA,0xFE140A2D,0xE008009A,0xE6002422,0xC4000A69,0x9A002422,0xFE246584,0xFA002639,0xCC0014AB,0xCC003831,0xB4001956,0x92002DC2,0x9A006380,0x96003D53,0x7E0046EB,0x66006383,0xE88330,0xE8003F39,0xC0002534,0xBA00483A,0xA8002617,0x8C00366A,0x8C006CB7,0x8A00465F,0x78004D93,0x620067CB,0x1D88330, -0x78005CE4,0x6C005F04,0x5C0073EF,0x4C008334,0xFE645DA6,0xFE8C7530,0xFE8C7650,0xFE303A6D,0xFE04205E,0xDE001961,0xC40010B2,0xAA002336,0xFE445CDB,0xFE0435B7,0xB600301B,0x78004D93,0x14C8330,0xD06380,0xFE983611,0xFE641478,0xDE580F21,0xFE683962,0xFE2009C5,0xE20C007E,0xE6002422,0xC4000A69,0x9A002422,0x1346380,0xFA002639,0xCC0014AB,0xCC003831,0xB4001956, -0x92002DC2,0x1FF86380,0x96003D53,0x7E0046EB,0x66006383,0x1346380,0xFA002639,0xCC0014AB,0xCC003831,0xB4001956,0x92002DC2,0x1FF86380,0x96003D53,0x7E0046EB,0x66006383,0x1FF86380,0x96003D53,0x7E0046EB,0x66006383,0x66006383,0xFE944E93,0xFAC45BA4,0xFCC85A64,0xFE403763,0xFE04205A,0xDE001961,0xC40010B2,0xAA002336,0xFE784F21,0xFE18341D,0xB6002FA2,0x7E0046EB, -0x1B86380,0x3C142C,0x3C142C,0x3C142C,0x3C142C,0xFE000010,0xFE000010,0xFE000010,0x82000000,0x82000000,0x56000001,0x84000F20,0x84000F20,0x84000F20,0x680005A5,0x680005A5,0x4C0002EA,0x40000F20,0x40000F20,0x3E000955,0x2A000F22,0x54142B,0x54142B,0x54142B,0x580009A2,0x580009A2,0x46000543,0x3A0010A9,0x3A0010A9,0x38000ACE,0x28000FDB,0xAC142B, -0xAC142B,0x2C000E41,0x240011F3,0x1C00142B,0xFE100A3A,0xFE2C0DF1,0x3C142C,0xFA000682,0xA80005ED,0x840005EA,0x800004FD,0x56000709,0xF4000B09,0xB20008D2,0x52000F39,0x38000ACE,0x78142B,0x580F20,0x580F20,0x580F20,0x580F20,0xFE04000D,0xFE04000D,0xFE04000D,0x82000000,0x82000000,0x56000001,0x2800F20,0x2800F20,0x2800F20,0x680005A5,0x680005A5, -0x4C0002EA,0x1080F20,0x1080F20,0x3E000955,0x2A000F22,0x2800F20,0x2800F20,0x2800F20,0x680005A5,0x680005A5,0x4C0002EA,0x1080F20,0x1080F20,0x3E000955,0x2A000F22,0x1080F20,0x1080F20,0x3E000955,0x2A000F22,0x2A000F22,0xFE200965,0xFC480AB4,0x580F20,0xFA000682,0xA80005ED,0x840005EA,0x800004FD,0x56000709,0xF6040A20,0xBC000848,0xB80F20,0x3E000955, -0xB80F20,0x1382420,0xFF0011A4,0xFEB801F4,0xDEB00001,0x1D02420,0xFE4C07F9,0xDE2C0001,0x6DF82420,0xC4000A69,0x9A002422,0x1D02420,0xFE4C07F9,0xDE2C0001,0x6DF82420,0xC4000A69,0x9A002422,0x6DF82420,0xC4000A69,0x9A002422,0x9A002422,0x1D02420,0xFE4C07F9,0xDE2C0001,0x6DF82420,0xC4000A69,0x9A002422,0x6DF82420,0xC4000A69,0x9A002422,0x9A002422,0x6DF82420, -0xC4000A69,0x9A002422,0x9A002422,0x9A002422,0xFD102000,0x14C2420,0xFF2C208D,0xFEC01945,0xFE501231,0xF6000A12,0xC80005D2,0xBA000EBA,0xFEF01FA9,0xFEA01865,0xDE000465,0x9A002422,0x45FC2420,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, -0x0,0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table230[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000, -0x40000,0x80000,0x80000,0x80000,0x1,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x80000,0x80000,0x80000,0x1,0x80000,0x80000,0x80000,0x1,0x1,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x40000,0x80000,0x80000, -0x80000,0xC00000,0xC00000,0xC00000,0xC00000,0x11C0000,0x11C0000,0x11C0000,0x13FC0000,0x13FC0000,0x5E000001,0x11C0000,0x11C0000,0x11C0000,0x13FC0000,0x13FC0000,0x5E000001,0x13FC0000,0x13FC0000,0x5E000001,0x5E000001,0x11C0000,0x11C0000,0x11C0000,0x13FC0000,0x13FC0000,0x5E000001,0x13FC0000,0x13FC0000,0x5E000001,0x5E000001,0x13FC0000, -0x13FC0000,0x5E000001,0x5E000001,0x5E000001,0xE00000,0xCC0000,0xC00000,0x1040000,0x3400000,0x1980000,0x1D40000,0x27F80000,0xF00000,0x11C0000,0x1980000,0x5E000001,0x1980000,0x1D00000,0xBBFC0000,0xDDFC0000,0xE6000001,0xBBFC0000,0xDDFC0000,0xE6000001,0xDDFC0000,0xE6000001,0xE6000001,0xBBFC0000,0xDDFC0000,0xE6000001,0xDDFC0000,0xE6000001, -0xE6000001,0xDDFC0000,0xE6000001,0xE6000001,0xE6000001,0xBBFC0000,0xDDFC0000,0xE6000001,0xDDFC0000,0xE6000001,0xE6000001,0xDDFC0000,0xE6000001,0xE6000001,0xE6000001,0xDDFC0000,0xE6000001,0xE6000001,0xE6000001,0xE6000001,0x91FC0000,0xFEC0000,0xFEC0000,0xC7FC0000,0xD9FC0000,0xE3F40000,0xE6000001,0xE6000001,0xABFC0000,0xCFFC0000,0xE7E00000,0xE6000001, -0xD5FC0000,0xA48BEC,0xFE744D71,0xFE4C20D8,0xE6401785,0xFE5045F6,0xFE140D79,0xEC0C0111,0xF2042424,0xCA00098D,0xA2002426,0xFE306BE4,0xFA002945,0xD80015EB,0xD8003941,0xC00018FE,0x98002E12,0xA2006878,0x9C003FBB,0x84004933,0x6C00687B,0xF48BEC,0xEE004349,0xC6002804,0xC6004B2A,0xAE002717,0x920037A6,0x980072CF,0x960049C7,0x7E00509F,0x68006D47,0x1F08BEC, -0x7E0062AC,0x72006404,0x5C007ADF,0x52008BEC,0xFE786595,0xFE8C7DDC,0xF8A07F6D,0xFE3040E1,0xFE04237E,0xEE00192E,0xC8000FFB,0xB6002345,0xFE546487,0xFE043C07,0xB60031CB,0x7E00509F,0x15C8BEC,0xDC6878,0xFEA43B8D,0xFE701888,0xE6601145,0xFE743D82,0xFE2C0C8D,0xEA1400D9,0xF2042420,0xCA00098D,0xA2042422,0x3446878,0xFA002945,0xD80015EB,0xD8003941,0xC00018FE, -0x98002E12,0x27FC6878,0x9C003FBB,0x84004933,0x6C00687B,0x3446878,0xFA002945,0xD80015EB,0xD8003941,0xC00018FE,0x98002E12,0x27FC6878,0x9C003FBB,0x84004933,0x6C00687B,0x27FC6878,0x9C003FBB,0x84004933,0x6C00687B,0x6C00687B,0xFEA053D2,0xFECC6050,0xFECC5F74,0xFE543C9E,0xFE04237A,0xEE00192E,0xC8000FFB,0xB6002345,0xFE885438,0xFE2C3966,0xB6003152,0x84004933, -0x1D46878,0x401784,0x401784,0x401784,0x401784,0xFE0C0074,0xFE0C0074,0xFE0C0074,0x8E000004,0x8E000004,0x5E000005,0x94001142,0x94001142,0x94001142,0x74000631,0x74000631,0x5800030A,0x48001142,0x48001142,0x44000A69,0x30001142,0x601783,0x601783,0x601783,0x62000ACD,0x62000ACD,0x4C0005EB,0x4000132D,0x4000132D,0x3E000C3A,0x2E00122B,0xC41783, -0xC41783,0x3200106D,0x260014AE,0x20001783,0xFE200C8D,0xFE2C110D,0x401784,0xFA00078A,0xBC000679,0x8C000682,0x8C000559,0x6A0007B2,0xFE000D14,0xD00009C2,0x5C001166,0x3E000C3A,0x8C1783,0x601144,0x601144,0x601144,0x601144,0xFE0C0050,0xFE0C0050,0xFE0C0050,0x8C040001,0x8C040001,0x5E040001,0x901142,0x901142,0x901142,0x74000631,0x74000631, -0x5800030A,0x1241142,0x1241142,0x44000A69,0x30001142,0x901142,0x901142,0x901142,0x74000631,0x74000631,0x5800030A,0x1241142,0x1241142,0x44000A69,0x30001142,0x1241142,0x1241142,0x44000A69,0x30001142,0x30001142,0xFA340B48,0xFE4C0CA0,0x601144,0xFA00078A,0xBC000679,0x8C000682,0x8C000559,0x6A0007B2,0xFE0C0BE2,0xD0000919,0xD01142,0x44000A69, -0xD01142,0x1482420,0xFF0C12C8,0xFECC02FD,0xE6C00001,0x1E82420,0xFE640949,0xE63C0001,0x79F82420,0xCA000989,0xA2002422,0x1E82420,0xFE640949,0xE63C0001,0x79F82420,0xCA000989,0xA2002422,0x79F82420,0xCA000989,0xA2002422,0xA2002422,0x1E82420,0xFE640949,0xE63C0001,0x79F82420,0xCA000989,0xA2002422,0x79F82420,0xCA000989,0xA2002422,0xA2002422,0x79F82420, -0xCA000989,0xA2002422,0xA2002422,0xA2002422,0xFF142048,0x75C2420,0xF9402104,0xFED81A29,0xFE68133D,0xFC000928,0xD40004ED,0xC2000DFA,0xFF102000,0xFEC01919,0xE60003BA,0xA2002422,0x55FC2420,0x4,0x4,0x4,0x4,0x4,0x4,0x4,0x4,0x4,0x4,0x4000000,0x4000000,0x4000000,0x4000000,0x4000000, -0x4000000,0x2000000,0x2000000,0x2000000,0x1,0x2000002,0x2000002,0x2000002,0x2000002,0x2000002,0x2000002,0x1,0x1,0x1,0x1,0x2,0x2,0x2,0x2,0x2,0x18000000,0x4,0x4,0xA000000,0x8000000,0x6000000,0x6000000,0x4000000,0x4000001,0x2000001,0x2000000,0x1, -0x2,}; -static const uint32_t g_etc1_to_bc7_m6_table231[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x140000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000, -0x1C0000,0x380000,0x380000,0x380000,0x8000001,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x1C0000,0x380000,0x380000,0x380000,0x8000001,0x380000,0x380000,0x380000,0x8000001,0x8000001,0x2140000,0x140000,0x140000,0x180000,0x180000,0x2180000,0x2180000,0x200000,0x180000,0x180000,0x280000,0x380000, -0x280000,0xD00000,0xD00000,0xD00000,0xD00000,0x1340000,0x1340000,0x1340000,0x1FF80000,0x1FF80000,0x66000001,0x1340000,0x1340000,0x1340000,0x1FF80000,0x1FF80000,0x66000001,0x1FF80000,0x1FF80000,0x66000001,0x66000001,0x1340000,0x1340000,0x1340000,0x1FF80000,0x1FF80000,0x66000001,0x1FF80000,0x1FF80000,0x66000001,0x66000001,0x1FF80000, -0x1FF80000,0x66000001,0x66000001,0x66000001,0x2F00000,0x6DC0000,0xD00000,0x3180000,0x15C0000,0x1B80000,0x1FC0000,0x33F40000,0x1040000,0x1340000,0x1B80000,0x66000001,0x1B80000,0x1E00000,0xD3FC0000,0xE9FC0000,0xEE000001,0xD3FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xD3FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xEE000001, -0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0xD3FC0000,0xE9FC0000,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0xE9FC0000,0xEE000001,0xEE000001,0xEE000001,0xEE000001,0xB7FC0000,0x17FC0000,0x17FC0000,0xDBFC0000,0xE7F80000,0xEDF40000,0xEE000001,0xEE000001,0xC9FC0000,0xE1FC0000,0xEFF00000,0xEE000001, -0xE3FC0000,0xB09144,0xFE805385,0xFE582588,0xEE4C19B9,0xFE5C4AD6,0xFE201109,0xF6140195,0xFC0C2454,0xD6000915,0xAA082456,0xFE3C6E48,0xFE08296D,0xE200142D,0xE80036E6,0xC60015E6,0xA2002BFB,0xAE006878,0xA6003DB5,0x90004763,0x7400687B,0x1049144,0xFA004419,0xD2002844,0xD2004B5A,0xBA00258F,0x9E0036E6,0xA2007431,0x9C0048BB,0x8A004FCF,0x6E006E1B,0x7FC9144, -0x840064EC,0x780065C4,0x66007DB4,0x58009144,0xFE786AC5,0xFAA48366,0xFEAC8499,0xFE40451B,0xFE04257E,0xFC0015D9,0xD2000D46,0xB6002035,0xFE5C697E,0xFE184056,0xBE002F7B,0x8A004FCF,0x1749144,0xEC6878,0xFEB03D85,0xFE7C1A74,0xEE701145,0xFE8C3F52,0xFE380EE1,0xF22400D9,0xFA142420,0xD6000915,0xAA142422,0x35C6878,0xFE08295D,0xE200142D,0xE80036E6,0xC60015E6, -0xA2002BFB,0x33FC6878,0xA6003DB5,0x90004763,0x7400687B,0x35C6878,0xFE08295D,0xE200142D,0xE80036E6,0xC60015E6,0xA2002BFB,0x33FC6878,0xA6003DB5,0x90004763,0x7400687B,0x33FC6878,0xA6003DB5,0x90004763,0x7400687B,0x7400687B,0xFEB45485,0xF8E06116,0xFAE4601D,0xFE6C3E89,0xFE102549,0xFC0015D9,0xD2000D46,0xB6002035,0xFE985529,0xFE403AFB,0xBE002EEB,0x90004763, -0x1F46878,0x4C19B8,0x4C19B8,0x4C19B8,0x4C19B8,0xFE180124,0xFE180124,0xFE180124,0x98080034,0x98080034,0x66080035,0xAC001142,0xAC001142,0xAC001142,0x80000521,0x80000521,0x620001FD,0x54001142,0x54001142,0x4A000989,0x38001142,0x7019B8,0x7019B8,0x7019B8,0x6E000AFD,0x6E000AFD,0x58000593,0x4C0013E5,0x4C0013E5,0x44000BEA,0x34001283,0xE419B8, -0xE419B8,0x38001155,0x2C0015E6,0x240019BB,0xFE200E1D,0xFA441304,0x4C19B8,0xFE08079A,0xD4000562,0xA2000562,0x96000448,0x6E0006AD,0xFE000E34,0xF2000912,0x6600116E,0x44000BEA,0xA019B8,0x701144,0x701144,0x701144,0x701144,0xFE240080,0xFE240080,0xFE240080,0x94140001,0x94140001,0x66140001,0xA81142,0xA81142,0xA81142,0x80000521,0x80000521, -0x620001FD,0x1581142,0x1581142,0x4A000989,0x38001142,0xA81142,0xA81142,0xA81142,0x80000521,0x80000521,0x620001FD,0x1581142,0x1581142,0x4A000989,0x38001142,0x1581142,0x1581142,0x4A000989,0x38001142,0x38001142,0xFE3C0B68,0xFA640CD1,0x701144,0xFE08078A,0xD4000562,0xA2000562,0x96000448,0x6E0006AD,0xFE140C14,0xF2000831,0xF01142,0x4A000989, -0xF01142,0x1582420,0xFF241408,0xFEE40425,0xEED00001,0x3FC2420,0xFE880AB5,0xEE4C0001,0x85F82420,0xD60008B1,0xAA002422,0x3FC2420,0xFE880AB5,0xEE4C0001,0x85F82420,0xD60008B1,0xAA002422,0x85F82420,0xD60008B1,0xAA002422,0xAA002422,0x3FC2420,0xFE880AB5,0xEE4C0001,0x85F82420,0xD60008B1,0xAA002422,0x85F82420,0xD60008B1,0xAA002422,0xAA002422,0x85F82420, -0xD60008B1,0xAA002422,0xAA002422,0xAA002422,0xFF342081,0xF6C2420,0xFF4C2114,0xFEEC1AC8,0xFE90147D,0xFE040910,0xDE000431,0xD0000D22,0xFF182032,0xFECC1A04,0xF2000301,0xAA002422,0x63FC2420,0x80034,0x80034,0x80034,0x80034,0x80034,0x80034,0x80034,0x80034,0x80034,0x80034,0x1C000000,0x1C000000,0x1C000000,0x1C000000,0x1C000000, -0x1C000000,0xE000000,0xE000000,0xE000000,0x8000001,0x20C0032,0x20C0032,0x20C0032,0x20C0032,0x20C0032,0x20C0032,0xC000011,0xC000011,0xC000011,0x800000A,0x180032,0x180032,0x180032,0x4000022,0x4000032,0x98000000,0x80034,0x80034,0x44000000,0x2E000000,0x24000000,0x24000000,0x18000000,0x3600000D,0x24000008,0x12000001,0xC000011, -0x140032,}; -static const uint32_t g_etc1_to_bc7_m6_table232[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x240001,0x380000,0x380000,0x380000,0x380000,0x380000, -0x380000,0x700000,0x700000,0x700000,0x12000000,0x380000,0x380000,0x380000,0x380000,0x380000,0x380000,0x700000,0x700000,0x700000,0x12000000,0x700000,0x700000,0x700000,0x12000000,0x12000000,0x280000,0x240001,0x240001,0x2C0000,0x300000,0x340000,0x340000,0x400000,0x2C0000,0x300000,0x500000,0x700000, -0x500000,0xE00001,0xE00001,0xE00001,0xE00001,0x1500000,0x1500000,0x1500000,0x2DF80000,0x2DF80000,0x70000000,0x1500000,0x1500000,0x1500000,0x2DF80000,0x2DF80000,0x70000000,0x2DF80000,0x2DF80000,0x70000000,0x70000000,0x1500000,0x1500000,0x1500000,0x2DF80000,0x2DF80000,0x70000000,0x2DF80000,0x2DF80000,0x70000000,0x70000000,0x2DF80000, -0x2DF80000,0x70000000,0x70000000,0x70000000,0x7040000,0xF00000,0xE00001,0x1340000,0x17C0000,0x1E00000,0x11FC0000,0x3FF40000,0x11C0000,0x1500000,0x1E00000,0x70000000,0x1E00000,0x1F00001,0xEFFC0000,0xF7F80000,0xF8000000,0xEFFC0000,0xF7F80000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xEFFC0000,0xF7F80000,0xF8000000,0xF7F80000,0xF8000000, -0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xEFFC0000,0xF7F80000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xF7F80000,0xF8000000,0xF8000000,0xF8000000,0xF8000000,0xE3FC0000,0xA7FC0000,0xA7FC0000,0xF1FC0000,0xF5FC0000,0xF9F00000,0xF8000000,0xF8000000,0xEBFC0000,0xF3FC0000,0xF9E40000,0xF8000000, -0xF5FC0000,0xC097BB,0xFE8C5AEA,0xFE642B8F,0xF8581C9A,0xFE6850F9,0xFE3815EA,0xFC200282,0xFE14251D,0xDE040922,0xB41424D5,0xFE4471AC,0xFE082B02,0xEE0012C6,0xF40034A9,0xD20012B1,0xAE002A0C,0xBC006878,0xB2003B2E,0x9600456C,0x7E006878,0x31897B9,0xFA004686,0xE200290A,0xE2004B75,0xC6002442,0xA8003621,0xAE00761C,0xA60047DF,0x90004F04,0x7A006EE8,0x11FC97B9, -0x900067C5,0x7E006825,0x6C008115,0x5E0097B9,0xFE8C716F,0xFEAC89AD,0xFEAC8B86,0xFE544AE1,0xFE1028CE,0xFC00135A,0xDE000A4D,0xCA001CD5,0xFE707042,0xFE244643,0xD6002C8C,0x90004F04,0x19497B9,0xFC687B,0xFEC43F8B,0xFE941CBB,0xF8841142,0xFE984159,0xFE5811F2,0xFC3800DA,0xFE2C2431,0xDE10090E,0xB4242421,0x1786878,0xFE082AF2,0xEE0012C6,0xF40034A9,0xD20012B1, -0xAE002A0C,0x41FC6878,0xB2003B2E,0x9600456C,0x7E006878,0x1786878,0xFE082AF2,0xEE0012C6,0xF40034A9,0xD20012B1,0xAE002A0C,0x41FC6878,0xB2003B2E,0x9600456C,0x7E006878,0x41FC6878,0xB2003B2E,0x9600456C,0x7E006878,0x7E006878,0xFED055D8,0xFEEC6131,0xFEEC6086,0xFE8040FD,0xFE242846,0xFC00135A,0xDE000A4D,0xCA001CD5,0xFEB456A6,0xFE5C3DAE,0xD6002BE3,0x9600456C, -0xFFC6878,0x581C9A,0x581C9A,0x581C9A,0x581C9A,0xFE240266,0xFE240266,0xFE240266,0xA21400B5,0xA21400B5,0x701400B5,0xC8001142,0xC8001142,0xC8001142,0x9200040D,0x9200040D,0x68000121,0x60001144,0x60001144,0x56000895,0x40001144,0x2801C9A,0x2801C9A,0x2801C9A,0x7A000B7D,0x7A000B7D,0x6200056D,0x580014D3,0x580014D3,0x50000BA2,0x400012FD,0x1081C9A, -0x1081C9A,0x440012AD,0x32001788,0x2A001C9D,0xFC38101A,0xFE4C1596,0x581C9A,0xFE100871,0xF2000448,0xC0000464,0xAA00031D,0x80000585,0xFE140FDB,0xFC000884,0x7C001182,0x50000BA2,0xB81C9A,0x841142,0x841142,0x841142,0x841142,0xFE3400C1,0xFE3400C1,0xFE3400C1,0x9E240001,0x9E240001,0x70240001,0xC41142,0xC41142,0xC41142,0x9200040D,0x9200040D, -0x68000121,0x18C1142,0x18C1142,0x56000895,0x40001144,0xC41142,0xC41142,0xC41142,0x9200040D,0x9200040D,0x68000121,0x18C1142,0x18C1142,0x56000895,0x40001144,0x18C1142,0x18C1142,0x56000895,0x40001144,0x40001144,0xFE580B95,0xFE6C0D0D,0x841142,0xFE1807E9,0xF2000448,0xC0000464,0xAA00031D,0x80000585,0xFE340C31,0xFC000784,0x1181142,0x56000895, -0x1181142,0x1682422,0xFF301572,0xFEFC05B9,0xF8E00001,0x1FFC2420,0xFEA00C69,0xF85C0000,0x91FC2420,0xDC0007D9,0xB4002420,0x1FFC2420,0xFEA00C69,0xF85C0000,0x91FC2420,0xDC0007D9,0xB4002420,0x91FC2420,0xDC0007D9,0xB4002420,0xB4002420,0x1FFC2420,0xFEA00C69,0xF85C0000,0x91FC2420,0xDC0007D9,0xB4002420,0x91FC2420,0xDC0007D9,0xB4002420,0xB4002420,0x91FC2420, -0xDC0007D9,0xB4002420,0xB4002420,0xB4002420,0xFD4C2102,0x9802420,0xFB642185,0xFF141BC1,0xFEA815C1,0xFE0409FA,0xE8000371,0xD8000C44,0xFF2C20D5,0xFEF01B2D,0xFE000248,0xB4002420,0x75FC2420,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x1400B5,0x38000000,0x38000000,0x38000000,0x38000000,0x38000000, -0x38000000,0x1C000000,0x1C000000,0x1C000000,0x12000000,0x1C00B5,0x1C00B5,0x1C00B5,0x1C00B5,0x1C00B5,0x1C00B5,0x18000044,0x18000044,0x18000044,0x12000024,0x3000B5,0x3000B5,0x3000B5,0xA000071,0x80000B5,0xFA040005,0x1400B5,0x1400B5,0x84000000,0x5C000000,0x46000000,0x46000000,0x2E000000,0x76000035,0x5600001A,0x22000004,0x18000044, -0x2400B5,}; -static const uint32_t g_etc1_to_bc7_m6_table233[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x340001,0x500000,0x500000,0x500000,0x500000,0x500000, -0x500000,0xA00000,0xA00000,0xA00000,0x1A000000,0x500000,0x500000,0x500000,0x500000,0x500000,0x500000,0xA00000,0xA00000,0xA00000,0x1A000000,0xA00000,0xA00000,0xA00000,0x1A000000,0x1A000000,0x4380000,0x340001,0x340001,0x63C0000,0x440000,0x480000,0x480000,0x580000,0x63C0000,0x440000,0x700000,0xA00000, -0x700000,0xF00001,0xF00001,0xF00001,0xF00001,0x1680000,0x1680000,0x1680000,0x39F80000,0x39F80000,0x78000000,0x1680000,0x1680000,0x1680000,0x39F80000,0x39F80000,0x78000000,0x39F80000,0x39F80000,0x78000000,0x78000000,0x1680000,0x1680000,0x1680000,0x39F80000,0x39F80000,0x78000000,0x39F80000,0x39F80000,0x78000000,0x78000000,0x39F80000, -0x39F80000,0x78000000,0x78000000,0x78000000,0x3180000,0x9000000,0xF00001,0x1480000,0x3940000,0x3FC0000,0x1FF80000,0x49F80000,0x1300000,0x1680000,0x3FC0000,0x78000000,0x3FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xC89B73,0xFE98601A,0xFE70308B,0xFE641F86,0xFE7454F9,0xFE3819DE,0xFE280401,0xFE20257D,0xE4100916,0xBA1C2481,0xFE507334,0xFE142CA6,0xFA0011D2,0xFA0031B5,0xDE000F8D,0xB4002774,0xC6006693,0xB80037EE,0x9C004258,0x86006694,0x12C9B71,0xFA0048EA,0xE20029B6,0xE8004A85,0xCC0022AE,0xAE003499,0xBA0075EC,0xB20045F3,0x9A004CE6,0x80006DCC,0x1BF89B71, -0x9600690D,0x8A0068A1,0x7200821D,0x64009B71,0xFE94756C,0xF8C08E33,0xFAC48FAF,0xFE544EB5,0xFE242B66,0xFC0011E2,0xEA0007C2,0xD0001968,0xFE7873EA,0xFE40496C,0xD6002964,0x9A004CE6,0x1AC9B71,0x10C6693,0xFED04023,0xFEAC1E33,0xFE941142,0xFEB0416D,0xFE6413AA,0xFE4400F5,0xFE402373,0xE41C08A6,0xBA342315,0x38C6693,0xFE202C26,0xFA0011D2,0xFA0031B5,0xDE000F8D, -0xB4002774,0x4BFC6693,0xB80037EE,0x9C004258,0x86006694,0x38C6693,0xFE202C26,0xFA0011D2,0xFA0031B5,0xDE000F8D,0xB4002774,0x4BFC6693,0xB80037EE,0x9C004258,0x86006694,0x4BFC6693,0xB80037EE,0x9C004258,0x86006694,0x86006694,0xFED05534,0xF900600B,0xFD085F33,0xFE9440FA,0xFE3829B2,0xFC0011E2,0xEA0007C2,0xD0001968,0xFEC455C3,0xFE843E6A,0xDE0028A6,0x9C004258, -0x1DF86693,0x641F86,0x641F86,0x641F86,0x641F86,0xFE280401,0xFE280401,0xFE280401,0xAC1C016D,0xAC1C016D,0x781C016D,0xE0001142,0xE0001142,0xE0001142,0xA200031D,0xA200031D,0x74000089,0x6C001144,0x6C001144,0x5C0007D9,0x48001144,0x901F85,0x901F85,0x901F85,0x86000C35,0x86000C35,0x680005B5,0x620015A4,0x620015A4,0x58000B8A,0x46001365,0x1241F85, -0x1241F85,0x4A001419,0x3E001918,0x30001F85,0xFE3C123E,0xF45818B1,0x641F86,0xFE180989,0xFC000384,0xC8000368,0xB800025D,0x8C0004A5,0xFE201212,0xFE0008CA,0x8600118A,0x58000B8A,0xD01F85,0x941142,0x941142,0x941142,0x941142,0xFE4400F5,0xFE4400F5,0xFE4400F5,0xA6340001,0xA6340001,0x78340001,0xDC1142,0xDC1142,0xDC1142,0xA200031D,0xA200031D, -0x74000089,0x1BC1142,0x1BC1142,0x5C0007D9,0x48001144,0xDC1142,0xDC1142,0xDC1142,0xA200031D,0xA200031D,0x74000089,0x1BC1142,0x1BC1142,0x5C0007D9,0x48001144,0x1BC1142,0x1BC1142,0x5C0007D9,0x48001144,0x48001144,0xFA6C0BE2,0xFC880D22,0x941142,0xFE340845,0xFC000384,0xC8000368,0xB800025D,0x8C0004A5,0xFA480C82,0xFE0407B4,0x1381142,0x5C0007D9, -0x1381142,0x1782312,0xFF441595,0xFF0806B9,0xFEF00001,0x35FC2312,0xFEB80D39,0xFE700000,0x9DF42312,0xE60006C4,0xBA002314,0x35FC2312,0xFEB80D39,0xFE700000,0x9DF42312,0xE60006C4,0xBA002314,0x9DF42312,0xE60006C4,0xBA002314,0xBA002314,0x35FC2312,0xFEB80D39,0xFE700000,0x9DF42312,0xE60006C4,0xBA002314,0x9DF42312,0xE60006C4,0xBA002314,0xBA002314,0x9DF42312, -0xE60006C4,0xBA002314,0xBA002314,0xBA002314,0xFF502032,0x1902312,0xFF6C2099,0xFF201B94,0xFED01619,0xFE180AF1,0xF20002A1,0xE0000B14,0xFD502000,0xFF081AAA,0xFE0001C4,0xBA002314,0x81FC2312,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x1C016D,0x50000000,0x50000000,0x50000000,0x50000000,0x50000000, -0x50000000,0x26000001,0x26000001,0x26000001,0x1A000000,0x28016D,0x28016D,0x28016D,0x28016D,0x28016D,0x28016D,0x22000082,0x22000082,0x22000082,0x18000044,0x4C016D,0x4C016D,0x4C016D,0x100000DD,0xC00016D,0xFE0C003D,0x1C016D,0x1C016D,0xBC000000,0x82000000,0x64000000,0x64000000,0x42000000,0xA0000074,0x7400003A,0x32000009,0x22000082, -0x34016D,}; -static const uint32_t g_etc1_to_bc7_m6_table234[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x440001,0x680000,0x680000,0x680000,0x680000,0x680000, -0x680000,0xD00000,0xD00000,0xD00000,0x22000000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0xD00000,0xD00000,0xD00000,0x22000000,0xD00000,0xD00000,0xD00000,0x22000000,0x22000000,0xC480000,0x440001,0x440001,0x500000,0x580000,0x25C0000,0x25C0000,0x740000,0x500000,0x580000,0x940000,0xD00000, -0x940000,0x1000001,0x1000001,0x1000001,0x1000001,0x1800000,0x1800000,0x1800000,0x45F80000,0x45F80000,0x80000000,0x1800000,0x1800000,0x1800000,0x45F80000,0x45F80000,0x80000000,0x45F80000,0x45F80000,0x80000000,0x80000000,0x1800000,0x1800000,0x1800000,0x45F80000,0x45F80000,0x80000000,0x45F80000,0x45F80000,0x80000000,0x80000000,0x45F80000, -0x45F80000,0x80000000,0x80000000,0x80000000,0x12C0000,0x1140000,0x1000001,0x35C0000,0x1B00000,0x13FC0000,0x2BFC0000,0x55F40000,0x1440000,0x1800000,0x13FC0000,0x80000000,0x13FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xD0985B,0xFEA46016,0xFE7C32E7,0xFE6C22C6,0xFE80534D,0xFE4C1AEE,0xFE300602,0xFE2C22E1,0xE8180846,0xBE242169,0xFE5C6E98,0xFE142B26,0xFC001148,0xFA002BC5,0xE2000B41,0xBA0021D8,0xD0005F33,0xBE0031CA,0xA6003B0D,0x8A005F34,0x1389859,0xFC004866,0xE80029FE,0xEE004581,0xD2001F56,0xB4002FCD,0xC0006FCC,0xB20040B3,0x9C004648,0x86006704,0x21F89859, -0x9C0065F9,0x8A0063E1,0x78007CE9,0x68009859,0xFEA072F9,0xFCC88B1B,0xFECC8CE7,0xFE6C4D3A,0xFE2829D6,0xFE040EE2,0xEE0004B9,0xD6001419,0xFE7871FA,0xFE4047CC,0xDE00246A,0x9C004648,0x1BC9859,0x1185F33,0xFEE83C13,0xFEB81D03,0xFEA41142,0xFEC43C4B,0xFE7C126A,0xFE5C013D,0xFE4C1F8B,0xE8300726,0xBE441F05,0x1A05F33,0xFE3829C6,0xFC001148,0xFA002BC5,0xE2000B41, -0xBA0021D8,0x55F85F33,0xBE0031CA,0xA6003B0D,0x8A005F34,0x1A05F33,0xFE3829C6,0xFC001148,0xFA002BC5,0xE2000B41,0xBA0021D8,0x55F85F33,0xBE0031CA,0xA6003B0D,0x8A005F34,0x55F85F33,0xBE0031CA,0xA6003B0D,0x8A005F34,0x8A005F34,0xFEE44F59,0xFF0C58D3,0xFF0C5843,0xFEB03CE2,0xFE4826EA,0xFE040EDE,0xEE0004B9,0xD6001419,0xFEDC4F89,0xFE8439AA,0xDE0023A6,0xA6003B0D, -0x27FC5F33,0x6C22C6,0x6C22C6,0x6C22C6,0x6C22C6,0xFE300602,0xFE300602,0xFE300602,0xB6240265,0xB6240265,0x80240265,0xF8001142,0xF8001142,0xF8001142,0xAE00025D,0xAE00025D,0x80000031,0x78001144,0x78001144,0x66000728,0x50001144,0xA422C5,0xA422C5,0xA422C5,0x92000D2D,0x92000D2D,0x7400063D,0x6E0016B4,0x6E0016B4,0x60000B84,0x4C0013E5,0x14C22C5, -0x14C22C5,0x500015CD,0x44001AD8,0x360022C5,0xFE4C14D5,0xFA641BA9,0x6C22C6,0xFE240B44,0xFE040388,0xE20002B1,0xCC00019A,0x960003E8,0xFC301481,0xFE000A0A,0x9600119B,0x60000B84,0xE822C5,0xA41142,0xA41142,0xA41142,0xA41142,0xFE5C013D,0xFE5C013D,0xFE5C013D,0xAE440001,0xAE440001,0x80440001,0xF41142,0xF41142,0xF41142,0xAE00025D,0xAE00025D, -0x80000031,0x1F01142,0x1F01142,0x66000728,0x50001144,0xF41142,0xF41142,0xF41142,0xAE00025D,0xAE00025D,0x80000031,0x1F01142,0x1F01142,0x66000728,0x50001144,0x1F01142,0x1F01142,0x66000728,0x50001144,0x50001144,0xFE740C02,0xFE8C0D6A,0xA41142,0xFE440894,0xFE040384,0xE20002B1,0xCC00019A,0x960003E8,0xFE500CB2,0xFE1807FD,0x15C1142,0x66000728, -0x15C1142,0x1801F02,0xFF501315,0xFF2005F1,0xFF000001,0x41FC1F02,0xFECC0BD5,0xFE880000,0xA1FC1F02,0xEC0004C8,0xBE001F04,0x41FC1F02,0xFECC0BD5,0xFE880000,0xA1FC1F02,0xEC0004C8,0xBE001F04,0xA1FC1F02,0xEC0004C8,0xBE001F04,0xBE001F04,0x41FC1F02,0xFECC0BD5,0xFE880000,0xA1FC1F02,0xEC0004C8,0xBE001F04,0xA1FC1F02,0xEC0004C8,0xBE001F04,0xBE001F04,0xA1FC1F02, -0xEC0004C8,0xBE001F04,0xBE001F04,0xBE001F04,0xF7681C99,0x5981F02,0xFF6C1D09,0xFF30184D,0xFEE81379,0xFE4809A1,0xF4000164,0xE0000894,0xFD581C22,0xFF1417A5,0xFE000124,0xBE001F04,0x89FC1F02,0x240265,0x240265,0x240265,0x240265,0x240265,0x240265,0x240265,0x240265,0x240265,0x240265,0x6A000000,0x6A000000,0x6A000000,0x6A000000,0x6A000000, -0x6A000000,0x32000001,0x32000001,0x32000001,0x22000000,0x340265,0x340265,0x340265,0x340265,0x340265,0x340265,0x280000DA,0x280000DA,0x280000DA,0x1E000074,0x640265,0x640265,0x640265,0x16000171,0x10000265,0xF21400C8,0x240265,0x240265,0xF6000000,0xAA000000,0x82000000,0x82000000,0x54000000,0xC40000C1,0x96000061,0x40000010,0x280000DA, -0x480265,}; -static const uint32_t g_etc1_to_bc7_m6_table235[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x540001,0x800000,0x800000,0x800000,0x800000,0x800000, -0x800000,0x1000000,0x1000000,0x1000000,0x2A000000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x1000000,0x1000000,0x1000000,0x2A000000,0x1000000,0x1000000,0x1000000,0x2A000000,0x2A000000,0x5C0000,0x540001,0x540001,0x640000,0x6C0000,0x740000,0x740000,0x900000,0x640000,0x6C0000,0xB40000,0x1000000, -0xB40000,0x1100001,0x1100001,0x1100001,0x1100001,0x1980000,0x1980000,0x1980000,0x51F80000,0x51F80000,0x88000000,0x1980000,0x1980000,0x1980000,0x51F80000,0x51F80000,0x88000000,0x51F80000,0x51F80000,0x88000000,0x88000000,0x1980000,0x1980000,0x1980000,0x51F80000,0x51F80000,0x88000000,0x51F80000,0x51F80000,0x88000000,0x88000000,0x51F80000, -0x51F80000,0x88000000,0x88000000,0x88000000,0x73C0000,0x1240000,0x1100001,0x1740000,0x1CC0000,0x21FC0000,0x39FC0000,0x5FF80000,0x1580000,0x1980000,0x21FC0000,0x88000000,0x21FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xD895C3,0xFEB0608A,0xFE8835A3,0xFE78265A,0xFE80521D,0xFE581C6A,0xFE440859,0xFE3820E5,0xEC2007F6,0xC22C1ED1,0xFE686AA4,0xFE202A26,0xFE0C116E,0xFE04269A,0xE80007B5,0xC0001CD4,0xD600582B,0xC4002C2E,0xAC003425,0x90005828,0x14495C1,0xFE08488A,0xEE002B26,0xF400410D,0xD8001CB6,0xB4002B8D,0xC6006A14,0xB8003BF7,0xA4004028,0x8C00609C,0x27F895C1, -0x9C006399,0x90005FA9,0x7E00782D,0x6C0095C1,0xFEA07179,0xFECC888F,0xFECC8AE7,0xFE6C4C2A,0xFE382902,0xFE040CD2,0xF4000288,0xD8000F71,0xFE906FEE,0xFE5046F2,0xE600202C,0xA4004028,0x1D095C1,0x120582B,0xFEF43833,0xFEC01C1A,0xFEB41142,0xFED03783,0xFE88113A,0xFE680195,0xFE641BE3,0xEA4005CA,0xC2541B35,0x1B05828,0xFE4C27C5,0xFE141142,0xFE08268D,0xE80007B5, -0xC0001CD4,0x5DF45828,0xC4002C2E,0xAC003425,0x90005828,0x1B05828,0xFE4C27C5,0xFE141142,0xFE08268D,0xE80007B5,0xC0001CD4,0x5DF45828,0xC4002C2E,0xAC003425,0x90005828,0x5DF45828,0xC4002C2E,0xAC003425,0x90005828,0x90005828,0xFEF8494A,0xFF0C5253,0xFF0C5243,0xFEB03872,0xFE682411,0xFE040CCE,0xF4000288,0xD8000F71,0xFEDC4979,0xFE9035DE,0xE6001F68,0xAC003425, -0x31FC5828,0x78265A,0x78265A,0x78265A,0x78265A,0xFE440859,0xFE440859,0xFE440859,0xC02C039D,0xC02C039D,0x882C039D,0xFE0C116E,0xFE0C116E,0xFE0C116E,0xBA0001BD,0xBA0001BD,0x88000004,0x84001144,0x84001144,0x72000668,0x58001144,0x2B0265A,0x2B0265A,0x2B0265A,0xA2000E36,0xA2000E36,0x7A00070D,0x740017D8,0x740017D8,0x68000B94,0x5200147D,0x168265A, -0x168265A,0x560017C9,0x4A001CC8,0x3A00265D,0xFE5817D5,0xFE6C1F01,0x78265A,0xFE340D49,0xFE14045E,0xEE0001F4,0xDA000112,0xAA00031D,0xFE341721,0xFE100BF2,0xA60011A8,0x68000B94,0xFC265A,0xB41142,0xB41142,0xB41142,0xB41142,0xFE680195,0xFE680195,0xFE680195,0xB6540001,0xB6540001,0x88540001,0x10C1142,0x10C1142,0x10C1142,0xBA0001BD,0xBA0001BD, -0x88000004,0xBF81142,0xBF81142,0x72000668,0x58001144,0x10C1142,0x10C1142,0x10C1142,0xBA0001BD,0xBA0001BD,0x88000004,0xBF81142,0xBF81142,0x72000668,0x58001144,0xBF81142,0xBF81142,0x72000668,0x58001144,0x58001144,0xFE900C31,0xFCA80D75,0xB41142,0xFE5C08C9,0xFE1803E8,0xEE0001F4,0xDA000112,0xAA00031D,0xFA700CD1,0xFE300869,0x17C1142,0x72000668, -0x17C1142,0x1881B32,0xFF5C10BD,0xFF2C052D,0xFF100001,0x4DFC1B32,0xFEE40A55,0xFEA00000,0xA7FC1B32,0xEC000328,0xC2001B34,0x4DFC1B32,0xFEE40A55,0xFEA00000,0xA7FC1B32,0xEC000328,0xC2001B34,0xA7FC1B32,0xEC000328,0xC2001B34,0xC2001B34,0x4DFC1B32,0xFEE40A55,0xFEA00000,0xA7FC1B32,0xEC000328,0xC2001B34,0xA7FC1B32,0xEC000328,0xC2001B34,0xC2001B34,0xA7FC1B32, -0xEC000328,0xC2001B34,0xC2001B34,0xC2001B34,0xFB7018F1,0x9A01B32,0xFB841962,0xFF401540,0xFEE81109,0xFE480871,0xF800009D,0xE4000665,0xFF5C1892,0xFF2014D2,0xFE1000E5,0xC2001B34,0x91FC1B32,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x2C039D,0x82000000,0x82000000,0x82000000,0x82000000,0x82000000, -0x82000000,0x3E000001,0x3E000001,0x3E000001,0x2A000000,0x40039D,0x40039D,0x40039D,0x40039D,0x40039D,0x40039D,0x34000152,0x34000152,0x34000152,0x280000AD,0x7C039D,0x7C039D,0x7C039D,0x1C00022D,0x1400039D,0xF61C0188,0x2C039D,0x2C039D,0xFE04001D,0xD2000000,0xA0000000,0xA0000000,0x68000000,0xF6000121,0xC2000099,0x50000019,0x34000152, -0x58039D,}; -static const uint32_t g_etc1_to_bc7_m6_table236[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x680000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000, -0x2980000,0x1380000,0x1380000,0x1380000,0x32000001,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x2980000,0x1380000,0x1380000,0x1380000,0x32000001,0x1380000,0x1380000,0x1380000,0x32000001,0x32000001,0xE6C0000,0x680000,0x680000,0x780000,0x2800000,0x8C0000,0x8C0000,0x2AC0000,0x780000,0x2800000,0xDC0000,0x1380000, -0xDC0000,0x1240000,0x1240000,0x1240000,0x1240000,0x3B00000,0x3B00000,0x3B00000,0x5DFC0000,0x5DFC0000,0x90000001,0x3B00000,0x3B00000,0x3B00000,0x5DFC0000,0x5DFC0000,0x90000001,0x5DFC0000,0x5DFC0000,0x90000001,0x90000001,0x3B00000,0x3B00000,0x3B00000,0x5DFC0000,0x5DFC0000,0x90000001,0x5DFC0000,0x5DFC0000,0x90000001,0x90000001,0x5DFC0000, -0x5DFC0000,0x90000001,0x90000001,0x90000001,0x1540000,0x1380000,0x1240000,0x18C0000,0x1EC0000,0x33FC0000,0x49F80000,0x6BF80000,0x36C0000,0x3B00000,0x33FC0000,0x90000001,0x33FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xE49371,0xFEB06174,0xFE943921,0xFE842AC8,0xFE8C5159,0xFE641E9A,0xFE500B99,0xFE401F71,0xEE300834,0xC8341C7D,0xFE7466F6,0xFE2C29B8,0xFE181234,0xFE0821B1,0xEE000485,0xC60017E2,0xE00050A2,0xCA002684,0xB2002CE1,0x960050A2,0x150936F,0xFE0849BC,0xEE002D78,0xFA003CBD,0xE2001A89,0xBA00277B,0xCC006426,0xC0003749,0xAA0039C2,0x920059D2,0x2DF8936F, -0xA60061D4,0x96005B93,0x8400735F,0x7000936F,0xFEB46FEE,0xFECC86CD,0xF6DC89B8,0xFE804B93,0xFE3C288E,0xFE040BB2,0xF80000EC,0xE0000ADE,0xFE986E7F,0xFE544677,0xF0001BFE,0xAA0039C2,0x1E0936F,0x12C50A5,0xFF00340D,0xFED81AE4,0xFEC41144,0xFEDC3275,0xFEA01012,0xFE8001F9,0xFE701821,0xEA580474,0xC8681735,0x1C050A2,0xFE642553,0xFE2C1144,0xFE0821A1,0xEE000485, -0xC60017E2,0x65F850A2,0xCA002684,0xB2002CE1,0x960050A2,0x1C050A2,0xFE642553,0xFE2C1144,0xFE0821A1,0xEE000485,0xC60017E2,0x65F850A2,0xCA002684,0xB2002CE1,0x960050A2,0x65F850A2,0xCA002684,0xB2002CE1,0x960050A2,0x960050A2,0xFEF84350,0xFB244B59,0xFD284B14,0xFEC033C8,0xFE7C2149,0xFE040BAE,0xF80000EC,0xE0000ADE,0xFEF843BD,0xFEA431C2,0xF0001B1D,0xB2002CE1, -0x3DF850A2,0x842AC8,0x842AC8,0x842AC8,0x842AC8,0xFE500B99,0xFE500B99,0xFE500B99,0xCC340548,0xCC340548,0x90340549,0xFE181234,0xFE181234,0xFE181234,0xCC000121,0xCC000121,0x92040009,0x92001142,0x92001142,0x7E0005AA,0x62001142,0xC42AC8,0xC42AC8,0xC42AC8,0xA8000FDA,0xA8000FDA,0x86000849,0x80001946,0x80001946,0x74000BC6,0x5E001523,0x18C2AC8, -0x18C2AC8,0x5C001A61,0x50001F2E,0x40002ACB,0xFE681BBA,0xF67C23A8,0x842AC8,0xFE441026,0xFE1805AA,0xFC000171,0xEA000088,0xB6000274,0xFE3C1B18,0xFE180E8E,0xB60011C3,0x74000BC6,0x1182AC8,0xC41144,0xC41144,0xC41144,0xC41144,0xFE8001F9,0xFE8001F9,0xFE8001F9,0xBE680001,0xBE680001,0x90680001,0x3241142,0x3241142,0x3241142,0xCC000121,0xCC000121, -0x900C0001,0x17FC1142,0x17FC1142,0x7E0005AA,0x62001142,0x3241142,0x3241142,0x3241142,0xCC000121,0xCC000121,0x900C0001,0x17FC1142,0x17FC1142,0x7E0005AA,0x62001142,0x17FC1142,0x17FC1142,0x7E0005AA,0x62001142,0x62001142,0xFEA00C82,0xF6BC0DC8,0xC41144,0xFE78094D,0xFE2C046A,0xFC000171,0xEA000088,0xB6000274,0xFE780D0D,0xFE4808B4,0x1A41142,0x7E0005AA, -0x1A41142,0x1901735,0xFF680E48,0xFF380464,0xFF240000,0x59FC1735,0xFEFC08C8,0xFEB80001,0xADFC1735,0xF20001B1,0xC8001735,0x59FC1735,0xFEFC08C8,0xFEB80001,0xADFC1735,0xF20001B1,0xC8001735,0xADFC1735,0xF20001B1,0xC8001735,0xC8001735,0x59FC1735,0xFEFC08C8,0xFEB80001,0xADFC1735,0xF20001B1,0xC8001735,0xADFC1735,0xF20001B1,0xC8001735,0xC8001735,0xADFC1735, -0xF20001B1,0xC8001735,0xC8001735,0xC8001735,0xFF781522,0x1AC1735,0xFF8C1589,0xFF401231,0xFEFC0E90,0xFE840734,0xFC000014,0xEC000454,0xFF701520,0xFF4011AD,0xFE3000C5,0xC8001735,0x99FC1735,0x340548,0x340548,0x340548,0x340548,0x340548,0x340548,0x340548,0x340548,0x340548,0x340548,0x9C000000,0x9C000000,0x9C000000,0x9C000000,0x9C000000, -0x9C000000,0x4C000000,0x4C000000,0x4C000000,0x32000001,0x4C0548,0x4C0548,0x4C0548,0x4C0548,0x4C0548,0x4C0548,0x3A0001F9,0x3A0001F9,0x3A0001F9,0x2E0000FA,0x980548,0x980548,0x980548,0x2000034D,0x1800054A,0xFA2402AD,0x340548,0x340548,0xFE100091,0xFE000000,0xC2000000,0xC2000000,0x7E000000,0xFC000200,0xE40000DD,0x60000024,0x3A0001F9, -0x6C0548,}; -static const uint32_t g_etc1_to_bc7_m6_table237[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x780000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000, -0x2B00000,0x1680000,0x1680000,0x1680000,0x3A000001,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x2B00000,0x1680000,0x1680000,0x1680000,0x3A000001,0x1680000,0x1680000,0x1680000,0x3A000001,0x3A000001,0x800000,0x780000,0x780000,0x8C0000,0x2940000,0xA40000,0xA40000,0xC80000,0x8C0000,0x2940000,0xFC0000,0x1680000, -0xFC0000,0x1340000,0x1340000,0x1340000,0x1340000,0x1C80000,0x1C80000,0x1C80000,0x69FC0000,0x69FC0000,0x98000001,0x1C80000,0x1C80000,0x1C80000,0x69FC0000,0x69FC0000,0x98000001,0x69FC0000,0x69FC0000,0x98000001,0x98000001,0x1C80000,0x1C80000,0x1C80000,0x69FC0000,0x69FC0000,0x98000001,0x69FC0000,0x69FC0000,0x98000001,0x98000001,0x69FC0000, -0x69FC0000,0x98000001,0x98000001,0x98000001,0x5640000,0x1480000,0x1340000,0x1A40000,0x7FC0000,0x41FC0000,0x55FC0000,0x77F40000,0x3800000,0x1C80000,0x41FC0000,0x98000001,0x41FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xEC91E9,0xFEC46279,0xFEA03CA5,0xFE902F14,0xFE985139,0xFE64210A,0xFE5C0EF9,0xFE4C1EBD,0xF23808E4,0xCC3C1AF5,0xFE806462,0xFE3829E8,0xFE241378,0xFE081E61,0xF000026D,0xCC00141E,0xE8004A4E,0xD0002208,0xB80026E5,0x9C004A4E,0x15C91E7,0xFE084BDC,0xF4003058,0xFA0039ED,0xE2001939,0xC000247F,0xD8005F3E,0xC60033B5,0xAE00348E,0x98005432,0x33F891E7, -0xA60060B4,0x9C005887,0x8A006F9B,0x740091E7,0xFEBC6F6D,0xF8E085F5,0xFAE48878,0xFE804B93,0xFE4C28D8,0xFE100BA9,0xFC000045,0xE40007A9,0xFE986E0F,0xFE5C46BB,0xF60018B4,0xAE00348E,0x1F091E7,0x1384A4D,0xFF0C3095,0xFEE419E8,0xFED41144,0xFEE82E49,0xFEAC0F1E,0xFE980269,0xFE881509,0xEE6C0364,0xCC7813ED,0x1D04A4D,0xFE70234B,0xFE441142,0xFE141E3D,0xF000026D, -0xCC00141E,0x6DF84A4D,0xD0002208,0xB80026E5,0x9C004A4E,0x1D04A4D,0xFE70234B,0xFE441142,0xFE141E3D,0xF000026D,0xCC00141E,0x6DF84A4D,0xD0002208,0xB80026E5,0x9C004A4E,0x6DF84A4D,0xD0002208,0xB80026E5,0x9C004A4E,0x9C004A4E,0xFF143E02,0xFF2C4531,0xFF2C4538,0xFED43009,0xFE901F05,0xFE240B1D,0xFC000045,0xE40007A9,0xFEF83E4D,0xFEC02DC4,0xF60017D3,0xB80026E5, -0x45FC4A4D,0x902F14,0x902F14,0x902F14,0x902F14,0xFE5C0EF9,0xFE5C0EF9,0xFE5C0EF9,0xD63C0708,0xD63C0708,0x983C0709,0xFE241378,0xFE241378,0xFE241378,0xD80000B9,0xD80000B9,0x9A080035,0x9E001142,0x9E001142,0x84000502,0x6A001142,0x2D02F13,0x2D02F13,0x2D02F13,0xB400119A,0xB400119A,0x8C0009B1,0x8C001AA6,0x8C001AA6,0x7A000C12,0x620015C2,0x1AC2F13, -0x1AC2F13,0x66001D13,0x56002186,0x46002F13,0xFE681F7A,0xFC8827B4,0x902F14,0xFE4C1319,0xFE2C078E,0xFE040199,0xFC000041,0xC00001D4,0xFE501E81,0xFE301181,0xC60011D4,0x7A000C12,0x12C2F13,0xD41144,0xD41144,0xD41144,0xD41144,0xFE980269,0xFE980269,0xFE980269,0xC6780001,0xC6780001,0x98780001,0x33C1142,0x33C1142,0x33C1142,0xD80000B9,0xD80000B9, -0x981C0001,0x23FC1142,0x23FC1142,0x84000502,0x6A001142,0x33C1142,0x33C1142,0x33C1142,0xD80000B9,0xD80000B9,0x981C0001,0x23FC1142,0x23FC1142,0x84000502,0x6A001142,0x23FC1142,0x23FC1142,0x84000502,0x6A001142,0x6A001142,0xFAB40CD1,0xFECC0DC8,0xD41144,0xFE880994,0xFE4804D9,0xFE0C0190,0xFC000041,0xC00001D4,0xFE940D2A,0xFE5C0901,0x1C81142,0x84000502, -0x1C81142,0x19813ED,0xFF740C44,0xFF4C03D9,0xFF340000,0x65FC13ED,0xFF080784,0xFED00000,0xB3FC13ED,0xF80000CD,0xCC0013ED,0x65FC13ED,0xFF080784,0xFED00000,0xB3FC13ED,0xF80000CD,0xCC0013ED,0xB3FC13ED,0xF80000CD,0xCC0013ED,0xCC0013ED,0x65FC13ED,0xFF080784,0xFED00000,0xB3FC13ED,0xF80000CD,0xCC0013ED,0xB3FC13ED,0xF80000CD,0xCC0013ED,0xCC0013ED,0xB3FC13ED, -0xF80000CD,0xCC0013ED,0xCC0013ED,0xCC0013ED,0xFF781232,0x1B413ED,0xFF8C1289,0xFF580FA1,0xFF100C84,0xFE980631,0xFE0C0000,0xF20002D0,0xFF701220,0xFF400F1D,0xFE5000A9,0xCC0013ED,0x9FFC13ED,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0x3C0708,0xB6000000,0xB6000000,0xB6000000,0xB6000000,0xB6000000, -0xB6000000,0x58000000,0x58000000,0x58000000,0x3A000001,0x580708,0x580708,0x580708,0x580708,0x580708,0x580708,0x460002A1,0x460002A1,0x460002A1,0x34000152,0xB00708,0xB00708,0xB00708,0x2600045D,0x1C00070A,0xFE2C03F5,0x3C0708,0x3C0708,0xFA180154,0xFC080029,0xE0000000,0xE0000000,0x92000000,0xFA080322,0xFE000140,0x70000031,0x460002A1, -0x7C0708,}; -static const uint32_t g_etc1_to_bc7_m6_table238[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0x880000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000, -0xC80000,0x1980000,0x1980000,0x1980000,0x42000001,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0xC80000,0x1980000,0x1980000,0x1980000,0x42000001,0x1980000,0x1980000,0x1980000,0x42000001,0x42000001,0x900000,0x880000,0x880000,0x49C0000,0x2A80000,0xB80000,0xB80000,0xE40000,0x49C0000,0x2A80000,0x1200000,0x1980000, -0x1200000,0x1440000,0x1440000,0x1440000,0x1440000,0x1E00000,0x1E00000,0x1E00000,0x75FC0000,0x75FC0000,0xA0000001,0x1E00000,0x1E00000,0x1E00000,0x75FC0000,0x75FC0000,0xA0000001,0x75FC0000,0x75FC0000,0xA0000001,0xA0000001,0x1E00000,0x1E00000,0x1E00000,0x75FC0000,0x75FC0000,0xA0000001,0x75FC0000,0x75FC0000,0xA0000001,0xA0000001,0x75FC0000, -0x75FC0000,0xA0000001,0xA0000001,0xA0000001,0x1780000,0x5580000,0x1440000,0x1B80000,0x1BFC0000,0x51FC0000,0x63FC0000,0x81F80000,0x3940000,0x1E00000,0x51FC0000,0xA0000001,0x51FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xF490E1,0xFEC46419,0xFEA04085,0xFE9833B4,0xFEA451A9,0xFE7023E2,0xFE5C12C9,0xFE581EA9,0xF6400A14,0xD04419ED,0xFE806262,0xFE402ADB,0xFE301544,0xFE141BF1,0xF60000F9,0xD00010E6,0xF000444D,0xD6001E14,0xBE002159,0xA000444E,0x16890DF,0xFE144EC8,0xFA003418,0xFA00381D,0xE80018CD,0xC6002223,0xE2005A92,0xCC0030B1,0xB4002FCA,0x98004EF2,0x39F890DF, -0xAC006018,0x9C005607,0x8A006C1B,0x780090DF,0xFEBC6F7D,0xFCE884ED,0xFEEC87B8,0xFE944C1E,0xFE50299A,0xFE180BFB,0xFE040069,0xEA0004E2,0xFEB46DCE,0xFE704795,0xF6001634,0xB4002FCA,0x3FC90DF,0x144444D,0xFF182D4D,0xFEF01904,0xFEE41144,0xFEF42A65,0xFEB80E5A,0xFEA402D5,0xFE941241,0xF07C0278,0xD08810E5,0x1E0444D,0xFE882153,0xFE5C1142,0xFE2C1B35,0xF60000F9, -0xD00010E6,0x75FC444D,0xD6001E14,0xBE002159,0xA000444E,0x1E0444D,0xFE882153,0xFE5C1142,0xFE2C1B35,0xF60000F9,0xD00010E6,0x75FC444D,0xD6001E14,0xBE002159,0xA000444E,0x75FC444D,0xD6001E14,0xBE002159,0xA000444E,0xA000444E,0xFF20392D,0xFF2C3FE1,0xF9404005,0xFEEC2C7A,0xFEA41CE9,0xFE380A7D,0xFE0C0050,0xEA0004E2,0xFF08395E,0xFEC82A3E,0xF6001553,0xBE002159, -0x51FC444D,0x9833B4,0x9833B4,0x9833B4,0x9833B4,0xFE5C12C9,0xFE5C12C9,0xFE5C12C9,0xE0440908,0xE0440908,0xA0440909,0xFE301544,0xFE301544,0xFE301544,0xE8000059,0xE8000059,0xA20C0089,0xAA001142,0xAA001142,0x9000046A,0x72001142,0xE433B3,0xE433B3,0xE433B3,0xC000139A,0xC000139A,0x98000B59,0x98001C26,0x98001C26,0x86000C7A,0x6E001672,0x1D033B3, -0x1D033B3,0x6C001FE3,0x5C00240E,0x4C0033B3,0xFE7423B8,0xFE8C2C28,0x9833B4,0xFE541675,0xFE2C09CE,0xFE0C0274,0xFE040069,0xCC00015A,0xFE5022C1,0xFE3014B1,0xD60011ED,0x86000C7A,0x14833B3,0xE41144,0xE41144,0xE41144,0xE41144,0xFEA402D5,0xFEA402D5,0xFEA402D5,0xCE880001,0xCE880001,0xA0880001,0x1541142,0x1541142,0x1541142,0xE8000059,0xE8000059, -0xA02C0001,0x2FFC1142,0x2FFC1142,0x9000046A,0x72001142,0x1541142,0x1541142,0x1541142,0xE8000059,0xE8000059,0xA02C0001,0x2FFC1142,0x2FFC1142,0x9000046A,0x72001142,0x2FFC1142,0x2FFC1142,0x9000046A,0x72001142,0x72001142,0xFEBC0CF9,0xF6DC0E1D,0xE41144,0xFE9809E5,0xFE5C0551,0xFE2001E2,0xFE0C0050,0xCC00015A,0xFAAC0D75,0xFE740975,0x1E81142,0x9000046A, -0x1E81142,0x1A010E5,0xFF800A68,0xFF58033D,0xFF440000,0x71FC10E5,0xFF200654,0xFEE80000,0xB9FC10E5,0xF800003D,0xD00010E5,0x71FC10E5,0xFF200654,0xFEE80000,0xB9FC10E5,0xF800003D,0xD00010E5,0xB9FC10E5,0xF800003D,0xD00010E5,0xD00010E5,0x71FC10E5,0xFF200654,0xFEE80000,0xB9FC10E5,0xF800003D,0xD00010E5,0xB9FC10E5,0xF800003D,0xD00010E5,0xD00010E5,0xB9FC10E5, -0xF800003D,0xD00010E5,0xD00010E5,0xD00010E5,0xFD900F79,0x1BC10E5,0xF79C0FD4,0xFF6C0D2A,0xFF280A82,0xFEC00544,0xFE340000,0xF2000190,0xF7880F79,0xFF4C0CE2,0xFE700090,0xD00010E5,0xA7FC10E5,0x440908,0x440908,0x440908,0x440908,0x440908,0x440908,0x440908,0x440908,0x440908,0x440908,0xCE000000,0xCE000000,0xCE000000,0xCE000000,0xCE000000, -0xCE000000,0x64000000,0x64000000,0x64000000,0x42000001,0x640908,0x640908,0x640908,0x640908,0x640908,0x640908,0x52000369,0x52000369,0x52000369,0x3A0001BA,0xCC0908,0xCC0908,0xCC0908,0x2C000595,0x2000090A,0xFE2C05A5,0x440908,0x440908,0xFE200244,0xFE0C009D,0xFE000000,0xFE000000,0xA6000000,0xFE100482,0xFE000200,0x7E000041,0x52000369, -0x900908,}; -static const uint32_t g_etc1_to_bc7_m6_table239[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0x980000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000, -0xE00000,0x1CC0000,0x1CC0000,0x1CC0000,0x4A000001,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0xE00000,0x1CC0000,0x1CC0000,0x1CC0000,0x4A000001,0x1CC0000,0x1CC0000,0x1CC0000,0x4A000001,0x4A000001,0x8A00000,0x980000,0x980000,0xB00000,0x2BC0000,0x2CC0000,0x2CC0000,0x1000000,0xB00000,0x2BC0000,0x1400000,0x1CC0000, -0x1400000,0x1540000,0x1540000,0x1540000,0x1540000,0x1F80000,0x1F80000,0x1F80000,0x81FC0000,0x81FC0000,0xA8000001,0x1F80000,0x1F80000,0x1F80000,0x81FC0000,0x81FC0000,0xA8000001,0x81FC0000,0x81FC0000,0xA8000001,0xA8000001,0x1F80000,0x1F80000,0x1F80000,0x81FC0000,0x81FC0000,0xA8000001,0x81FC0000,0x81FC0000,0xA8000001,0xA8000001,0x81FC0000, -0x81FC0000,0xA8000001,0xA8000001,0xA8000001,0x18C0000,0xD680000,0x1540000,0x3CC0000,0x2FFC0000,0x5FFC0000,0x71FC0000,0x8BFC0000,0x3A80000,0x1F80000,0x5FFC0000,0xA8000001,0x5FFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0xFC9059,0xFED0662D,0xFEAC44A9,0xFEA438A8,0xFEB052A9,0xFE7C2732,0xFE6816F9,0xFE581F29,0xFA480BC4,0xD44C1965,0xFE8C60F6,0xFE4C2C1F,0xFE3C1798,0xFE201A39,0xFA000035,0xD4080E36,0xF8003EA2,0xDC001AA8,0xC4001C3D,0xA6003EA2,0x1749057,0xFE20521C,0xFC0038AF,0xFA00374D,0xEE001919,0xCC002067,0xE2005652,0xD2002E3D,0xBA002B7E,0xA20049CB,0x3FF89057, -0xB2006024,0xA6005459,0x9000690F,0x7C009057,0xFED06FC2,0xFEEC8481,0xFEEC87D8,0xFE984D29,0xFE602AF8,0xFE240D01,0xFE100125,0xF00002D9,0xFEB46DDE,0xFE80487B,0xFE001462,0xBA002B7E,0xBFC9057,0x14C3EA5,0xFF242A35,0xFF081824,0xFEF41144,0xFF0026C9,0xFECC0DAC,0xFEBC0355,0xFEAC0FB9,0xF28C01B4,0xD4980E1D,0x1F03EA2,0xFEA01F7B,0xFE741142,0xFE381881,0xFA000035, -0xD4100E1E,0x7DF83EA2,0xDC001AA8,0xC4001C3D,0xA6003EA2,0x1F03EA2,0xFEA01F7B,0xFE741142,0xFE381881,0xFA000035,0xD4100E1E,0x7DF83EA2,0xDC001AA8,0xC4001C3D,0xA6003EA2,0x7DF83EA2,0xDC001AA8,0xC4001C3D,0xA6003EA2,0xA6003EA2,0xFF343494,0xFB443A79,0xFD483A9D,0xFEEC292A,0xFEA81AF3,0xFE480A18,0xFE280088,0xF00002D9,0xFF1834C1,0xFEDC273A,0xFE001362,0xC4001C3D, -0x5BFC3EA2,0xA438A8,0xA438A8,0xA438A8,0xA438A8,0xFE6816F9,0xFE6816F9,0xFE6816F9,0xEA4C0B48,0xEA4C0B48,0xA84C0B49,0xFE3C1798,0xFE3C1798,0xFE3C1798,0xFA000025,0xFA000025,0xAE100102,0xB6001142,0xB6001142,0x960003DA,0x7A001142,0xF438A8,0xF438A8,0xF438A8,0xCC0015DA,0xCC0015DA,0xA2000D3B,0xA2001D8D,0xA2001D8D,0x8C000CF6,0x7400172E,0x1F038A8, -0x1F038A8,0x720022FB,0x660026FB,0x500038AB,0xFE842831,0xF69C3169,0xA438A8,0xFE5C1A55,0xFE400CA2,0xFE1C03D8,0xFE100125,0xDA0000FA,0xFE64274E,0xFE44188A,0xE6001206,0x8C000CF6,0x15C38A8,0xF41144,0xF41144,0xF41144,0xF41144,0xFEBC0355,0xFEBC0355,0xFEBC0355,0xD6980001,0xD6980001,0xA8980001,0x16C1142,0x16C1142,0x16C1142,0xFA000025,0xFA000025, -0xA83C0001,0x3BFC1142,0x3BFC1142,0x960003DA,0x7A001142,0x16C1142,0x16C1142,0x16C1142,0xFA000025,0xFA000025,0xA83C0001,0x3BFC1142,0x3BFC1142,0x960003DA,0x7A001142,0x3BFC1142,0x3BFC1142,0x960003DA,0x7A001142,0x7A001142,0xFED80D22,0xFEEC0E1D,0xF41144,0xFEA40A68,0xFE7005D1,0xFE380249,0xFE280088,0xDA0000FA,0xFEB40D9D,0xFE9809D9,0x7FC1142,0x960003DA, -0x7FC1142,0x1A80E1D,0xFF8C08B4,0xFF6402B1,0xFF540000,0x7DFC0E1D,0xFF380544,0xFF000000,0xBFFC0E1D,0xFE000001,0xD4000E1D,0x7DFC0E1D,0xFF380544,0xFF000000,0xBFFC0E1D,0xFE000001,0xD4000E1D,0xBFFC0E1D,0xFE000001,0xD4000E1D,0xD4000E1D,0x7DFC0E1D,0xFF380544,0xFF000000,0xBFFC0E1D,0xFE000001,0xD4000E1D,0xBFFC0E1D,0xFE000001,0xD4000E1D,0xD4000E1D,0xBFFC0E1D, -0xFE000001,0xD4000E1D,0xD4000E1D,0xD4000E1D,0xFF940CD5,0x1C40E1D,0xFBA40D24,0xFF6C0AFA,0xFF3C08CA,0xFEC80451,0xFE5C0000,0xF60000B9,0xFB900CD1,0xFF640AD2,0xFE940079,0xD4000E1D,0xAFFC0E1D,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0x4C0B48,0xE6000000,0xE6000000,0xE6000000,0xE6000000,0xE6000000, -0xE6000000,0x70000000,0x70000000,0x70000000,0x4A000001,0x700B48,0x700B48,0x700B48,0x700B48,0x700B48,0x700B48,0x58000449,0x58000449,0x58000449,0x4600022A,0xE40B48,0xE40B48,0xE40B48,0x320006F5,0x24000B4A,0xF63C0784,0x4C0B48,0x4C0B48,0xFE2C039D,0xFE140164,0xFE080029,0xFE080029,0xBA000000,0xFE100652,0xFE000340,0x86000050,0x58000449, -0xA00B48,}; -static const uint32_t g_etc1_to_bc7_m6_table240[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xA80001,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000, -0xFC0000,0x3F80000,0x3F80000,0x3F80000,0x54000000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0x3F80000,0x3F80000,0x3F80000,0x54000000,0x3F80000,0x3F80000,0x3F80000,0x54000000,0x54000000,0x2B40000,0xA80001,0xA80001,0x2C40000,0xD40000,0xE80000,0xE80000,0x11C0000,0x2C40000,0xD40000,0x1680000,0x3F80000, -0x1680000,0x1640001,0x1640001,0x1640001,0x1640001,0x19FC0000,0x19FC0000,0x19FC0000,0x8FF80000,0x8FF80000,0xB2000000,0x19FC0000,0x19FC0000,0x19FC0000,0x8FF80000,0x8FF80000,0xB2000000,0x8FF80000,0x8FF80000,0xB2000000,0xB2000000,0x19FC0000,0x19FC0000,0x19FC0000,0x8FF80000,0x8FF80000,0xB2000000,0x8FF80000,0x8FF80000,0xB2000000,0xB2000000,0x8FF80000, -0x8FF80000,0xB2000000,0xB2000000,0xB2000000,0x1A00000,0x77C0000,0x1640001,0x1E80000,0x45FC0000,0x71FC0000,0x7FFC0000,0x99F40000,0x1C00000,0x19FC0000,0x71FC0000,0xB2000000,0x71FC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1049057,0xFEDC690F,0xFEB849CB,0xFEB03EA2,0xFEB05459,0xFE882B7E,0xFE741C3D,0xFE642067,0xFA540E36,0xD8541965,0xFE986024,0xFE582E3D,0xFE441AA8,0xFE201919,0xFE040035,0xDA080BC4,0xFE0438AF,0xE0001798,0xCA0016F9,0xAC0038A8,0x3809057,0xFE385652,0xFE0C3EA2,0xFE08374D,0xEE001A39,0xD2001F29,0xEE00521C,0xD8002C1F,0xC0002732,0xA80044A9,0x45FC9057, -0xB80060F6,0xA60052A9,0x9600662D,0x80009059,0xFED870CD,0xFEEC8535,0xF90088B3,0xFE984F45,0xFE682CFD,0xFE2C0EC7,0xFE1C02D9,0xF6000125,0xFEB46F22,0xFE804A3D,0xFE001354,0xC0002732,0x15FC9057,0x15838AB,0xFF3026FB,0xFF14172E,0xFF081142,0xFF1822FB,0xFEE40CF6,0xFED003DA,0xFEB80D3B,0xF6A00102,0xD8A80B49,0x7FC38A8,0xFEB81D8D,0xFE901142,0xFE5815DA,0xFE080025, -0xD8280B48,0x85FC38A8,0xE0001798,0xCA0016F9,0xAC0038A8,0x7FC38A8,0xFEB81D8D,0xFE901142,0xFE5815DA,0xFE080025,0xD8280B48,0x85FC38A8,0xE0001798,0xCA0016F9,0xAC0038A8,0x85FC38A8,0xE0001798,0xCA0016F9,0xAC0038A8,0xAC0038A8,0xFF342F72,0xFF4C34BB,0xFF4C3533,0xFF082595,0xFEC818C5,0xFE7009A1,0xFE3C00FA,0xF6000125,0xFF243009,0xFEE8241E,0xFE001254,0xCA0016F9, -0x65FC38A8,0xB03EA2,0xB03EA2,0xB03EA2,0xB03EA2,0xFE741C3D,0xFE741C3D,0xFE741C3D,0xF4540E1E,0xF4540E1E,0xB2540E1D,0xFE441AA8,0xFE441AA8,0xFE441AA8,0xFE040035,0xFE040035,0xB81801B4,0xC4001142,0xC4001142,0xA0000355,0x82001144,0x1043EA2,0x1043EA2,0x1043EA2,0xE2001881,0xE2001881,0xA8000FB9,0xAE001F7B,0xAE001F7B,0x98000DAC,0x7A001824,0x7FC3EA2, -0x7FC3EA2,0x7E0026C9,0x6C002A35,0x56003EA5,0xFE902DE5,0xFCA83721,0xB03EA2,0xFE701F65,0xFE441079,0xFE24061D,0xFE1C02D9,0xEA000088,0xFE782CA5,0xFE501D0A,0xF6001225,0x98000DAC,0x1743EA2,0x1081142,0x1081142,0x1081142,0x1081142,0xFED003DA,0xFED003DA,0xFED003DA,0xE0A80001,0xE0A80001,0xB2A80001,0x1881142,0x1881142,0x1881142,0xFE080025,0xFE080025, -0xB24C0001,0x49F81142,0x49F81142,0xA0000355,0x82001144,0x1881142,0x1881142,0x1881142,0xFE080025,0xFE080025,0xB24C0001,0x49F81142,0x49F81142,0xA0000355,0x82001144,0x49F81142,0x49F81142,0xA0000355,0x82001144,0x82001144,0xFAEC0D75,0xF9000E72,0x1081142,0xFEC40ABA,0xFE98065D,0xFE5802F2,0xFE3C00FA,0xEA000088,0xFED00DD0,0xFEA40A40,0x19FC1142,0xA0000355, -0x19FC1142,0x1B00B4A,0xFF9806F5,0xFF70022A,0xFF640001,0x8DFC0B48,0xFF4C0449,0xFF1C0000,0xC7FC0B48,0xFE2C0000,0xD8000B48,0x8DFC0B48,0xFF4C0449,0xFF1C0000,0xC7FC0B48,0xFE2C0000,0xD8000B48,0xC7FC0B48,0xFE2C0000,0xD8000B48,0xD8000B48,0x8DFC0B48,0xFF4C0449,0xFF1C0000,0xC7FC0B48,0xFE2C0000,0xD8000B48,0xC7FC0B48,0xFE2C0000,0xD8000B48,0xD8000B48,0xC7FC0B48, -0xFE2C0000,0xD8000B48,0xD8000B48,0xD8000B48,0xFF940A68,0xFCC0B48,0xFFAC0A6D,0xFF8408D1,0xFF500709,0xFEF00382,0xFE880000,0xFA000029,0xFF980A22,0xFF7008B4,0xFEB80064,0xD8000B48,0xB9FC0B48,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0x540E1D,0xFE000001,0xFE000001,0xFE000001,0xFE000001,0xFE000001, -0xFE000001,0x7E000000,0x7E000000,0x7E000000,0x54000000,0x7C0E1D,0x7C0E1D,0x7C0E1D,0x7C0E1D,0x7C0E1D,0x7C0E1D,0x62000544,0x62000544,0x62000544,0x4C0002B1,0xFC0E1D,0xFC0E1D,0xFC0E1D,0x380008B4,0x2A000E1D,0xFC4809D9,0x540E1D,0x540E1D,0xFE2C0568,0xFE200290,0xFE1000B9,0xFE1000B9,0xD0000000,0xFE20088A,0xFE1404E2,0x9E000065,0x62000544, -0xB00E1D,}; -static const uint32_t g_etc1_to_bc7_m6_table241[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0xB80001,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000, -0x1140000,0xFF80000,0xFF80000,0xFF80000,0x5C000000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0x1140000,0xFF80000,0xFF80000,0xFF80000,0x5C000000,0xFF80000,0xFF80000,0xFF80000,0x5C000000,0x5C000000,0xAC40000,0xB80001,0xB80001,0xD80000,0xE80000,0xFC0000,0xFC0000,0x1380000,0xD80000,0xE80000,0x18C0000,0xFF80000, -0x18C0000,0x1740001,0x1740001,0x1740001,0x1740001,0x31FC0000,0x31FC0000,0x31FC0000,0x9BF80000,0x9BF80000,0xBA000000,0x31FC0000,0x31FC0000,0x31FC0000,0x9BF80000,0x9BF80000,0xBA000000,0x9BF80000,0x9BF80000,0xBA000000,0xBA000000,0x31FC0000,0x31FC0000,0x31FC0000,0x9BF80000,0x9BF80000,0xBA000000,0x9BF80000,0x9BF80000,0xBA000000,0xBA000000,0x9BF80000, -0x9BF80000,0xBA000000,0xBA000000,0xBA000000,0x1B40000,0xF8C0000,0x1740001,0x1FC0000,0x59FC0000,0x7FFC0000,0x8DFC0000,0xA3F80000,0x1D40000,0x31FC0000,0x7FFC0000,0xBA000000,0x7FFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x10C90DF,0xFEE86C1B,0xFEC04EF2,0xFEBC444E,0xFEC45607,0xFE942FCA,0xFE802159,0xFE702223,0xFE5C10E6,0xDC5C19ED,0xFEA46018,0xFE6430B1,0xFE501E14,0xFE2C18CD,0xFE1000F9,0xDE100A14,0xFE083418,0xE6001544,0xCA0012C9,0xB20033B4,0x38C90DF,0xFE385A92,0xFE1C444D,0xFE08381D,0xF4001BF1,0xD2001EA9,0xF4004EC8,0xDE002ADB,0xC60023E2,0xAE004085,0x4BFC90DF, -0xBE006262,0xAC0051A9,0x9C006419,0x840090E1,0xFEE47241,0xFB048637,0xFD08897B,0xFEAC5155,0xFE742F9E,0xFE381111,0xFE2804E2,0xFC000069,0xFED07092,0xFE904C7B,0xFE001374,0xC60023E2,0x1DF890DF,0x16433B3,0xFF44240E,0xFF201672,0xFF181142,0xFF241FE3,0xFEF00C7A,0xFEDC046A,0xFECC0B59,0xF6B40089,0xDCB80909,0x15FC33B3,0xFECC1C26,0xFEA81142,0xFE70139A,0xFE2C0059, -0xDC3C0908,0x8DFC33B3,0xE6001544,0xCA0012C9,0xB20033B4,0x15FC33B3,0xFECC1C26,0xFEA81142,0xFE70139A,0xFE2C0059,0xDC3C0908,0x8DFC33B3,0xE6001544,0xCA0012C9,0xB20033B4,0x8DFC33B3,0xE6001544,0xCA0012C9,0xB20033B4,0xB20033B4,0xFF3C2BBB,0xF75C3071,0xF96030C6,0xFF142266,0xFEDC1721,0xFE7C0965,0xFE64015A,0xFC000069,0xFF342BED,0xFF042108,0xFE141223,0xCA0012C9, -0x6FFC33B3,0xBC444E,0xBC444E,0xBC444E,0xBC444E,0xFE802159,0xFE802159,0xFE802159,0xFE5C10E6,0xFE5C10E6,0xBA5C10E5,0xFE501E14,0xFE501E14,0xFE501E14,0xFE1000F9,0xFE1000F9,0xC01C0278,0xD0001142,0xD0001142,0xAC0002D5,0x8A001144,0x114444D,0x114444D,0x114444D,0xE8001B35,0xE8001B35,0xB4001241,0xBA002153,0xBA002153,0xA2000E5A,0x86001904,0xFF8444D, -0xFF8444D,0x84002A65,0x72002D4D,0x5C00444D,0xFEA03348,0xFEAC3CB9,0xBC444E,0xFE78242D,0xFE541448,0xFE3408B9,0xFE2804E2,0xF8000050,0xFE7831F5,0xFE5021DA,0xFE001274,0xA2000E5A,0x18C444D,0x1181142,0x1181142,0x1181142,0x1181142,0xFEDC046A,0xFEDC046A,0xFEDC046A,0xE8B80001,0xE8B80001,0xBAB80001,0x1A01142,0x1A01142,0x1A01142,0xFE2C0059,0xFE2C0059, -0xBA5C0001,0x55F81142,0x55F81142,0xAC0002D5,0x8A001144,0x1A01142,0x1A01142,0x1A01142,0xFE2C0059,0xFE2C0059,0xBA5C0001,0x55F81142,0x55F81142,0xAC0002D5,0x8A001144,0x55F81142,0x55F81142,0xAC0002D5,0x8A001144,0x8A001144,0xFEF40D9D,0xFF0C0E7A,0x1181142,0xFEDC0AFD,0xFEAC06D9,0xFE740361,0xFE64015A,0xF8000050,0xF8EC0E1D,0xFEC40AB2,0x27FC1142,0xAC0002D5, -0x27FC1142,0x1B8090A,0xFFA40595,0xFF8801BA,0xFF740001,0x99FC0908,0xFF580369,0xFF340000,0xCDFC0908,0xFE600000,0xDC000908,0x99FC0908,0xFF580369,0xFF340000,0xCDFC0908,0xFE600000,0xDC000908,0xCDFC0908,0xFE600000,0xDC000908,0xDC000908,0x99FC0908,0xFF580369,0xFF340000,0xCDFC0908,0xFE600000,0xDC000908,0xCDFC0908,0xFE600000,0xDC000908,0xDC000908,0xCDFC0908, -0xFE600000,0xDC000908,0xDC000908,0xDC000908,0xFBAC0841,0x1D80908,0xFFAC087D,0xFF94070A,0xFF680595,0xFF0402C2,0xFEB00000,0xFE000000,0xFF980832,0xFF8806D1,0xFEDC0051,0xDC000908,0xBFFC0908,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0x5C10E5,0xFE0C003D,0xFE0C003D,0xFE0C003D,0xFE0C003D,0xFE0C003D, -0xFE0C003D,0x8A000000,0x8A000000,0x8A000000,0x5C000000,0x8810E5,0x8810E5,0x8810E5,0x8810E5,0x8810E5,0x8810E5,0x6E000654,0x6E000654,0x6E000654,0x5200033D,0x11410E5,0x11410E5,0x11410E5,0x3E000A68,0x2E0010E5,0xFE4C0C35,0x5C10E5,0x5C10E5,0xFE3C0745,0xFE280401,0xFE180190,0xFE180190,0xE4000000,0xFE200AFA,0xFE1406B2,0xA6000074,0x6E000654, -0xC010E5,}; -static const uint32_t g_etc1_to_bc7_m6_table242[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0xC80001,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000, -0x12C0000,0x1BF80000,0x1BF80000,0x1BF80000,0x64000000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x12C0000,0x1BF80000,0x1BF80000,0x1BF80000,0x64000000,0x1BF80000,0x1BF80000,0x1BF80000,0x64000000,0x64000000,0xD80000,0xC80001,0xC80001,0x6E80000,0xFC0000,0x3100000,0x3100000,0x1540000,0x6E80000,0xFC0000,0x1AC0000,0x1BF80000, -0x1AC0000,0x1840001,0x1840001,0x1840001,0x1840001,0x49FC0000,0x49FC0000,0x49FC0000,0xA7F80000,0xA7F80000,0xC2000000,0x49FC0000,0x49FC0000,0x49FC0000,0xA7F80000,0xA7F80000,0xC2000000,0xA7F80000,0xA7F80000,0xC2000000,0xC2000000,0x49FC0000,0x49FC0000,0x49FC0000,0xA7F80000,0xA7F80000,0xC2000000,0xA7F80000,0xA7F80000,0xC2000000,0xC2000000,0xA7F80000, -0xA7F80000,0xC2000000,0xC2000000,0xC2000000,0x3C40000,0x1A00000,0x1840001,0x1FFC0000,0x6DFC0000,0x8FFC0000,0x9BFC0000,0xADFC0000,0x1E80000,0x49FC0000,0x8FFC0000,0xC2000000,0x8FFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x11491E7,0xFEE86F9B,0xFECC5432,0xFEC44A4E,0xFEC45887,0xFEA0348E,0xFE8C26E5,0xFE7C247F,0xFE64141E,0xE0641AF5,0xFEB060B4,0xFE7033B5,0xFE5C2208,0xFE381939,0xFE14026D,0xE21808E4,0xFE143058,0xEC001378,0xD0000EF9,0xB6002F14,0x39891E7,0xFE4C5F3E,0xFE2C4A4E,0xFE0839ED,0xFA001E61,0xD8001EBD,0xFA004BDC,0xE20029E8,0xCC00210A,0xAE003CA5,0x51FC91E7, -0xBE006462,0xB2005139,0x9C006279,0x880091E9,0xFEE47431,0xFF0C873F,0xFF0C8ACF,0xFEB45441,0xFE7C3299,0xFE48140A,0xFE3407A9,0xFE040045,0xFED4724F,0xFE984F5B,0xFE001494,0xCC00210A,0x23FC91E7,0x1702F13,0xFF502186,0xFF3815C2,0xFF281142,0xFF301D13,0xFF080C12,0xFEF40502,0xFEE409B1,0xFAC80035,0xE0C80709,0x29FC2F13,0xFEE41AA6,0xFEC01142,0xFE88119A,0xFE4000B9, -0xE0500708,0x97F82F13,0xEC001378,0xD0000EF9,0xB6002F14,0x29FC2F13,0xFEE41AA6,0xFEC01142,0xFE88119A,0xFE4000B9,0xE0500708,0x97F82F13,0xEC001378,0xD0000EF9,0xB6002F14,0x97F82F13,0xEC001378,0xD0000EF9,0xB6002F14,0xB6002F14,0xFF5027BA,0xFD682BE9,0xFF6C2C5A,0xFF30200A,0xFEE8158A,0xFEA4095B,0xFE7C01D4,0xFE040041,0xFF442838,0xFF041EA8,0xFE341206,0xD0000EF9, -0x7BFC2F13,0xC44A4E,0xC44A4E,0xC44A4E,0xC44A4E,0xFE8C26E5,0xFE8C26E5,0xFE8C26E5,0xFE64141E,0xFE64141E,0xC26413ED,0xFE5C2208,0xFE5C2208,0xFE5C2208,0xFE14026D,0xFE14026D,0xC8200364,0xDC001142,0xDC001142,0xB2000269,0x92001144,0x3244A4D,0x3244A4D,0x3244A4D,0xF4001E3D,0xF4001E3D,0xBA001509,0xC000234B,0xC000234B,0xA8000F1E,0x8C0019E8,0x17FC4A4D, -0x17FC4A4D,0x8A002E49,0x78003095,0x62004A4D,0xFEA03908,0xF8C0430A,0xC44A4E,0xFE802964,0xFE5818D5,0xFE3C0BE6,0xFE3407A9,0xFE040045,0xFE9437CD,0xFE5C2742,0xFE001394,0xA8000F1E,0x1A44A4D,0x1281142,0x1281142,0x1281142,0x1281142,0xFEF40502,0xFEF40502,0xFEF40502,0xF0C80001,0xF0C80001,0xC2C80001,0x1B81142,0x1B81142,0x1B81142,0xFE4000B9,0xFE4000B9, -0xC26C0001,0x61F81142,0x61F81142,0xB2000269,0x92001144,0x1B81142,0x1B81142,0x1B81142,0xFE4000B9,0xFE4000B9,0xC26C0001,0x61F81142,0x61F81142,0xB2000269,0x92001144,0x61F81142,0x61F81142,0xB2000269,0x92001144,0x92001144,0xFF100DC8,0xF9200EC9,0x1281142,0xFEDC0B8D,0xFEC0075D,0xFE7C0414,0xFE7C01D4,0xFE040041,0xFEF80E21,0xFEDC0AFD,0x37FC1142,0xB2000269, -0x37FC1142,0x1C0070A,0xFFB0045D,0xFF940152,0xFF840001,0xA5FC0708,0xFF7002A1,0xFF4C0000,0xD3FC0708,0xFE900000,0xE0000708,0xA5FC0708,0xFF7002A1,0xFF4C0000,0xD3FC0708,0xFE900000,0xE0000708,0xD3FC0708,0xFE900000,0xE0000708,0xE0000708,0xA5FC0708,0xFF7002A1,0xFF4C0000,0xD3FC0708,0xFE900000,0xE0000708,0xD3FC0708,0xFE900000,0xE0000708,0xE0000708,0xD3FC0708, -0xFE900000,0xE0000708,0xE0000708,0xE0000708,0xFFB40659,0x1E00708,0xF9C00692,0xFF94057A,0xFF7C045D,0xFF2C0232,0xFED80000,0xFE380000,0xFFAC065D,0xFF940550,0xFEFC0040,0xE0000708,0xC7FC0708,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0x6413ED,0xFE0C00CD,0xFE0C00CD,0xFE0C00CD,0xFE0C00CD,0xFE0C00CD, -0xFE0C00CD,0x96000000,0x96000000,0x96000000,0x64000000,0x9413ED,0x9413ED,0x9413ED,0x9413ED,0x9413ED,0x9413ED,0x7A000784,0x7A000784,0x7A000784,0x580003D9,0x12C13ED,0x12C13ED,0x12C13ED,0x44000C44,0x320013ED,0xFE4C0F05,0x6413ED,0x6413ED,0xFE3C0975,0xFE3405B4,0xFE1802D0,0xFE1802D0,0xF8000000,0xFC380D75,0xFE2008D1,0xB6000089,0x7A000784, -0xD413ED,}; -static const uint32_t g_etc1_to_bc7_m6_table243[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0xD80001,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000, -0x1440000,0x27F80000,0x27F80000,0x27F80000,0x6C000000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x1440000,0x27F80000,0x27F80000,0x27F80000,0x6C000000,0x27F80000,0x27F80000,0x27F80000,0x6C000000,0x6C000000,0xE80000,0xD80001,0xD80001,0x2FC0000,0x1100000,0x1280000,0x1280000,0x36C0000,0x2FC0000,0x1100000,0x1D00000,0x27F80000, -0x1D00000,0x1940001,0x1940001,0x1940001,0x1940001,0x63FC0000,0x63FC0000,0x63FC0000,0xB3F80000,0xB3F80000,0xCA000000,0x63FC0000,0x63FC0000,0x63FC0000,0xB3F80000,0xB3F80000,0xCA000000,0xB3F80000,0xB3F80000,0xCA000000,0xCA000000,0x63FC0000,0x63FC0000,0x63FC0000,0xB3F80000,0xB3F80000,0xCA000000,0xB3F80000,0xB3F80000,0xCA000000,0xCA000000,0xB3F80000, -0xB3F80000,0xCA000000,0xCA000000,0xCA000000,0x1D80000,0x1B00000,0x1940001,0x3DFC0000,0x81FC0000,0x9FF80000,0xA9F80000,0xB9F80000,0x1FC0000,0x63FC0000,0x9FF80000,0xCA000000,0x9FF80000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x11C936F,0xFEF4735F,0xFED859D2,0xFED050A2,0xFED05B93,0xFEA839C2,0xFE982CE1,0xFE88277B,0xFE7017E2,0xE46C1C7D,0xFEB061D4,0xFE7C3749,0xFE682684,0xFE381A89,0xFE200485,0xE6200834,0xFE142D78,0xF2001234,0xD6000B99,0xBC002AC8,0x3A4936F,0xFE646426,0xFE3C50A2,0xFE083CBD,0xFA0021B1,0xDE001F71,0xFA0049BC,0xE80029B8,0xCC001E9A,0xB4003921,0x57FC936F, -0xC40066F6,0xB8005159,0xA6006174,0x8C009371,0xFEF476C5,0xFF0C88FF,0xFF0C8D0F,0xFEC45747,0xFE883642,0xFE541781,0xFE3C0ADE,0xFE0C00EC,0xFED474DF,0xFE9852EB,0xFE101676,0xCC001E9A,0x2BFC936F,0x1782ACB,0xFF5C1F2E,0xFF401523,0xFF381142,0xFF441A61,0xFF140BC6,0xFF0005AA,0xFEF00849,0xFCD80009,0xE4D80549,0x37FC2AC8,0xFEFC1946,0xFED81142,0xFEA00FDA,0xFE640121, -0xE4640548,0x9DFC2AC8,0xF2001234,0xD6000B99,0xBC002AC8,0x37FC2AC8,0xFEFC1946,0xFED81142,0xFEA00FDA,0xFE640121,0xE4640548,0x9DFC2AC8,0xF2001234,0xD6000B99,0xBC002AC8,0x9DFC2AC8,0xF2001234,0xD6000B99,0xBC002AC8,0xBC002AC8,0xFF58247D,0xFF6C27E1,0xFF6C289A,0xFF301D7A,0xFEFC143A,0xFEB80925,0xFE900274,0xFE280088,0xFD5824CE,0xFF181C5E,0xFE5811EB,0xD6000B99, -0x83FC2AC8,0xD050A2,0xD050A2,0xD050A2,0xD050A2,0xFE982CE1,0xFE982CE1,0xFE982CE1,0xFE7017E2,0xFE7017E2,0xCA6C1735,0xFE682684,0xFE682684,0xFE682684,0xFE200485,0xFE200485,0xD2280474,0xE6001144,0xE6001144,0xBE0001F9,0x9A001144,0x13450A2,0x13450A2,0x13450A2,0xFA0021A1,0xFA0021A1,0xC6001821,0xCC002553,0xCC002553,0xAE001012,0x92001AE4,0x1FF850A2, -0x1FF850A2,0x90003275,0x7E00340D,0x660050A5,0xFEAC3F3D,0xFCC8492A,0xD050A2,0xFE902EFD,0xFE6C1DB1,0xFE3C0FE6,0xFE3C0ADE,0xFE0C00EC,0xFE943DAD,0xFE742C9D,0xFE101595,0xAE001012,0x1B850A2,0x1381142,0x1381142,0x1381142,0x1381142,0xFF0005AA,0xFF0005AA,0xFF0005AA,0xF8D80001,0xF8D80001,0xCAD80001,0x1D01142,0x1D01142,0x1D01142,0xFE640121,0xFE640121, -0xCA7C0001,0x6DF81142,0x6DF81142,0xBE0001F9,0x9A001144,0x1D01142,0x1D01142,0x1D01142,0xFE640121,0xFE640121,0xCA7C0001,0x6DF81142,0x6DF81142,0xBE0001F9,0x9A001144,0x6DF81142,0x6DF81142,0xBE0001F9,0x9A001144,0x9A001144,0xFF200E1D,0xFF2C0ED5,0x1381142,0xFEF80BD1,0xFED407E9,0xFEA804A0,0xFE900274,0xFE280088,0xFD100E72,0xFEE80B5A,0x45FC1142,0xBE0001F9, -0x45FC1142,0x1C8054A,0xFFB0034D,0xFFA000FA,0xFF940001,0xB1FC0548,0xFF7C01F9,0xFF640000,0xD9FC0548,0xFEC00000,0xE4000548,0xB1FC0548,0xFF7C01F9,0xFF640000,0xD9FC0548,0xFEC00000,0xE4000548,0xD9FC0548,0xFEC00000,0xE4000548,0xE4000548,0xB1FC0548,0xFF7C01F9,0xFF640000,0xD9FC0548,0xFEC00000,0xE4000548,0xD9FC0548,0xFEC00000,0xE4000548,0xE4000548,0xD9FC0548, -0xFEC00000,0xE4000548,0xE4000548,0xE4000548,0xFFB404D9,0x1E80548,0xFDC804E2,0xFFB00422,0xFF7C034D,0xFF480195,0xFEFC0000,0xFE740000,0xF5C004E2,0xFFA00401,0xFF200031,0xE4000548,0xCFFC0548,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0x6C1735,0xFE1801B1,0xFE1801B1,0xFE1801B1,0xFE1801B1,0xFE1801B1, -0xFE1801B1,0xA0000001,0xA0000001,0xA0000001,0x6C000000,0xA01735,0xA01735,0xA01735,0xA01735,0xA01735,0xA01735,0x800008C8,0x800008C8,0x800008C8,0x62000464,0x1441735,0x1441735,0x1441735,0x4A000E48,0x36001735,0xF8601200,0x6C1735,0x6C1735,0xFE4C0BE4,0xFE3407B4,0xFE240454,0xFE240454,0xFE040014,0xFE3C1031,0xFC300B48,0xC6000099,0x800008C8, -0xE41735,}; -static const uint32_t g_etc1_to_bc7_m6_table244[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0xEC0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000, -0x35C0000,0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x35C0000,0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0x33FC0000,0x33FC0000,0x33FC0000,0x74000001,0x74000001,0xFC0000,0xEC0000,0xEC0000,0x5100000,0x1280000,0x1400000,0x1400000,0x18C0000,0x5100000,0x1280000,0x1F40000,0x33FC0000, -0x1F40000,0x1A80000,0x1A80000,0x1A80000,0x1A80000,0x7DFC0000,0x7DFC0000,0x7DFC0000,0xBFFC0000,0xBFFC0000,0xD2000001,0x7DFC0000,0x7DFC0000,0x7DFC0000,0xBFFC0000,0xBFFC0000,0xD2000001,0xBFFC0000,0xBFFC0000,0xD2000001,0xD2000001,0x7DFC0000,0x7DFC0000,0x7DFC0000,0xBFFC0000,0xBFFC0000,0xD2000001,0xBFFC0000,0xBFFC0000,0xD2000001,0xD2000001,0xBFFC0000, -0xBFFC0000,0xD2000001,0xD2000001,0xD2000001,0x1EC0000,0x1C40000,0x1A80000,0x5FFC0000,0x97FC0000,0xAFFC0000,0xB7FC0000,0xC5F80000,0x2DFC0000,0x7DFC0000,0xAFFC0000,0xD2000001,0xAFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x12495C1,0xFF00782D,0xFEE4609C,0xFEDC5828,0xFEDC5FA9,0xFEB44028,0xFEA43425,0xFE942B8D,0xFE7C1CD4,0xE8741ED1,0xFEC46399,0xFE8C3BF7,0xFE742C2E,0xFE4C1CB6,0xFE2C07B5,0xEE2407F6,0xFE202B26,0xF800116E,0xDC000859,0xC200265A,0x3B095C1,0xFE706A14,0xFE4C582B,0xFE14410D,0xFC00269A,0xE20020E5,0xFA00488A,0xEE002A26,0xD2001C6A,0xBA0035A3,0x5DFC95C1, -0xCA006AA4,0xBE00521D,0xA600608A,0x920095C3,0xFF007A21,0xFF0C8C29,0xF9208FF4,0xFED45B79,0xFE903AF9,0xFE5C1BF9,0xFE4C0F71,0xFE140288,0xFEF07883,0xFEB0571E,0xFE141976,0xD2001C6A,0x33FC95C1,0x184265D,0xFF681CC8,0xFF58147D,0xFF481144,0xFF5017C9,0xFF2C0B94,0xFF180668,0xFF08070D,0xFEEC0004,0xE8EC039D,0x49FC265A,0xFF1417D8,0xFEF01144,0xFEB80E36,0xFE8801BD, -0xE87C039D,0xA7F8265A,0xF800116E,0xDC000859,0xC200265A,0x49FC265A,0xFF1417D8,0xFEF01144,0xFEB80E36,0xFE8801BD,0xE87C039D,0xA7F8265A,0xF800116E,0xDC000859,0xC200265A,0xA7F8265A,0xF800116E,0xDC000859,0xC200265A,0xC200265A,0xFF742111,0xF98023FD,0xFB842485,0xFF401AEA,0xFF1012F4,0xFEC80956,0xFEA8031D,0xFE480112,0xFF5C2106,0xFF401A06,0xFE7C11D2,0xDC000859, -0x8FFC265A,0xDC5828,0xDC5828,0xDC5828,0xDC5828,0xFEA43425,0xFEA43425,0xFEA43425,0xFE7C1CD4,0xFE7C1CD4,0xD2741B35,0xFE742C2E,0xFE742C2E,0xFE742C2E,0xFE2C07B5,0xFE2C07B5,0xDE2805CA,0xF4001142,0xF4001142,0xCA000195,0xA4001142,0x3445828,0x3445828,0x3445828,0xFA00268D,0xFA00268D,0xCC001BE3,0xD80027C5,0xD80027C5,0xBA00113A,0x9E001C1A,0x27FC5828, -0x27FC5828,0x96003783,0x84003833,0x6C00582B,0xFEBC4689,0xFECC50B4,0xDC5828,0xFE9835F1,0xFE6C2405,0xFE5414FE,0xFE4C0F71,0xFE140288,0xFEA04502,0xFE7433A5,0xFE141895,0xBA00113A,0x1D45828,0x1481144,0x1481144,0x1481144,0x1481144,0xFF180668,0xFF180668,0xFF180668,0xFEEC0004,0xFEEC0004,0xD2EC0001,0x3E81142,0x3E81142,0x3E81142,0xFE8801BD,0xFE8801BD, -0xD2900001,0x79FC1142,0x79FC1142,0xCA000195,0xA4001142,0x3E81142,0x3E81142,0x3E81142,0xFE8801BD,0xFE8801BD,0xD2900001,0x79FC1142,0x79FC1142,0xCA000195,0xA4001142,0x79FC1142,0x79FC1142,0xCA000195,0xA4001142,0xA4001142,0xFB340E74,0xFB440F20,0x1481144,0xFF180C35,0xFEF008A2,0xFEB80562,0xFEA8031D,0xFE480112,0xF9280EC9,0xFF040BEA,0x57FC1142,0xCA000195, -0x57FC1142,0x1D4039D,0xFFC4022D,0xFFAC00AD,0xFFA80000,0xBDFC039D,0xFF940152,0xFF7C0001,0xDFF8039D,0xFEF80000,0xE800039D,0xBDFC039D,0xFF940152,0xFF7C0001,0xDFF8039D,0xFEF80000,0xE800039D,0xDFF8039D,0xFEF80000,0xE800039D,0xE800039D,0xBDFC039D,0xFF940152,0xFF7C0001,0xDFF8039D,0xFEF80000,0xE800039D,0xDFF8039D,0xFEF80000,0xE800039D,0xE800039D,0xDFF8039D, -0xFEF80000,0xE800039D,0xE800039D,0xE800039D,0xFDCC0349,0x1F0039D,0xFFCC0355,0xFDBC02D4,0xFFA80242,0xFF5C0119,0xFF280000,0xFEB80000,0xF7CC0349,0xFFAC02C5,0xFF50001D,0xE800039D,0xD7FC039D,0x741B34,0x741B34,0x741B34,0x741B34,0x741B34,0x741B34,0x741B34,0x741B34,0x741B34,0x741B34,0xFE240328,0xFE240328,0xFE240328,0xFE240328,0xFE240328, -0xFE240328,0xAE000000,0xAE000000,0xAE000000,0x74000001,0xB01B32,0xB01B32,0xB01B32,0xB01B32,0xB01B32,0xB01B32,0x8C000A55,0x8C000A55,0x8C000A55,0x6800052D,0x1641B32,0x1641B32,0x1641B32,0x500010BD,0x3A001B32,0xFC681589,0x741B34,0x741B34,0xFE4C0F05,0xFE3C0A68,0xFE340665,0xFE340665,0xFE0C009D,0xFE3C13D0,0xFE340E29,0xD60000B9,0x8C000A55, -0xF81B32,}; -static const uint32_t g_etc1_to_bc7_m6_table245[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0xFC0000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000, -0x3740000,0x3FFC0000,0x3FFC0000,0x3FFC0000,0x7C000001,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3740000,0x3FFC0000,0x3FFC0000,0x3FFC0000,0x7C000001,0x3FFC0000,0x3FFC0000,0x3FFC0000,0x7C000001,0x7C000001,0x10C0000,0xFC0000,0xFC0000,0x1240000,0x13C0000,0x3540000,0x3540000,0x1A80000,0x1240000,0x13C0000,0xDFC0000,0x3FFC0000, -0xDFC0000,0x1B80000,0x1B80000,0x1B80000,0x1B80000,0x95FC0000,0x95FC0000,0x95FC0000,0xCBFC0000,0xCBFC0000,0xDA000001,0x95FC0000,0x95FC0000,0x95FC0000,0xCBFC0000,0xCBFC0000,0xDA000001,0xCBFC0000,0xCBFC0000,0xDA000001,0xDA000001,0x95FC0000,0x95FC0000,0x95FC0000,0xCBFC0000,0xCBFC0000,0xDA000001,0xCBFC0000,0xCBFC0000,0xDA000001,0xDA000001,0xCBFC0000, -0xCBFC0000,0xDA000001,0xDA000001,0xDA000001,0x9FC0000,0x3D40000,0x1B80000,0x7DFC0000,0xABFC0000,0xBFF80000,0xC5FC0000,0xD1F40000,0x55FC0000,0x95FC0000,0xBFF80000,0xDA000001,0xBFF80000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x12C9859,0xFF0C7CE9,0xFEF06704,0xFEE85F34,0xFEE863E1,0xFEC44648,0xFEB03B0D,0xFE942FCD,0xFE8821D8,0xEC7C2169,0xFEC465F9,0xFE9840B3,0xFE8031CA,0xFE581F56,0xFE380B41,0xF22C0846,0xFE2C29FE,0xFE041148,0xE6000602,0xC80022C6,0x3BC9859,0xFE7C6FCC,0xFE5C5F33,0xFE204581,0xFE082BC5,0xE80022E1,0xFE044866,0xEE002B26,0xD8001AEE,0xC00032E7,0x63FC9859, -0xD0006E98,0xBE00534D,0xAC006016,0x9600985B,0xFF007DA1,0xFB248F01,0xFD2892C4,0xFED45F89,0xFEA03FAE,0xFE6820C1,0xFE501419,0xFE2004B9,0xFEF87BA9,0xFEC45BAD,0xFE201CEC,0xD8001AEE,0x3BFC9859,0x19022C5,0xFF741AD8,0xFF6413E5,0xFF581144,0xFF5C15CD,0xFF380B84,0xFF300728,0xFF14063D,0xFEFC0031,0xECFC0265,0x59FC22C5,0xFF2016B4,0xFF081144,0xFED80D2D,0xFEA0025D, -0xEC900265,0xADFC22C5,0xFE081142,0xE6000602,0xC80022C6,0x59FC22C5,0xFF2016B4,0xFF081144,0xFED80D2D,0xFEA0025D,0xEC900265,0xADFC22C5,0xFE081142,0xE6000602,0xC80022C6,0xADFC22C5,0xFE081142,0xE6000602,0xC80022C6,0xC80022C6,0xFF781E0E,0xFF8C207D,0xFF8C211D,0xFF5C18D9,0xFF2411F8,0xFEEC0969,0xFED003E8,0xFE64019A,0xFD741E52,0xFF4817E4,0xFEA011BB,0xE6000602, -0x99FC22C5,0xE85F34,0xE85F34,0xE85F34,0xE85F34,0xFEB03B0D,0xFEB03B0D,0xFEB03B0D,0xFE8821D8,0xFE8821D8,0xDA7C1F05,0xFE8031CA,0xFE8031CA,0xFE8031CA,0xFE380B41,0xFE380B41,0xE62C0726,0xFE041148,0xFE041148,0xD000013D,0xAC001142,0x1545F33,0x1545F33,0x1545F33,0xFE082BC5,0xFE082BC5,0xD8001F8B,0xE20029C6,0xE20029C6,0xC000126A,0xA2001D03,0x2FFC5F33, -0x2FFC5F33,0x9C003C4B,0x8A003C13,0x72005F33,0xFECC4D68,0xF8E0580D,0xE85F34,0xFEAC3CD5,0xFE8029D5,0xFE5C1A05,0xFE501419,0xFE2004B9,0xFEB44BB9,0xFE903A4A,0xFE201C0B,0xC000126A,0x1E85F33,0x1581144,0x1581144,0x1581144,0x1581144,0xFF300728,0xFF300728,0xFF300728,0xFEFC0031,0xFEFC0031,0xDAFC0001,0x7FC1142,0x7FC1142,0x7FC1142,0xFEA0025D,0xFEA0025D, -0xDAA00001,0x85FC1142,0x85FC1142,0xD000013D,0xAC001142,0x7FC1142,0x7FC1142,0x7FC1142,0xFEA0025D,0xFEA0025D,0xDAA00001,0x85FC1142,0x85FC1142,0xD000013D,0xAC001142,0x85FC1142,0x85FC1142,0xD000013D,0xAC001142,0xAC001142,0xFF3C0EA4,0xFF4C0F40,0x1581144,0xFF240CB2,0xFF04093A,0xFEE40632,0xFED003E8,0xFE64019A,0xFF340ECD,0xFF180C3D,0x65FC1142,0xD000013D, -0x65FC1142,0x1DC0265,0xFFD00171,0xFFC00074,0xFFB80000,0xC9FC0265,0xFFAC00DA,0xFF940001,0xE5F80265,0xFF280000,0xEC000265,0xC9FC0265,0xFFAC00DA,0xFF940001,0xE5F80265,0xFF280000,0xEC000265,0xE5F80265,0xFF280000,0xEC000265,0xEC000265,0xC9FC0265,0xFFAC00DA,0xFF940001,0xE5F80265,0xFF280000,0xEC000265,0xE5F80265,0xFF280000,0xEC000265,0xEC000265,0xE5F80265, -0xFF280000,0xEC000265,0xEC000265,0xEC000265,0xFFD00225,0x1FC0265,0xF5D80244,0xFFC001D4,0xFFA80172,0xFF8400B4,0xFF500000,0xFEF40000,0xFBD40221,0xFFC001C4,0xFF700014,0xEC000265,0xDFF80265,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0x7C1F04,0xFE2404C8,0xFE2404C8,0xFE2404C8,0xFE2404C8,0xFE2404C8, -0xFE2404C8,0xBA000000,0xBA000000,0xBA000000,0x7C000001,0xBC1F02,0xBC1F02,0xBC1F02,0xBC1F02,0xBC1F02,0xBC1F02,0x98000BD5,0x98000BD5,0x98000BD5,0x6E0005F1,0x17C1F02,0x17C1F02,0x17C1F02,0x56001315,0x3E001F02,0xFE6C18FD,0x7C1F04,0x7C1F04,0xFE581220,0xFE500D24,0xFE3C0894,0xFE3C0894,0xFE140164,0xFC541735,0xFE341139,0xE60000CD,0x98000BD5, -0x10C1F02,}; -static const uint32_t g_etc1_to_bc7_m6_table246[] = { -0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1, -0x1,0x1,0x1,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x10C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000, -0x38C0000,0x4BFC0000,0x4BFC0000,0x4BFC0000,0x84000001,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x38C0000,0x4BFC0000,0x4BFC0000,0x4BFC0000,0x84000001,0x4BFC0000,0x4BFC0000,0x4BFC0000,0x84000001,0x84000001,0x71C0000,0x10C0000,0x10C0000,0x1380000,0x1500000,0x16C0000,0x16C0000,0x3C00000,0x1380000,0x1500000,0x1DF80000,0x4BFC0000, -0x1DF80000,0x1C80000,0x1C80000,0x1C80000,0x1C80000,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xD7FC0000,0xD7FC0000,0xE2000001,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xD7FC0000,0xD7FC0000,0xE2000001,0xD7FC0000,0xD7FC0000,0xE2000001,0xE2000001,0xAFFC0000,0xAFFC0000,0xAFFC0000,0xD7FC0000,0xD7FC0000,0xE2000001,0xD7FC0000,0xD7FC0000,0xE2000001,0xE2000001,0xD7FC0000, -0xD7FC0000,0xE2000001,0xE2000001,0xE2000001,0x43FC0000,0xBE40000,0x1C80000,0x9BFC0000,0xBDFC0000,0xCDFC0000,0xD3F80000,0xDBF80000,0x7DFC0000,0xAFFC0000,0xCDFC0000,0xE2000001,0xCDFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1349B71,0xFF18821D,0xFEFC6DCC,0xFEF06694,0xFEE868A1,0xFEC84CE6,0xFEC44258,0xFEA03499,0xFE942774,0xF0842481,0xFED0690D,0xFE9845F3,0xFE8C37EE,0xFE6422AE,0xFE400F8D,0xF6340916,0xFE3829B6,0xFE0811D2,0xEA000401,0xCC001F86,0x1C89B71,0xFE8875EC,0xFE706693,0xFE2C4A85,0xFE0831B5,0xEE00257D,0xFE0848EA,0xF4002CA6,0xE20019DE,0xC600308B,0x69FC9B71, -0xD6007334,0xC40054F9,0xB200601A,0x9A009B73,0xFF0C818D,0xFF2C9219,0xFF2C962C,0xFED46499,0xFEA84503,0xFE7025F5,0xFE5C1968,0xFE2807C2,0xFEF87F79,0xFED06046,0xFE3020B2,0xE20019DE,0x41FC9B71,0x19C1F85,0xFF801918,0xFF701365,0xFF681144,0xFF681419,0xFF4C0B8A,0xFF4407D9,0xFF2C05B5,0xFF140089,0xF10C016D,0x6BFC1F85,0xFF3815A4,0xFF201144,0xFEF00C35,0xFEB8031D, -0xF0A4016D,0xB7F81F85,0xFE381142,0xEA000401,0xCC001F86,0x6BFC1F85,0xFF3815A4,0xFF201144,0xFEF00C35,0xFEB8031D,0xF0A4016D,0xB7F81F85,0xFE381142,0xEA000401,0xCC001F86,0xB7F81F85,0xFE381142,0xEA000401,0xCC001F86,0xCC001F86,0xFF801BA1,0xFF8C1D9D,0xF5981E64,0xFF5C1729,0xFF401123,0xFEFC0989,0xFEE404A5,0xFE8C025D,0xFF781B86,0xFF5C1652,0xFED011A5,0xEA000401, -0xA3FC1F85,0xF06694,0xF06694,0xF06694,0xF06694,0xFEC44258,0xFEC44258,0xFEC44258,0xFE942774,0xFE942774,0xE2842315,0xFE8C37EE,0xFE8C37EE,0xFE8C37EE,0xFE400F8D,0xFE400F8D,0xF03408A6,0xFE0811D2,0xFE0811D2,0xDC0000F5,0xB4001142,0x1686693,0x1686693,0x1686693,0xFE0831B5,0xFE0831B5,0xDE002373,0xEE002C26,0xEE002C26,0xCC0013AA,0xA8001E33,0x39F86693, -0x39F86693,0xA600416D,0x90004023,0x78006693,0xFECC54C8,0xFEEC5F3D,0xF06694,0xFEB443E1,0xFE943075,0xFE681FB0,0xFE5C1968,0xFE2807C2,0xFEB45309,0xFE98413E,0xFE301FEE,0xCC0013AA,0x3FC6693,0x1681144,0x1681144,0x1681144,0x1681144,0xFF4407D9,0xFF4407D9,0xFF4407D9,0xFF140089,0xFF140089,0xE30C0001,0x1FFC1142,0x1FFC1142,0x1FFC1142,0xFEB8031D,0xFEB8031D, -0xE2B00001,0x91FC1142,0x91FC1142,0xDC0000F5,0xB4001142,0x1FFC1142,0x1FFC1142,0x1FFC1142,0xFEB8031D,0xFEB8031D,0xE2B00001,0x91FC1142,0x91FC1142,0xDC0000F5,0xB4001142,0x91FC1142,0x91FC1142,0xDC0000F5,0xB4001142,0xB4001142,0xFF580EC9,0xFB640F79,0x1681144,0xFF440D22,0xFF1809DA,0xFEF406DA,0xFEE404A5,0xFE8C025D,0xFD4C0F20,0xFF240CC8,0x75FC1142,0xDC0000F5, -0x75FC1142,0x1E4016D,0xFFDC00DD,0xFFCC0044,0xFFC80000,0xD5FC016D,0xFFB80082,0xFFAC0001,0xEBF8016D,0xFF580000,0xF000016D,0xD5FC016D,0xFFB80082,0xFFAC0001,0xEBF8016D,0xFF580000,0xF000016D,0xEBF8016D,0xFF580000,0xF000016D,0xF000016D,0xD5FC016D,0xFFB80082,0xFFAC0001,0xEBF8016D,0xFF580000,0xF000016D,0xEBF8016D,0xFF580000,0xF000016D,0xF000016D,0xEBF8016D, -0xFF580000,0xF000016D,0xF000016D,0xF000016D,0xF9E00152,0x27FC016D,0xF9E00154,0xFFD40120,0xFFBC00DA,0xFF98006D,0xFF780000,0xFF300000,0xFFDC0139,0xFFCC010D,0xFF90000A,0xF000016D,0xE5FC016D,0x842314,0x842314,0x842314,0x842314,0x842314,0x842314,0x842314,0x842314,0x842314,0x842314,0xFE3006C4,0xFE3006C4,0xFE3006C4,0xFE3006C4,0xFE3006C4, -0xFE3006C4,0xC6000000,0xC6000000,0xC6000000,0x84000001,0x2C42312,0x2C42312,0x2C42312,0x2C42312,0x2C42312,0x2C42312,0xA2000D39,0xA2000D39,0xA2000D39,0x7A0006B9,0x1942312,0x1942312,0x1942312,0x5C001595,0x42002312,0xFE6C1CED,0x842314,0x842314,0xFE681589,0xFE501004,0xFE3C0B14,0xFE3C0B14,0xFE1802A1,0xFE581AC1,0xFE3C14B4,0xF60000EA,0xA2000D39, -0x11C2312,}; -static const uint32_t g_etc1_to_bc7_m6_table247[] = { -0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x100000,0x200000, -0x200000,0x200000,0x200000,0x4000001,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0xC0000,0x100000,0x180000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x11C0000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000, -0x3A40000,0x57FC0000,0x57FC0000,0x57FC0000,0x8C000001,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x3A40000,0x57FC0000,0x57FC0000,0x57FC0000,0x8C000001,0x57FC0000,0x57FC0000,0x57FC0000,0x8C000001,0x8C000001,0xF2C0000,0x11C0000,0x11C0000,0x5480000,0x1640000,0x3800000,0x3800000,0x1DC0000,0x5480000,0x1640000,0x2BFC0000,0x57FC0000, -0x2BFC0000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xE3FC0000,0xE3FC0000,0xEA000001,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xE3FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xE3FC0000,0xEA000001,0xEA000001,0xC7FC0000,0xC7FC0000,0xC7FC0000,0xE3FC0000,0xE3FC0000,0xEA000001,0xE3FC0000,0xE3FC0000,0xEA000001,0xEA000001,0xE3FC0000, -0xE3FC0000,0xEA000001,0xEA000001,0xEA000001,0x7BFC0000,0x1F80000,0x1D80000,0xB9FC0000,0xD1FC0000,0xDDF80000,0xDFFC0000,0xE5FC0000,0xA3FC0000,0xC7FC0000,0xDDF80000,0xEA000001,0xDDF80000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x14097B9,0xFF248115,0xFF086EE8,0xFF006878,0xFF006825,0xFEDC4F04,0xFED0456C,0xFEAC3621,0xFEA02A0C,0xF49424D5,0xFEDC67C5,0xFEB047DF,0xFE983B2E,0xFE702442,0xFE5812B1,0xFA3C0922,0xFE38290A,0xFE2012C6,0xEE040282,0xD20C1C9A,0x1DC97B9,0xFEA0761C,0xFE846878,0xFE384B75,0xFE1434A9,0xF400251D,0xFE084686,0xFA002B02,0xE20015EA,0xCC002B8F,0x73F897B9, -0xDC0071AC,0xCA0050F9,0xB8005AEA,0x9E0097BB,0xFF207F85,0xFF2C8F25,0xF73C935D,0xFEF064CD,0xFEB446BE,0xFE7C292B,0xFE681CD5,0xFE400A4D,0xFF107DEB,0xFED0602E,0xFE3C238C,0xE20015EA,0x4DFC97B9,0x1A41C9D,0xFF8C1788,0xFF7C12FD,0xFF781144,0xFF7412AD,0xFF5C0BA2,0xFF500895,0xFF38056D,0xFF2C0121,0xF51C00B5,0x7BFC1C9A,0xFF4C14D3,0xFF381144,0xFF080B7D,0xFED8040D, -0xF4B800B5,0xBFF81C9A,0xFE6C1142,0xEC000266,0xD2001C9A,0x7BFC1C9A,0xFF4C14D3,0xFF381144,0xFF080B7D,0xFED8040D,0xF4B800B5,0xBFF81C9A,0xFE6C1142,0xEC000266,0xD2001C9A,0xBFF81C9A,0xFE6C1142,0xEC000266,0xD2001C9A,0xD2001C9A,0xFF901929,0xF9A01B05,0xFBA41B98,0xFF781595,0xFF481062,0xFF200A13,0xFEFC0585,0xFEA8031D,0xFF88194D,0xFF6814FA,0xFEE41193,0xEC000266, -0xADFC1C9A,0x1006878,0x1006878,0x1006878,0x1006878,0xFED0456C,0xFED0456C,0xFED0456C,0xFEA02A0C,0xFEA02A0C,0xEA942421,0xFE983B2E,0xFE983B2E,0xFE983B2E,0xFE5812B1,0xFE5812B1,0xF640090E,0xFE2012C6,0xFE2012C6,0xE20400DA,0xBC0C1142,0x17C6878,0x17C6878,0x17C6878,0xFE1434A9,0xFE1434A9,0xE8002431,0xFA002AF2,0xFA002AF2,0xD20011F2,0xB4001CBB,0x43F86878, -0x43F86878,0xB2004159,0x9C003F8B,0x7E00687B,0xFEE85775,0xFEEC61A9,0x1006878,0xFEC446F4,0xFE943411,0xFE782328,0xFE681CD5,0xFE400A4D,0xFED055B3,0xFEB04469,0xFE3C22C8,0xD20011F2,0x11FC6878,0x1781144,0x1781144,0x1781144,0x1781144,0xFF500895,0xFF500895,0xFF500895,0xFF2C0121,0xFF2C0121,0xEB1C0001,0x37FC1142,0x37FC1142,0x37FC1142,0xFED8040D,0xFED8040D, -0xEAC00001,0x9DFC1142,0x9DFC1142,0xE40000C1,0xBC001142,0x37FC1142,0x37FC1142,0x37FC1142,0xFED8040D,0xFED8040D,0xEAC00001,0x9DFC1142,0x9DFC1142,0xE40000C1,0xBC001142,0x9DFC1142,0x9DFC1142,0xE40000C1,0xBC001142,0xBC001142,0xFB6C0F20,0xFF6C0FA1,0x1781144,0xFD580D75,0xFF2C0A82,0xFEFC07D5,0xFEFC0585,0xFEA8031D,0xFF500F68,0xFF480D24,0x83FC1142,0xE40000C1, -0x83FC1142,0x1EC00B5,0xFFE80071,0xFFD80024,0xFFD80000,0xE3FC00B5,0xFFCC0044,0xFFC40000,0xF1F800B5,0xFF880000,0xF40000B5,0xE3FC00B5,0xFFCC0044,0xFFC40000,0xF1F800B5,0xFF880000,0xF40000B5,0xF1F800B5,0xFF880000,0xF40000B5,0xF40000B5,0xE3FC00B5,0xFFCC0044,0xFFC40000,0xF1F800B5,0xFF880000,0xF40000B5,0xF1F800B5,0xFF880000,0xF40000B5,0xF40000B5,0xF1F800B5, -0xFF880000,0xF40000B5,0xF40000B5,0xF40000B5,0xFDE800A2,0x67FC00B5,0xFDE800A4,0xFBE40091,0xFFD0006A,0xFFC00034,0xFFA00000,0xFF6C0000,0xFBE800A2,0xFFD80088,0xFFB00005,0xF40000B5,0xEDFC00B5,0x942420,0x942420,0x942420,0x942420,0x942420,0x942420,0x942420,0x942420,0x942420,0x942420,0xFE4407D9,0xFE4407D9,0xFE4407D9,0xFE4407D9,0xFE4407D9, -0xFE4407D9,0xD00C0000,0xD00C0000,0xD00C0000,0x8C0C0001,0xDC2420,0xDC2420,0xDC2420,0xDC2420,0xDC2420,0xDC2420,0xAE000C69,0xAE000C69,0xAE000C69,0x800005B9,0x1BC2420,0x1BC2420,0x1BC2420,0x66001572,0x48002422,0xFC881E08,0x942420,0x942420,0xFE6816CD,0xFE641154,0xFE4C0C44,0xFE4C0C44,0xFE2C0371,0xFA6C1C20,0xFE5015C5,0xFE000095,0xAE000C69, -0x1382420,}; -static const uint32_t g_etc1_to_bc7_m6_table248[] = { -0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x2C0000,0x580000, -0x580000,0x580000,0x580000,0xE000000,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x1C0001,0x200000,0x200000,0x200000,0x2C0000,0x3C0000,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x12C0001,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000, -0x1C00000,0x65F80000,0x65F80000,0x65F80000,0x96000000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x1C00000,0x65F80000,0x65F80000,0x65F80000,0x96000000,0x65F80000,0x65F80000,0x65F80000,0x96000000,0x96000000,0x9400000,0x12C0001,0x12C0001,0x1600000,0x5780000,0x3980000,0x3980000,0x1FC0000,0x1600000,0x5780000,0x3DF80000,0x65F80000, -0x3DF80000,0x1E80001,0x1E80001,0x1E80001,0x1E80001,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xF1F80000,0xF1F80000,0xF4000000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xF1F80000,0xF1F80000,0xF4000000,0xF1F80000,0xF1F80000,0xF4000000,0xF4000000,0xE3FC0000,0xE3FC0000,0xE3FC0000,0xF1F80000,0xF1F80000,0xF4000000,0xF1F80000,0xF1F80000,0xF4000000,0xF4000000,0xF1F80000, -0xF1F80000,0xF4000000,0xF4000000,0xF4000000,0xBBFC0000,0x67FC0000,0x1E80001,0xDBFC0000,0xE7FC0000,0xEDFC0000,0xEFFC0000,0xF3F40000,0xD1FC0000,0xE3FC0000,0xEDFC0000,0xF4000000,0xEDFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x14C9144,0xFF307DB4,0xFF206E1B,0xFF10687B,0xFF0C65C4,0xFEE84FCF,0xFEDC4763,0xFEC036E6,0xFEB82BFB,0xF8A42456,0xFEF464EC,0xFEC448BB,0xFEB03DB5,0xFE88258F,0xFE7015E6,0xFE500915,0xFE582844,0xFE38142D,0xF4100195,0xD81C19B9,0x1EC9144,0xFEB87431,0xFEA06878,0xFE584B5A,0xFE2C36E6,0xF8042454,0xFE084419,0xFA00296D,0xEE001109,0xD2002588,0x7BFC9144, -0xE0006E48,0xD0004AD6,0xBE005385,0xA6009144,0xFF207B7A,0xFD488994,0xFD488D3C,0xFF0062DC,0xFECC477A,0xFE982BD6,0xFE902035,0xFE580D46,0xFF1079CE,0xFEDC5F8A,0xFE582697,0xEE001109,0x59FC9144,0x1B019BB,0xFFA415E6,0xFF941283,0xFF8C1142,0xFF8C1155,0xFF740BEA,0xFF680989,0xFF4C0593,0xFF3801FD,0xF92C0035,0x8DFC19B8,0xFF6413E5,0xFF541142,0xFF200AFD,0xFEFC0521, -0xFACC0034,0xC7FC19B8,0xFEA01142,0xF2000124,0xD80019B8,0x8DFC19B8,0xFF6413E5,0xFF541142,0xFF200AFD,0xFEFC0521,0xFACC0034,0xC7FC19B8,0xFEA01142,0xF2000124,0xD80019B8,0xC7FC19B8,0xFEA01142,0xF2000124,0xD80019B8,0xD80019B8,0xFFA016ED,0xFFAC1835,0xFFAC18E6,0xFF881403,0xFF5C0FD6,0xFF380A61,0xFF2006AD,0xFED00448,0xFF941715,0xFF841392,0xFF101175,0xF2000124, -0xB9FC19B8,0x110687B,0x110687B,0x110687B,0x110687B,0xFEDC4763,0xFEDC4763,0xFEDC4763,0xFEB82BFB,0xFEB82BFB,0xF4A42422,0xFEB03DB5,0xFEB03DB5,0xFEB03DB5,0xFE7015E6,0xFE7015E6,0xFE500915,0xFE38142D,0xFE38142D,0xEC1800D9,0xC41C1145,0x1986878,0x1986878,0x1986878,0xFE2C36E6,0xFE2C36E6,0xF4082420,0xFA00295D,0xFA00295D,0xE2000EE1,0xC0001A74,0x51F86878, -0x51F86878,0xB8003F52,0xA6003D85,0x88006878,0xFEF458B2,0xFF0C61E3,0x110687B,0xFED448E2,0xFEB0370B,0xFE9026C5,0xFE902035,0xFE580D46,0xFED85741,0xFEBC46B5,0xFE5825EE,0xE2000EE1,0x21FC6878,0x18C1142,0x18C1142,0x18C1142,0x18C1142,0xFF680989,0xFF680989,0xFF680989,0xFF3801FD,0xFF3801FD,0xF52C0001,0x53FC1142,0x53FC1142,0x53FC1142,0xFEFC0521,0xFEFC0521, -0xF4D00001,0xABF81142,0xABF81142,0xEC000080,0xC4001144,0x53FC1142,0x53FC1142,0x53FC1142,0xFEFC0521,0xFEFC0521,0xF4D00001,0xABF81142,0xABF81142,0xEC000080,0xC4001144,0xABF81142,0xABF81142,0xEC000080,0xC4001144,0xC4001144,0xFF740F52,0xFD880FD2,0x18C1142,0xFF5C0DE5,0xFF540B50,0xFF2808B1,0xFF2006AD,0xFED00448,0xFD740F79,0xFF5C0D81,0x95FC1142,0xEC000080, -0x95FC1142,0x1F40032,0xFDF00022,0xFFEC000A,0xFFE80001,0xF1FC0032,0xFFE40011,0xFFE00000,0xF9F80032,0xFFC00000,0xF8000034,0xF1FC0032,0xFFE40011,0xFFE00000,0xF9F80032,0xFFC00000,0xF8000034,0xF9F80032,0xFFC00000,0xF8000034,0xF8000034,0xF1FC0032,0xFFE40011,0xFFE00000,0xF9F80032,0xFFC00000,0xF8000034,0xF9F80032,0xFFC00000,0xF8000034,0xF8000034,0xF9F80032, -0xFFC00000,0xF8000034,0xF8000034,0xF8000034,0xFDF40029,0xB7FC0032,0xF3F40032,0xFFEC0022,0xFFE80019,0xFFDC000D,0xFFCC0000,0xFFB00000,0xFFF00029,0xFFEC0029,0xFFD40001,0xF8000034,0xF7FC0032,0xA42422,0xA42422,0xA42422,0xA42422,0xA42422,0xA42422,0xA42422,0xA42422,0xA42422,0xA42422,0xFE5008B1,0xFE5008B1,0xFE5008B1,0xFE5008B1,0xFE5008B1, -0xFE5008B1,0xD8200001,0xD8200001,0xD8200001,0x961C0001,0x2F42420,0x2F42420,0x2F42420,0x2F42420,0x2F42420,0x2F42420,0xBA000AB5,0xBA000AB5,0xBA000AB5,0x8C000425,0x1F42420,0x1F42420,0x1F42420,0x6C001408,0x52002420,0xFE8C1E6A,0xA42422,0xA42422,0xFE84174D,0xFE781212,0xFE5C0D22,0xFE5C0D22,0xFE400431,0xFE741C52,0xFE641699,0xFE1000D0,0xBA000AB5, -0x1602420,}; -static const uint32_t g_etc1_to_bc7_m6_table249[] = { -0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x440000,0x880000, -0x880000,0x880000,0x880000,0x16000000,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x2C0001,0x300000,0x300000,0x300000,0x440000,0x600000,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x13C0001,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000, -0x1D80000,0x71F80000,0x71F80000,0x71F80000,0x9E000000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x1D80000,0x71F80000,0x71F80000,0x71F80000,0x9E000000,0x71F80000,0x71F80000,0x71F80000,0x9E000000,0x9E000000,0x1540000,0x13C0001,0x13C0001,0x3700000,0x58C0000,0x1B00000,0x1B00000,0x13FC0000,0x3700000,0x58C0000,0x4BFC0000,0x71F80000, -0x4BFC0000,0x1F80001,0x1F80001,0x1F80001,0x1F80001,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFDF80000,0xFDF80000,0xFC000000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFDF80000,0xFDF80000,0xFC000000,0xFDF80000,0xFDF80000,0xFC000000,0xFC000000,0xFBFC0000,0xFBFC0000,0xFBFC0000,0xFDF80000,0xFDF80000,0xFC000000,0xFDF80000,0xFDF80000,0xFC000000,0xFC000000,0xFDF80000, -0xFDF80000,0xFC000000,0xFC000000,0xFC000000,0xF5FC0000,0xE7FC0000,0x1F80001,0xF9FC0000,0xFBFC0000,0xFDF80000,0xFDF80000,0xFDF80000,0xF7FC0000,0xFBFC0000,0xFDF80000,0xFC000000,0xFDF80000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1588BEC,0xFF447ADF,0xFF2C6D47,0xFF20687B,0xFF186404,0xFF00509F,0xFEF44933,0xFED837A6,0xFECC2E12,0xFCB42426,0xFF0062AC,0xFED049C7,0xFEC43FBB,0xFE942717,0xFE7C18FE,0xFE68098D,0xFE702804,0xFE4C15EB,0xF8240111,0xDE2C1785,0x3FC8BEC,0xFECC72CF,0xFEB86878,0xFE704B2A,0xFE4C3941,0xFC182424,0xFE204349,0xFE082945,0xF4000D79,0xD80020D8,0x85F88BEC, -0xE6006BE4,0xD60045F6,0xC4004D71,0xAC008BEC,0xFF3C7814,0xFF4C849C,0xFF4C888C,0xFF1861D6,0xFED8483B,0xFEAC2E3E,0xFE902345,0xFE6C0FFB,0xFF3476CA,0xFEF45E5F,0xFE7028DB,0xF4000D79,0x63FC8BEC,0x1BC1783,0xFFB014AE,0xFFA0122B,0xFF9C1142,0xFF98106D,0xFF800C3A,0xFF740A69,0xFF6405EB,0xFF4C030A,0xFD3C0005,0x9BFC1783,0xFF7C132D,0xFF6C1142,0xFF380ACD,0xFF140631, -0xFEE00004,0xCFF81783,0xFED41142,0xF8000074,0xDE001784,0x9BFC1783,0xFF7C132D,0xFF6C1142,0xFF380ACD,0xFF140631,0xFEE00004,0xCFF81783,0xFED41142,0xF8000074,0xDE001784,0xCFF81783,0xFED41142,0xF8000074,0xDE001784,0xDE001784,0xFFB41556,0xFFAC1685,0xF7BC1703,0xFF9812DD,0xFF700F7A,0xFF480B1E,0xFF2807B2,0xFEE40559,0xFFA41576,0xFF841292,0xFF301168,0xF8000074, -0xC1FC1783,0x120687B,0x120687B,0x120687B,0x120687B,0xFEF44933,0xFEF44933,0xFEF44933,0xFECC2E12,0xFECC2E12,0xFCB42422,0xFEC43FBB,0xFEC43FBB,0xFEC43FBB,0xFE7C18FE,0xFE7C18FE,0xFE68098D,0xFE4C15EB,0xFE4C15EB,0xF42800D9,0xCC2C1145,0x1B06878,0x1B06878,0x1B06878,0xFE4C3941,0xFE4C3941,0xFC182420,0xFE082945,0xFE082945,0xE8000C8D,0xC6001888,0x5DF46878, -0x5DF46878,0xC4003D82,0xAC003B8D,0x90006878,0xFF045983,0xF71C62B2,0x120687B,0xFEE84AEA,0xFEC4399B,0xFEAC29BA,0xFE902345,0xFE6C0FFB,0xFEF8582E,0xFED0482D,0xFE70284B,0xE8000C8D,0x31FC6878,0x19C1142,0x19C1142,0x19C1142,0x19C1142,0xFF740A69,0xFF740A69,0xFF740A69,0xFF4C030A,0xFF4C030A,0xFD3C0001,0x6BFC1142,0x6BFC1142,0x6BFC1142,0xFF140631,0xFF140631, -0xFCE00001,0xB7F81142,0xB7F81142,0xF8000050,0xCC001144,0x6BFC1142,0x6BFC1142,0x6BFC1142,0xFF140631,0xFF140631,0xFCE00001,0xB7F81142,0xB7F81142,0xF8000050,0xCC001144,0xB7F81142,0xB7F81142,0xF8000050,0xCC001144,0xCC001144,0xFF900F79,0xFF8C101A,0x19C1142,0xFF780E45,0xFF5C0C05,0xFF3809A1,0xFF2807B2,0xFEE40559,0xFF780FCD,0xFF680E10,0xA3FC1142,0xF8000050, -0xA3FC1142,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFF80001,0xFDFC0002,0xFFF80001,0xFFF80000,0xFFF80002,0xFFF00000,0xFC000004,0xFDFC0002,0xFFF80001,0xFFF80000,0xFFF80002,0xFFF00000,0xFC000004,0xFFF80002,0xFFF00000,0xFC000004,0xFC000004,0xFDFC0002,0xFFF80001,0xFFF80000,0xFFF80002,0xFFF00000,0xFC000004,0xFFF80002,0xFFF00000,0xFC000004,0xFC000004,0xFFF80002, -0xFFF00000,0xFC000004,0xFC000004,0xFC000004,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFF80001,0xFFF40001,0xFFF40000,0xFFEC0000,0xFDFC0002,0xFDFC0002,0xFFF40000,0xFC000004,0xFFF80002,0xB42422,0xB42422,0xB42422,0xB42422,0xB42422,0xB42422,0xB42422,0xB42422,0xB42422,0xB42422,0xFE680989,0xFE680989,0xFE680989,0xFE680989,0xFE680989, -0xFE680989,0xE0300001,0xE0300001,0xE0300001,0x9E2C0001,0x30C2420,0x30C2420,0x30C2420,0x30C2420,0x30C2420,0x30C2420,0xCC000949,0xCC000949,0xCC000949,0x980002FD,0xBFC2420,0xBFC2420,0xBFC2420,0x780012C8,0x5A002420,0xFEAC1E85,0xB42422,0xB42422,0xFE9017EA,0xFE8C12CA,0xFE780DFA,0xFE780DFA,0xFE5404ED,0xFE901C99,0xFE7816FA,0xFE200140,0xCC000949, -0x1802420,}; -static const uint32_t g_etc1_to_bc7_m6_table250[] = { -0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0x5C0000,0xB80000, -0xB80000,0xB80000,0xB80000,0x1E000000,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x3C0001,0x8400000,0x8400000,0x8400000,0x5C0000,0x800000,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x14C0001,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000, -0x1F00000,0x7DF80000,0x7DF80000,0x7DF80000,0xA6000000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x1F00000,0x7DF80000,0x7DF80000,0x7DF80000,0xA6000000,0x7DF80000,0x7DF80000,0x7DF80000,0xA6000000,0xA6000000,0x1640000,0x14C0001,0x14C0001,0x1840000,0x5A00000,0x3C40000,0x3C40000,0x27FC0000,0x1840000,0x5A00000,0x5BFC0000,0x7DF80000, -0x5BFC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1608334,0xFF4473EF,0xFF3867CB,0xFF2C6383,0xFF245F04,0xFF0C4D93,0xFF0046EB,0xFEE4366A,0xFED82DC2,0xFEC42422,0xFF0C5CE4,0xFEE8465F,0xFED03D53,0xFEAC2617,0xFE941956,0xFE740A69,0xFE7C2534,0xFE5814AB,0xFA3C009A,0xE03C142D,0x13FC8330,0xFEE46CB7,0xFEC86380,0xFE88483A,0xFE643831,0xFE2C2422,0xFE2C3F39,0xFE082639,0xF4000A2D,0xDE001B24,0x8BFC8330, -0xEC006584,0xDC003FEA,0xCA004541,0xB0008330,0xFF3C711C,0xFF4C7D04,0xF9608037,0xFF185C82,0xFEEC452B,0xFEBC2D3F,0xFEA82336,0xFE7410B2,0xFF346F66,0xFF00597C,0xFE7C2771,0xF4000A2D,0x6DFC8330,0x1C4142B,0xFFB411F3,0xFFAC0FDB,0xFFA40F22,0xFFA40E41,0xFF8C0ACE,0xFF800955,0xFF700543,0xFF6402EA,0xFF4C0001,0xA9FC142B,0xFF8810A9,0xFF7C0F20,0xFF4C09A2,0xFF2C05A5, -0xFEF80000,0xD5F8142B,0xFEF00F20,0xFE000010,0xE000142C,0xA9FC142B,0xFF8810A9,0xFF7C0F20,0xFF4C09A2,0xFF2C05A5,0xFEF80000,0xD5F8142B,0xFEF00F20,0xFE000010,0xE000142C,0xD5F8142B,0xFEF00F20,0xFE000010,0xE000142C,0xE000142C,0xFFB41242,0xF9C01343,0xF9C013BB,0xFF98106D,0xFF840D72,0xFF6409D1,0xFF500709,0xFEFC04FD,0xFFB41262,0xFF980FFE,0xFF440F44,0xFE000010, -0xC9FC142B,0x12C6383,0x12C6383,0x12C6383,0x12C6383,0xFF0046EB,0xFF0046EB,0xFF0046EB,0xFED82DC2,0xFED82DC2,0xFEC42422,0xFED03D53,0xFED03D53,0xFED03D53,0xFE941956,0xFE941956,0xFE740A69,0xFE5814AB,0xFE5814AB,0xF838007E,0xD23C0F21,0x1C06380,0x1C06380,0x1C06380,0xFE643831,0xFE643831,0xFE2C2422,0xFE082639,0xFE082639,0xEE0009C5,0xCC001478,0x65F86380, -0x65F86380,0xCA003962,0xB2003611,0x96006380,0xFF1055F5,0xFD285DF2,0x12C6383,0xFEF8483A,0xFED437FA,0xFEBC28FE,0xFEA82336,0xFE7410B2,0xFEF8548A,0xFEDC4611,0xFE7C26E1,0xEE0009C5,0x3DF86380,0x1A40F22,0x1A40F22,0x1A40F22,0x1A40F22,0xFF800955,0xFF800955,0xFF800955,0xFF6402EA,0xFF6402EA,0xFF4C0001,0x7BFC0F20,0x7BFC0F20,0x7BFC0F20,0xFF2C05A5,0xFF2C05A5, -0xFEF80000,0xBFF80F20,0xBFF80F20,0xFC00000D,0xD2000F20,0x7BFC0F20,0x7BFC0F20,0x7BFC0F20,0xFF2C05A5,0xFF2C05A5,0xFEF80000,0xBFF80F20,0xBFF80F20,0xFC00000D,0xD2000F20,0xBFF80F20,0xBFF80F20,0xFC00000D,0xD2000F20,0xD2000F20,0xFF900DB1,0xFBA40E1D,0x1A40F22,0xFF880C92,0xFF700AA5,0xFF5808CA,0xFF500709,0xFEFC04FD,0xFF940DC8,0xFF840C82,0xADFC0F20,0xFC00000D, -0xADFC0F20,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0xC42422,0xC42422,0xC42422,0xC42422,0xC42422,0xC42422,0xC42422,0xC42422,0xC42422,0xC42422,0xFE740A69,0xFE740A69,0xFE740A69,0xFE740A69,0xFE740A69, -0xFE740A69,0xE8400001,0xE8400001,0xE8400001,0xA63C0001,0x3242420,0x3242420,0x3242420,0x3242420,0x3242420,0x3242420,0xD80007F9,0xD80007F9,0xD80007F9,0xA20001F4,0x17FC2420,0x17FC2420,0x17FC2420,0x7E0011A4,0x62002420,0xFEAC1EF5,0xC42422,0xC42422,0xFEA01865,0xFE941379,0xFE880EBA,0xFE880EBA,0xFE6C05D2,0xFEA01D14,0xFE9417A4,0xFE3401BA,0xD80007F9, -0x1A42420,}; -static const uint32_t g_etc1_to_bc7_m6_table251[] = { -0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0x740000,0xE80000, -0xE80000,0xE80000,0xE80000,0x26000000,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x4C0001,0x540000,0x540000,0x540000,0x740000,0xA40000,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0x15C0001,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000, -0xDFC0000,0x89F80000,0x89F80000,0x89F80000,0xAE000000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0xDFC0000,0x89F80000,0x89F80000,0x89F80000,0xAE000000,0x89F80000,0x89F80000,0x89F80000,0xAE000000,0xAE000000,0x3740000,0x15C0001,0x15C0001,0x1980000,0x5B40000,0x1DC0000,0x1DC0000,0x3BFC0000,0x1980000,0x5B40000,0x69FC0000,0x89F80000, -0x69FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x16879B4,0xFF506BE7,0xFF406114,0xFF385D2B,0xFF305924,0xFF18496F,0xFF0C4373,0xFEF03492,0xFEE42CC6,0xFED42422,0xFF185634,0xFEF441BB,0xFEDC3993,0xFEB8246F,0xFEA018E6,0xFE8C0B51,0xFE9421B4,0xFE701283,0xFC4C003A,0xE44C10AD,0x1FFC79B0,0xFEF0656B,0xFED85D2B,0xFEA0448A,0xFE703629,0xFE442420,0xFE383A6D,0xFE0822E9,0xFA00077D,0xE2001579,0x91FC79B0, -0xF2005E40,0xE0003A48,0xD0003CE5,0xB40079B0,0xFF4C694B,0xFB647402,0xFD6876DF,0xFF2C56B2,0xFF004133,0xFED42B7A,0xFEBC21F6,0xFE84109D,0xFF3467C6,0xFF1053A2,0xFE94250F,0xFA00077D,0x75FC79B0,0x1C810AB,0xFFBC0EC6,0xFFB40D22,0xFFAC0C82,0xFFA40BD1,0xFF9808EE,0xFF8C07B5,0xFF7C0453,0xFF700262,0xFF5C0001,0xAFFC10AB,0xFF940DC1,0xFF880C80,0xFF5807F2,0xFF3804A9, -0xFF100000,0xD7FC10AB,0xFF080C80,0xFE140000,0xE40010AC,0xAFFC10AB,0xFF940DC1,0xFF880C80,0xFF5807F2,0xFF3804A9,0xFF100000,0xD7FC10AB,0xFF080C80,0xFE140000,0xE40010AC,0xD7FC10AB,0xFF080C80,0xFE140000,0xE40010AC,0xE40010AC,0xFFBC0F15,0xFBC40FDB,0xFDC81043,0xFFB00D9E,0xFF980B2E,0xFF6C0809,0xFF5005C9,0xFF20040D,0xFFB40F22,0xFF980D4E,0xFF540C99,0xFE140000, -0xCDFC10AB,0x1385D2B,0x1385D2B,0x1385D2B,0x1385D2B,0xFF0C4373,0xFF0C4373,0xFF0C4373,0xFEE42CC6,0xFEE42CC6,0xFED42422,0xFEDC3993,0xFEDC3993,0xFEDC3993,0xFEA018E6,0xFEA018E6,0xFE8C0B51,0xFE701283,0xFE701283,0xFA480032,0xD64C0C81,0x1D05D2B,0x1D05D2B,0x1D05D2B,0xFE703629,0xFE703629,0xFE442420,0xFE0822E9,0xFE0822E9,0xF4000759,0xD2001024,0x6DF85D2B, -0x6DF85D2B,0xD00034E6,0xB8002FC1,0x9C005D2C,0xFF205092,0xFF2C5816,0x1385D2B,0xFF004403,0xFEEC354B,0xFEBC276E,0xFEBC21F6,0xFE84109D,0xFF104F65,0xFEF441EE,0xFE942496,0xF4000759,0x45FC5D2B,0x1AC0C82,0x1AC0C82,0x1AC0C82,0x1AC0C82,0xFF8C07B5,0xFF8C07B5,0xFF8C07B5,0xFF700262,0xFF700262,0xFF5C0001,0x87FC0C80,0x87FC0C80,0x87FC0C80,0xFF3804A9,0xFF3804A9, -0xFF100000,0xC5F80C80,0xC5F80C80,0xFE140000,0xD6000C80,0x87FC0C80,0x87FC0C80,0x87FC0C80,0xFF3804A9,0xFF3804A9,0xFF100000,0xC5F80C80,0xC5F80C80,0xFE140000,0xD6000C80,0xC5F80C80,0xC5F80C80,0xFE140000,0xD6000C80,0xD6000C80,0xFFA00B50,0xFFAC0B95,0x1AC0C82,0xFF980A69,0xFF8408D1,0xFF640721,0xFF5005C9,0xFF20040D,0xFF940B68,0xFF840A52,0xB5FC0C80,0xFE140000, -0xB5FC0C80,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0xD42422,0xD42422,0xD42422,0xD42422,0xD42422,0xD42422,0xD42422,0xD42422,0xD42422,0xD42422,0xFE8C0B51,0xFE8C0B51,0xFE8C0B51,0xFE8C0B51,0xFE8C0B51, -0xFE8C0B51,0xF0500001,0xF0500001,0xF0500001,0xAE4C0001,0x33C2420,0x33C2420,0x33C2420,0x33C2420,0x33C2420,0x33C2420,0xE800069D,0xE800069D,0xE800069D,0xA8000124,0x23FC2420,0x23FC2420,0x23FC2420,0x8A001074,0x6A002420,0xFECC1F02,0xD42422,0xD42422,0xFEBC18F5,0xFEA81429,0xFE980F82,0xFE980F82,0xFE8006B2,0xFAB41D8D,0xFEA01829,0xFE50027D,0xE800069D, -0x1C82420,}; -static const uint32_t g_etc1_to_bc7_m6_table252[] = { -0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x28C0000,0x1200000, -0x1200000,0x1200000,0x1200000,0x2E000001,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0x600000,0xA640000,0xA640000,0xA640000,0x28C0000,0xCC0000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x1700000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000, -0x29FC0000,0x97F80000,0x97F80000,0x97F80000,0xB6000001,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x29FC0000,0x97F80000,0x97F80000,0x97F80000,0xB6000001,0x97F80000,0x97F80000,0x97F80000,0xB6000001,0xB6000001,0x1880000,0x1700000,0x1700000,0x1AC0000,0x3CC0000,0x1F40000,0x1F40000,0x51FC0000,0x1AC0000,0x3CC0000,0x7BFC0000,0x97F80000, -0x7BFC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1746F9A,0xFF5C636D,0xFF4C59E2,0xFF445671,0xFF4452B0,0xFF244509,0xFF183FC9,0xFF0832A0,0xFEFC2BCC,0xFEE82420,0xFF244F2A,0xFF003CE1,0xFEF43599,0xFECC22EC,0xFEB8187A,0xFEA40C6D,0xFEA01E26,0xFE88107D,0xFE5C0008,0xE6600D21,0x2BFC6F9A,0xFEFC5DB5,0xFEE85671,0xFEAC40A4,0xFE8833FB,0xFE602420,0xFE5835B7,0xFE142035,0xFA000585,0xE2000FFD,0x97FC6F9A, -0xF80056D6,0xE6003482,0xD0003409,0xB8006F9A,0xFF5860F6,0xFF6C6A48,0xFF6C6D09,0xFF2C503A,0xFF0C3CF6,0xFEDC2926,0xFECC20F9,0xFEA01085,0xFF505FB2,0xFF184DB4,0xFEB0221D,0xFA000585,0x7DF86F9A,0x1D00D22,0xFFC40B96,0xFFB80A52,0xFFB809D9,0xFFB00948,0xFFA40709,0xFF980614,0xFF880362,0xFF7C01DD,0xFF700000,0xB7FC0D21,0xFFA00AD2,0xFF9409D9,0xFF700633,0xFF5803BA, -0xFF2C0000,0xDDF40D21,0xFF2009D9,0xFE4C0000,0xE6000D21,0xB7FC0D21,0xFFA00AD2,0xFF9409D9,0xFF700633,0xFF5803BA,0xFF2C0000,0xDDF40D21,0xFF2009D9,0xFE4C0000,0xE6000D21,0xDDF40D21,0xFF2009D9,0xFE4C0000,0xE6000D21,0xE6000D21,0xFFBC0BF4,0xFFCC0C66,0xFFCC0CC6,0xFFB40AAE,0xFF9808C1,0xFF7C063E,0xFF680484,0xFF380334,0xFFC40C0E,0xFFA40A8D,0xFF6409F2,0xFE4C0000, -0xD3FC0D21,0x1445671,0x1445671,0x1445671,0x1445671,0xFF183FC9,0xFF183FC9,0xFF183FC9,0xFEFC2BCC,0xFEFC2BCC,0xFEE82420,0xFEF43599,0xFEF43599,0xFEF43599,0xFEB8187A,0xFEB8187A,0xFEA40C6D,0xFE88107D,0xFE88107D,0xFC600008,0xDA6009D9,0x1E05671,0x1E05671,0x1E05671,0xFE8833FB,0xFE8833FB,0xFE602420,0xFE142035,0xFE142035,0xFA000575,0xD8000BF6,0x75FC5671, -0x75FC5671,0xD6003080,0xBE002931,0xA2005672,0xFF2C4B21,0xF9405231,0x1445671,0xFF103F92,0xFEEC3269,0xFEDC25DD,0xFECC20F9,0xFEA01085,0xFF2049F1,0xFF003DFD,0xFEB021B9,0xFA000575,0x51FC5671,0x1B809D9,0x1B809D9,0x1B809D9,0x1B809D9,0xFF980614,0xFF980614,0xFF980614,0xFF7C01DD,0xFF7C01DD,0xFF700000,0x93FC09D9,0x93FC09D9,0x93FC09D9,0xFF5803BA,0xFF5803BA, -0xFF2C0000,0xCBF809D9,0xCBF809D9,0xFE4C0000,0xDA0009D9,0x93FC09D9,0x93FC09D9,0x93FC09D9,0xFF5803BA,0xFF5803BA,0xFF2C0000,0xCBF809D9,0xCBF809D9,0xFE4C0000,0xDA0009D9,0xCBF809D9,0xCBF809D9,0xFE4C0000,0xDA0009D9,0xDA0009D9,0xF9B00908,0xFFAC0928,0x1B809D9,0xFF980832,0xFF8406F4,0xFF740590,0xFF680484,0xFF380334,0xFFA80908,0xFF980808,0xBDF809D9,0xFE4C0000, -0xBDF809D9,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0xE82420,0xE82420,0xE82420,0xE82420,0xE82420,0xE82420,0xE82420,0xE82420,0xE82420,0xE82420,0xFEA40C6D,0xFEA40C6D,0xFEA40C6D,0xFEA40C6D,0xFEA40C6D, -0xFEA40C6D,0xFA600000,0xFA600000,0xFA600000,0xB6600001,0x1582420,0x1582420,0x1582420,0x1582420,0x1582420,0x1582420,0xFA000565,0xFA000565,0xFA000565,0xB4000082,0x31F82420,0x31F82420,0x31F82420,0x96000F3A,0x72002422,0xF8E01F81,0xE82420,0xE82420,0xFECC1974,0xFEB414F4,0xFEAC109D,0xFEAC109D,0xFE9407B4,0xFEBC1DC9,0xFEB418A0,0xFE60034D,0xFA000565, -0x1EC2420,}; -static const uint32_t g_etc1_to_bc7_m6_table253[] = { -0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x2A40000,0x1500000, -0x1500000,0x1500000,0x1500000,0x36000001,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x700000,0x780000,0x780000,0x780000,0x2A40000,0xEC0000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x1800000,0x41FC0000,0x41FC0000,0x41FC0000,0x41FC0000,0x41FC0000, -0x41FC0000,0xA1FC0000,0xA1FC0000,0xA1FC0000,0xBE000001,0x41FC0000,0x41FC0000,0x41FC0000,0x41FC0000,0x41FC0000,0x41FC0000,0xA1FC0000,0xA1FC0000,0xA1FC0000,0xBE000001,0xA1FC0000,0xA1FC0000,0xA1FC0000,0xBE000001,0xBE000001,0x5980000,0x1800000,0x1800000,0x7BC0000,0x3E00000,0x13FC0000,0x13FC0000,0x65FC0000,0x7BC0000,0x3E00000,0x89FC0000,0xA1FC0000, -0x89FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x17C672A,0xFF685C5D,0xFF5853E2,0xFF5050D1,0xFF444D70,0xFF30415D,0xFF243CB9,0xFF1430F0,0xFF082AE0,0xFEF82420,0xFF304956,0xFF0C38E9,0xFF00325D,0xFED821A0,0xFECC1848,0xFEB00D61,0xFEAC1B66,0xFE940EED,0xFE700004,0xE8700A59,0x37FC672A,0xFF145745,0xFEFC50D1,0xFEB83D84,0xFEA03223,0xFE782420,0xFE703187,0xFE2C1E1D,0xFC0004C5,0xE8000BE9,0x9DFC672A, -0xFC0050E5,0xE6003022,0xD6002CD1,0xBC00672A,0xFF645A02,0xFF6C6288,0xF578652A,0xFF444AE6,0xFF143961,0xFEF0276A,0xFEDC2036,0xFEB010A0,0xFF5058A2,0xFF304877,0xFEB41FE1,0xFC0004C5,0x83FC672A,0x1D40A56,0xFFC8092D,0xFFC00825,0xFFC007C1,0xFFBC074C,0xFFB0058D,0xFFA404C8,0xFF9402A6,0xFF880179,0xFF800000,0xC1FC0A56,0xFFAC0882,0xFFA007C1,0xFF7C04DB,0xFF6402EA, -0xFF400001,0xE1F80A56,0xFF3807C1,0xFE7C0000,0xE8000A59,0xC1FC0A56,0xFFAC0882,0xFFA007C1,0xFF7C04DB,0xFF6402EA,0xFF400001,0xE1F80A56,0xFF3807C1,0xFE7C0000,0xE8000A59,0xE1F80A56,0xFF3807C1,0xFE7C0000,0xE8000A59,0xE8000A59,0xFDCC0965,0xFFCC09C6,0xFFCC0A26,0xFFC00865,0xFFAC06E5,0xFF7C050E,0xFF7C0385,0xFF50028A,0xFFC4096E,0xFFB0085D,0xFF7407D1,0xFE7C0000, -0xD9FC0A56,0x15050D1,0x15050D1,0x15050D1,0x15050D1,0xFF243CB9,0xFF243CB9,0xFF243CB9,0xFF082AE0,0xFF082AE0,0xFEF82420,0xFF00325D,0xFF00325D,0xFF00325D,0xFECC1848,0xFECC1848,0xFEB00D61,0xFE940EED,0xFE940EED,0xFE700004,0xDE7007C1,0x1F450D1,0x1F450D1,0x1F450D1,0xFEA03223,0xFEA03223,0xFE782420,0xFE2C1E1D,0xFE2C1E1D,0xFC0004C1,0xDE0008E2,0x7FF850D1, -0x7FF850D1,0xDC002D24,0xC40023CD,0xA60050D2,0xFF3C469D,0xFF4C4CD1,0x15050D1,0xFF183C30,0xFF002FDD,0xFEE82482,0xFEDC2036,0xFEB010A0,0xFF344580,0xFF103A91,0xFEB41F7D,0xFC0004C1,0x5DF850D1,0x1C007C1,0x1C007C1,0x1C007C1,0x1C007C1,0xFFA404C8,0xFFA404C8,0xFFA404C8,0xFF880179,0xFF880179,0xFF800000,0x9FFC07C1,0x9FFC07C1,0x9FFC07C1,0xFF6402EA,0xFF6402EA, -0xFF400001,0xD1F807C1,0xD1F807C1,0xFE7C0000,0xDE0007C1,0x9FFC07C1,0x9FFC07C1,0x9FFC07C1,0xFF6402EA,0xFF6402EA,0xFF400001,0xD1F807C1,0xD1F807C1,0xFE7C0000,0xDE0007C1,0xD1F807C1,0xD1F807C1,0xFE7C0000,0xDE0007C1,0xDE0007C1,0xFDB80708,0xF7BC0745,0x1C007C1,0xFFA40681,0xFF980568,0xFF7C0465,0xFF7C0385,0xFF50028A,0xFDB0070A,0xFFA4065D,0xC3FC07C1,0xFE7C0000, -0xC3FC07C1,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0xF82420,0xF82420,0xF82420,0xF82420,0xF82420,0xF82420,0xF82420,0xF82420,0xF82420,0xF82420,0xFEB00D61,0xFEB00D61,0xFEB00D61,0xFEB00D61,0xFEB00D61, -0xFEB00D61,0xFE700004,0xFE700004,0xFE700004,0xBE700001,0x1702420,0x1702420,0x1702420,0x1702420,0x1702420,0x1702420,0xFA0004B5,0xFA0004B5,0xFA0004B5,0xBC00002D,0x3DF82420,0x3DF82420,0x3DF82420,0x9C000E2A,0x7A002422,0xFEEC1F85,0xF82420,0xF82420,0xFED81A11,0xFED015B1,0xFEC41174,0xFEC41174,0xFE9C08C8,0xFED81E0A,0xFED01962,0xFE74040D,0xFA0004B5, -0x9FC2420,}; -static const uint32_t g_etc1_to_bc7_m6_table254[] = { -0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x2BC0000,0x1800000, -0x1800000,0x1800000,0x1800000,0x3E000001,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x800000,0x880000,0x880000,0x880000,0x2BC0000,0x1100000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x1900000,0x59FC0000,0x59FC0000,0x59FC0000,0x59FC0000,0x59FC0000, -0x59FC0000,0xADFC0000,0xADFC0000,0xADFC0000,0xC6000001,0x59FC0000,0x59FC0000,0x59FC0000,0x59FC0000,0x59FC0000,0x59FC0000,0xADFC0000,0xADFC0000,0xADFC0000,0xC6000001,0xADFC0000,0xADFC0000,0xADFC0000,0xC6000001,0xC6000001,0xDA80000,0x1900000,0x1900000,0x1D00000,0x3F40000,0x31FC0000,0x31FC0000,0x79FC0000,0x1D00000,0x3F40000,0x99FC0000,0xADFC0000, -0x99FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x1845F3A,0xFF7455C5,0xFF644E42,0xFF584B89,0xFF504888,0xFF3C3DE9,0xFF3039D9,0xFF202F60,0xFF142A0C,0xFF082420,0xFF4443BD,0xFF183541,0xFF0C2F69,0xFEF02070,0xFED81820,0xFEC40E48,0xFEB81906,0xFEA00DB5,0xFE880034,0xEC8007E1,0x43FC5F3A,0xFF205129,0xFF084B89,0xFED83AA0,0xFEB8306B,0xFE902420,0xFE882DB7,0xFE381C71,0xFE0804BD,0xEA000892,0xA3FC5F3A, -0xFE084B86,0xEC002C5A,0xDC002651,0xC0005F3A,0xFF645372,0xF9805B72,0xF9805D5A,0xFF4445D6,0xFF20362E,0xFEFC25F8,0xFEE81F71,0xFEC010D5,0xFF58526E,0xFF3043C7,0xFEBC1E2D,0xFE0804BD,0x8BFC5F3A,0x1D807E2,0xFFD006F2,0xFFCC0635,0xFFC805E9,0xFFC40576,0xFFBC0441,0xFFB003A4,0xFFA00202,0xFFA00121,0xFF900000,0xC9FC07E1,0xFFB8067A,0xFFAC05E9,0xFF9403AB,0xFF7C0232, -0xFF580001,0xE5F807E1,0xFF5405E9,0xFEAC0000,0xEC0007E1,0xC9FC07E1,0xFFB8067A,0xFFAC05E9,0xFF9403AB,0xFF7C0232,0xFF580001,0xE5F807E1,0xFF5405E9,0xFEAC0000,0xEC0007E1,0xE5F807E1,0xFF5405E9,0xFEAC0000,0xEC0007E1,0xEC0007E1,0xFFD00715,0xF5D80798,0xF5D807BD,0xFFC4065E,0xFFAC0545,0xFFA003D8,0xFF8C02BA,0xFF6401E1,0xFFD0072E,0xFFC0063D,0xFF9005F6,0xFEAC0000, -0xDFF807E1,0x1584B89,0x1584B89,0x1584B89,0x1584B89,0xFF3039D9,0xFF3039D9,0xFF3039D9,0xFF142A0C,0xFF142A0C,0xFF082420,0xFF0C2F69,0xFF0C2F69,0xFF0C2F69,0xFED81820,0xFED81820,0xFEC40E48,0xFEA00DB5,0xFEA00DB5,0xFE880034,0xE28005E9,0x7FC4B86,0x7FC4B86,0x7FC4B86,0xFEB8306B,0xFEB8306B,0xFE902420,0xFE381C71,0xFE381C71,0xFE0804BD,0xE200064D,0x85FC4B86, -0x85FC4B86,0xE0002A76,0xCA001ED9,0xAC004B86,0xFF4C4244,0xFF4C4811,0x1584B89,0xFF2C38D6,0xFF142DA1,0xFEF8232A,0xFEE81F71,0xFEC010D5,0xFF344100,0xFF18376B,0xFEBC1DC9,0xFE0804BD,0x65FC4B86,0x1C805E9,0x1C805E9,0x1C805E9,0x1C805E9,0xFFB003A4,0xFFB003A4,0xFFB003A4,0xFFA00121,0xFFA00121,0xFF900000,0xABFC05E9,0xABFC05E9,0xABFC05E9,0xFF7C0232,0xFF7C0232, -0xFF580001,0xD7F805E9,0xD7F805E9,0xFEAC0000,0xE20005E9,0xABFC05E9,0xABFC05E9,0xABFC05E9,0xFF7C0232,0xFF7C0232,0xFF580001,0xD7F805E9,0xD7F805E9,0xFEAC0000,0xE20005E9,0xD7F805E9,0xD7F805E9,0xFEAC0000,0xE20005E9,0xE20005E9,0xFFBC0550,0xFBC4057D,0x1C805E9,0xFFB404EA,0xFFAC0424,0xFF940371,0xFF8C02BA,0xFF6401E1,0xFFB4055A,0xFFB004E4,0xCBFC05E9,0xFEAC0000, -0xCBFC05E9,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1082420,0x1082420,0x1082420,0x1082420,0x1082420,0x1082420,0x1082420,0x1082420,0x1082420,0x1082420,0xFEC40E48,0xFEC40E48,0xFEC40E48,0xFEC40E48,0xFEC40E48, -0xFEC40E48,0xFE880034,0xFE880034,0xFE880034,0xC6800001,0x1882420,0x1882420,0x1882420,0x1882420,0x1882420,0x1882420,0xFE0804BD,0xFE0804BD,0xFE0804BD,0xC6000002,0x49F82420,0x49F82420,0x49F82420,0xA6000D41,0x82002422,0xF9002000,0x1082420,0x1082420,0xFEE81A90,0xFEE41675,0xFED41248,0xFED41248,0xFEAC09CD,0xFAEC1E85,0xFED019E2,0xFE90052A,0xFE0804BD, -0x19FC2420,}; -static const uint32_t g_etc1_to_bc7_m6_table255[] = { -0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0xD40000,0x1B00000, -0x1B00000,0x1B00000,0x1B00000,0x46000001,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x900000,0x4980000,0x4980000,0x4980000,0xD40000,0x1300000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x1A00000,0x71FC0000,0x71FC0000,0x71FC0000,0x71FC0000,0x71FC0000, -0x71FC0000,0xB9FC0000,0xB9FC0000,0xB9FC0000,0xCE000001,0x71FC0000,0x71FC0000,0x71FC0000,0x71FC0000,0x71FC0000,0x71FC0000,0xB9FC0000,0xB9FC0000,0xB9FC0000,0xCE000001,0xB9FC0000,0xB9FC0000,0xB9FC0000,0xCE000001,0xCE000001,0x1BC0000,0x1A00000,0x1A00000,0x1E40000,0x1BFC0000,0x4FFC0000,0x4FFC0000,0x8DFC0000,0x1E40000,0x1BFC0000,0xA7FC0000,0xB9FC0000, -0xA7FC0000,0x1FC0001,0x1FC0001,0x1FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFBFC0001,0xF7FC0001,0x1FC0001,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0x1FC0001,0xFDFC0001,0xFFFC0000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000, -0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xFFFC0000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0001,0xF7FC0001,0xF7FC0001,0xFDFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0001,0xFDFC0001,0xFFF00000,0xFE000000, -0xFFFC0000,0x18C57CA,0xFF744F85,0xFF6C48F9,0xFF644691,0xFF5C43F0,0xFF443AAD,0xFF4436EC,0xFF2C2DF0,0xFF202950,0xFF182420,0xFF443E8D,0xFF2431E9,0xFF182CBD,0xFEFC1F6C,0xFEEC1816,0xFED00F64,0xFED81711,0xFEB80CAD,0xFE940094,0xEE9005C1,0x4FFC57CA,0xFF2C4B75,0xFF184691,0xFEF037C0,0xFECC2F04,0xFEA82420,0xFEA02A47,0xFE641B02,0xFE140585,0xEE0005E5,0xA9FC57CA, -0xFE284691,0xF2002962,0xDC002041,0xC40057CA,0xFF744D72,0xFD885442,0xFD88560A,0xFF5440F0,0xFF343326,0xFF1024C0,0xFEF81EEA,0xFED81141,0xFF704C95,0xFF503F25,0xFED81CB7,0xFE140585,0x93FC57CA,0x1E005C2,0xFFDC051E,0xFFD4048E,0xFFD00451,0xFFD00402,0xFFC40311,0xFFBC02A8,0xFFAC0176,0xFFAC00CD,0xFFA00000,0xCFFC05C1,0xFFC004C1,0xFFB80451,0xFFA002A3,0xFF88019A, -0xFF700001,0xE7FC05C1,0xFF6C0451,0xFEE00000,0xEE0005C1,0xCFFC05C1,0xFFC004C1,0xFFB80451,0xFFA002A3,0xFF88019A,0xFF700001,0xE7FC05C1,0xFF6C0451,0xFEE00000,0xEE0005C1,0xE7FC05C1,0xFF6C0451,0xFEE00000,0xEE0005C1,0xEE0005C1,0xFFD8052A,0xF7DC0584,0xF7DC05A5,0xFFD404BA,0xFFC003D1,0xFFA802C2,0xFFA001F9,0xFF7C0161,0xFFD0053E,0xFFC80479,0xFF98045A,0xFEE00000, -0xE1FC05C1,0x1644691,0x1644691,0x1644691,0x1644691,0xFF4436EC,0xFF4436EC,0xFF4436EC,0xFF202950,0xFF202950,0xFF182420,0xFF182CBD,0xFF182CBD,0xFF182CBD,0xFEEC1816,0xFEEC1816,0xFED00F64,0xFEB80CAD,0xFEB80CAD,0xFE940094,0xE6900451,0x15FC4691,0x15FC4691,0x15FC4691,0xFECC2F04,0xFECC2F04,0xFEA82420,0xFE641B02,0xFE641B02,0xFE140585,0xE6000461,0x8DFC4691, -0x8DFC4691,0xE6002822,0xD0001A55,0xB2004692,0xFF4C3E24,0xF96043A4,0x1644691,0xFF3C35A5,0xFF202BD5,0xFF08226D,0xFEF81EEA,0xFED81141,0xFF3C3D49,0xFF303446,0xFED81C66,0xFE140585,0x6FFC4691,0x1D00451,0x1D00451,0x1D00451,0x1D00451,0xFFBC02A8,0xFFBC02A8,0xFFBC02A8,0xFFAC00CD,0xFFAC00CD,0xFFA00000,0xB7FC0451,0xB7FC0451,0xB7FC0451,0xFF88019A,0xFF88019A, -0xFF700001,0xDDF40451,0xDDF40451,0xFEE00000,0xE6000451,0xB7FC0451,0xB7FC0451,0xB7FC0451,0xFF88019A,0xFF88019A,0xFF700001,0xDDF40451,0xDDF40451,0xFEE00000,0xE6000451,0xDDF40451,0xDDF40451,0xFEE00000,0xE6000451,0xE6000451,0xF9CC03F5,0xFFCC03F5,0x1D00451,0xFFC4039D,0xFFAC0304,0xFFA80271,0xFFA001F9,0xFF7C0161,0xFBC803F5,0xFBC0039D,0xD3FC0451,0xFEE00000, -0xD3FC0451,0x1FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFDFC0002,0xFFFC0001,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFE000000,0xFFFC0000,0xFFFC0000,0xFE000000,0xFE000000,0xFFFC0000, -0xFFFC0000,0xFE000000,0xFE000000,0xFE000000,0xFDFC0002,0xF7FC0002,0xF7FC0002,0xFDFC0002,0xFFFC0001,0xFFFC0000,0xFFFC0000,0xFFFC0000,0xFDFC0002,0xFDFC0002,0xFFFC0000,0xFE000000,0xFFFC0000,0x1182420,0x1182420,0x1182420,0x1182420,0x1182420,0x1182420,0x1182420,0x1182420,0x1182420,0x1182420,0xFED00F64,0xFED00F64,0xFED00F64,0xFED00F64,0xFED00F64, -0xFED00F64,0xFE940094,0xFE940094,0xFE940094,0xCE900001,0x1A02420,0x1A02420,0x1A02420,0x1A02420,0x1A02420,0x1A02420,0xFE140585,0xFE140585,0xFE140585,0xCE0C0001,0x55F82420,0x55F82420,0x55F82420,0xB2000C49,0x8A002422,0xFF0C2008,0x1182420,0x1182420,0xFF041B34,0xFEF81741,0xFEDC133D,0xFEDC133D,0xFEC00AE1,0xFEF41EAD,0xFEE41AAA,0xFE9C0631,0xFE140585, -0x27FC2420,}; -const uint32_t *g_etc1_to_bc7_m6_table[] = { -g_etc1_to_bc7_m6_table0, g_etc1_to_bc7_m6_table1, g_etc1_to_bc7_m6_table2, g_etc1_to_bc7_m6_table3, g_etc1_to_bc7_m6_table4, g_etc1_to_bc7_m6_table5, g_etc1_to_bc7_m6_table6, g_etc1_to_bc7_m6_table7, g_etc1_to_bc7_m6_table8, g_etc1_to_bc7_m6_table9, g_etc1_to_bc7_m6_table10, g_etc1_to_bc7_m6_table11, g_etc1_to_bc7_m6_table12, g_etc1_to_bc7_m6_table13, g_etc1_to_bc7_m6_table14, g_etc1_to_bc7_m6_table15, -g_etc1_to_bc7_m6_table16, g_etc1_to_bc7_m6_table17, g_etc1_to_bc7_m6_table18, g_etc1_to_bc7_m6_table19, g_etc1_to_bc7_m6_table20, g_etc1_to_bc7_m6_table21, g_etc1_to_bc7_m6_table22, g_etc1_to_bc7_m6_table23, g_etc1_to_bc7_m6_table24, g_etc1_to_bc7_m6_table25, g_etc1_to_bc7_m6_table26, g_etc1_to_bc7_m6_table27, g_etc1_to_bc7_m6_table28, g_etc1_to_bc7_m6_table29, g_etc1_to_bc7_m6_table30, g_etc1_to_bc7_m6_table31, -g_etc1_to_bc7_m6_table32, g_etc1_to_bc7_m6_table33, g_etc1_to_bc7_m6_table34, g_etc1_to_bc7_m6_table35, g_etc1_to_bc7_m6_table36, g_etc1_to_bc7_m6_table37, g_etc1_to_bc7_m6_table38, g_etc1_to_bc7_m6_table39, g_etc1_to_bc7_m6_table40, g_etc1_to_bc7_m6_table41, g_etc1_to_bc7_m6_table42, g_etc1_to_bc7_m6_table43, g_etc1_to_bc7_m6_table44, g_etc1_to_bc7_m6_table45, g_etc1_to_bc7_m6_table46, g_etc1_to_bc7_m6_table47, -g_etc1_to_bc7_m6_table48, g_etc1_to_bc7_m6_table49, g_etc1_to_bc7_m6_table50, g_etc1_to_bc7_m6_table51, g_etc1_to_bc7_m6_table52, g_etc1_to_bc7_m6_table53, g_etc1_to_bc7_m6_table54, g_etc1_to_bc7_m6_table55, g_etc1_to_bc7_m6_table56, g_etc1_to_bc7_m6_table57, g_etc1_to_bc7_m6_table58, g_etc1_to_bc7_m6_table59, g_etc1_to_bc7_m6_table60, g_etc1_to_bc7_m6_table61, g_etc1_to_bc7_m6_table62, g_etc1_to_bc7_m6_table63, -g_etc1_to_bc7_m6_table64, g_etc1_to_bc7_m6_table65, g_etc1_to_bc7_m6_table66, g_etc1_to_bc7_m6_table67, g_etc1_to_bc7_m6_table68, g_etc1_to_bc7_m6_table69, g_etc1_to_bc7_m6_table70, g_etc1_to_bc7_m6_table71, g_etc1_to_bc7_m6_table72, g_etc1_to_bc7_m6_table73, g_etc1_to_bc7_m6_table74, g_etc1_to_bc7_m6_table75, g_etc1_to_bc7_m6_table76, g_etc1_to_bc7_m6_table77, g_etc1_to_bc7_m6_table78, g_etc1_to_bc7_m6_table79, -g_etc1_to_bc7_m6_table80, g_etc1_to_bc7_m6_table81, g_etc1_to_bc7_m6_table82, g_etc1_to_bc7_m6_table83, g_etc1_to_bc7_m6_table84, g_etc1_to_bc7_m6_table85, g_etc1_to_bc7_m6_table86, g_etc1_to_bc7_m6_table87, g_etc1_to_bc7_m6_table88, g_etc1_to_bc7_m6_table89, g_etc1_to_bc7_m6_table90, g_etc1_to_bc7_m6_table91, g_etc1_to_bc7_m6_table92, g_etc1_to_bc7_m6_table93, g_etc1_to_bc7_m6_table94, g_etc1_to_bc7_m6_table95, -g_etc1_to_bc7_m6_table96, g_etc1_to_bc7_m6_table97, g_etc1_to_bc7_m6_table98, g_etc1_to_bc7_m6_table99, g_etc1_to_bc7_m6_table100, g_etc1_to_bc7_m6_table101, g_etc1_to_bc7_m6_table102, g_etc1_to_bc7_m6_table103, g_etc1_to_bc7_m6_table104, g_etc1_to_bc7_m6_table105, g_etc1_to_bc7_m6_table106, g_etc1_to_bc7_m6_table107, g_etc1_to_bc7_m6_table108, g_etc1_to_bc7_m6_table109, g_etc1_to_bc7_m6_table110, g_etc1_to_bc7_m6_table111, -g_etc1_to_bc7_m6_table112, g_etc1_to_bc7_m6_table113, g_etc1_to_bc7_m6_table114, g_etc1_to_bc7_m6_table115, g_etc1_to_bc7_m6_table116, g_etc1_to_bc7_m6_table117, g_etc1_to_bc7_m6_table118, g_etc1_to_bc7_m6_table119, g_etc1_to_bc7_m6_table120, g_etc1_to_bc7_m6_table121, g_etc1_to_bc7_m6_table122, g_etc1_to_bc7_m6_table123, g_etc1_to_bc7_m6_table124, g_etc1_to_bc7_m6_table125, g_etc1_to_bc7_m6_table126, g_etc1_to_bc7_m6_table127, -g_etc1_to_bc7_m6_table128, g_etc1_to_bc7_m6_table129, g_etc1_to_bc7_m6_table130, g_etc1_to_bc7_m6_table131, g_etc1_to_bc7_m6_table132, g_etc1_to_bc7_m6_table133, g_etc1_to_bc7_m6_table134, g_etc1_to_bc7_m6_table135, g_etc1_to_bc7_m6_table136, g_etc1_to_bc7_m6_table137, g_etc1_to_bc7_m6_table138, g_etc1_to_bc7_m6_table139, g_etc1_to_bc7_m6_table140, g_etc1_to_bc7_m6_table141, g_etc1_to_bc7_m6_table142, g_etc1_to_bc7_m6_table143, -g_etc1_to_bc7_m6_table144, g_etc1_to_bc7_m6_table145, g_etc1_to_bc7_m6_table146, g_etc1_to_bc7_m6_table147, g_etc1_to_bc7_m6_table148, g_etc1_to_bc7_m6_table149, g_etc1_to_bc7_m6_table150, g_etc1_to_bc7_m6_table151, g_etc1_to_bc7_m6_table152, g_etc1_to_bc7_m6_table153, g_etc1_to_bc7_m6_table154, g_etc1_to_bc7_m6_table155, g_etc1_to_bc7_m6_table156, g_etc1_to_bc7_m6_table157, g_etc1_to_bc7_m6_table158, g_etc1_to_bc7_m6_table159, -g_etc1_to_bc7_m6_table160, g_etc1_to_bc7_m6_table161, g_etc1_to_bc7_m6_table162, g_etc1_to_bc7_m6_table163, g_etc1_to_bc7_m6_table164, g_etc1_to_bc7_m6_table165, g_etc1_to_bc7_m6_table166, g_etc1_to_bc7_m6_table167, g_etc1_to_bc7_m6_table168, g_etc1_to_bc7_m6_table169, g_etc1_to_bc7_m6_table170, g_etc1_to_bc7_m6_table171, g_etc1_to_bc7_m6_table172, g_etc1_to_bc7_m6_table173, g_etc1_to_bc7_m6_table174, g_etc1_to_bc7_m6_table175, -g_etc1_to_bc7_m6_table176, g_etc1_to_bc7_m6_table177, g_etc1_to_bc7_m6_table178, g_etc1_to_bc7_m6_table179, g_etc1_to_bc7_m6_table180, g_etc1_to_bc7_m6_table181, g_etc1_to_bc7_m6_table182, g_etc1_to_bc7_m6_table183, g_etc1_to_bc7_m6_table184, g_etc1_to_bc7_m6_table185, g_etc1_to_bc7_m6_table186, g_etc1_to_bc7_m6_table187, g_etc1_to_bc7_m6_table188, g_etc1_to_bc7_m6_table189, g_etc1_to_bc7_m6_table190, g_etc1_to_bc7_m6_table191, -g_etc1_to_bc7_m6_table192, g_etc1_to_bc7_m6_table193, g_etc1_to_bc7_m6_table194, g_etc1_to_bc7_m6_table195, g_etc1_to_bc7_m6_table196, g_etc1_to_bc7_m6_table197, g_etc1_to_bc7_m6_table198, g_etc1_to_bc7_m6_table199, g_etc1_to_bc7_m6_table200, g_etc1_to_bc7_m6_table201, g_etc1_to_bc7_m6_table202, g_etc1_to_bc7_m6_table203, g_etc1_to_bc7_m6_table204, g_etc1_to_bc7_m6_table205, g_etc1_to_bc7_m6_table206, g_etc1_to_bc7_m6_table207, -g_etc1_to_bc7_m6_table208, g_etc1_to_bc7_m6_table209, g_etc1_to_bc7_m6_table210, g_etc1_to_bc7_m6_table211, g_etc1_to_bc7_m6_table212, g_etc1_to_bc7_m6_table213, g_etc1_to_bc7_m6_table214, g_etc1_to_bc7_m6_table215, g_etc1_to_bc7_m6_table216, g_etc1_to_bc7_m6_table217, g_etc1_to_bc7_m6_table218, g_etc1_to_bc7_m6_table219, g_etc1_to_bc7_m6_table220, g_etc1_to_bc7_m6_table221, g_etc1_to_bc7_m6_table222, g_etc1_to_bc7_m6_table223, -g_etc1_to_bc7_m6_table224, g_etc1_to_bc7_m6_table225, g_etc1_to_bc7_m6_table226, g_etc1_to_bc7_m6_table227, g_etc1_to_bc7_m6_table228, g_etc1_to_bc7_m6_table229, g_etc1_to_bc7_m6_table230, g_etc1_to_bc7_m6_table231, g_etc1_to_bc7_m6_table232, g_etc1_to_bc7_m6_table233, g_etc1_to_bc7_m6_table234, g_etc1_to_bc7_m6_table235, g_etc1_to_bc7_m6_table236, g_etc1_to_bc7_m6_table237, g_etc1_to_bc7_m6_table238, g_etc1_to_bc7_m6_table239, -g_etc1_to_bc7_m6_table240, g_etc1_to_bc7_m6_table241, g_etc1_to_bc7_m6_table242, g_etc1_to_bc7_m6_table243, g_etc1_to_bc7_m6_table244, g_etc1_to_bc7_m6_table245, g_etc1_to_bc7_m6_table246, g_etc1_to_bc7_m6_table247, g_etc1_to_bc7_m6_table248, g_etc1_to_bc7_m6_table249, g_etc1_to_bc7_m6_table250, g_etc1_to_bc7_m6_table251, g_etc1_to_bc7_m6_table252, g_etc1_to_bc7_m6_table253, g_etc1_to_bc7_m6_table254, g_etc1_to_bc7_m6_table255, -}; diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc index 3e7610ff53..8244550959 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_5.inc @@ -491,4 +491,4 @@ {17,31,10897},{16,31,12077},{13,31,6285},{13,31,6285},{8,31,68},{4,31,7686},{0,31,1341},{27,31,968},{27,31,968},{27,31,968},{25,31,325},{31,21,1513},{23,31,605},{23,31,605},{11,31,0},{31,26,1513},{11,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{16,0,9248},{16,0,9248},{16,0,9248},{16,0,9248},{12,31,3626}, {12,31,3626},{12,31,3626},{8,31,68},{0,31,1341},{0,31,1341},{21,31,17476},{20,31,14998},{20,31,14098},{18,31,10672},{20,31,16018},{15,31,8154},{15,31,6218},{9,31,200},{10,31,11338},{0,31,1613},{28,31,1041},{27,31,801},{27,31,680},{26,31,232},{29,29,1473},{26,31,753},{24,31,442},{14,31,0},{31,28,1473},{14,31,0},{20,31,14098},{20,31,14098},{20,31,14098},{18,31,10672},{17,31,11453},{15,31,6218},{15,31,6218}, {9,31,200},{6,31,7270},{0,31,1613},{27,31,680},{27,31,680},{27,31,680},{26,31,232},{28,28,1105},{24,31,442},{24,31,442},{14,31,0},{28,28,1105},{14,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{31,31,0},{0,31,0},{31,31,0},{0,31,0},{17,0,9248},{17,0,9248},{17,0,9248},{17,0,9248},{13,31,3929},{13,31,3929},{13,31,3929},{9,31,200},{0,31,1613}, -{0,31,1613},
\ No newline at end of file +{0,31,1613}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc index 2441fbe859..fad45fe22d 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_dxt1_6.inc @@ -491,4 +491,4 @@ {34,63,10841},{34,63,12089},{26,63,6206},{26,63,6206},{17,63,74},{9,63,7678},{0,63,1341},{54,63,937},{54,63,937},{54,63,937},{51,63,305},{63,43,1513},{47,63,605},{47,63,605},{22,63,1},{62,53,1513},{22,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{32,0,9256},{32,0,9256},{32,0,9256},{32,0,9256},{23,63,3650}, {23,63,3650},{23,63,3650},{17,63,74},{0,63,1341},{0,63,1341},{43,63,17392},{40,63,15021},{40,63,14060},{37,63,10673},{40,63,16013},{32,63,8261},{29,63,6166},{19,63,194},{20,63,11338},{1,63,1594},{57,63,1041},{56,63,822},{54,63,697},{52,63,234},{63,51,1473},{51,63,737},{49,63,442},{28,63,1},{63,57,1473},{28,63,1},{40,63,14060},{40,63,14060},{40,63,14060},{37,63,10673},{34,63,11401},{29,63,6166},{29,63,6166}, {19,63,194},{12,63,7270},{1,63,1594},{54,63,697},{54,63,697},{54,63,697},{52,63,234},{63,46,1105},{49,63,442},{49,63,442},{28,63,1},{63,54,1105},{28,63,1},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{63,63,0},{0,63,0},{63,63,0},{0,63,0},{34,0,9256},{34,0,9256},{34,0,9256},{34,0,9256},{26,63,3898},{26,63,3898},{26,63,3898},{19,63,194},{1,63,1594}, -{1,63,1594},
\ No newline at end of file +{1,63,1594}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc index 0bca0bbddc..fbaf988d78 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_45.inc @@ -478,4 +478,4 @@ {8,31,11312},{8,31,11249},{7,31,6499},{7,31,6499},{4,31,260},{3,31,10457},{0,31,2642},{13,31,925},{13,31,925},{13,31,925},{12,31,397},{15,22,1513},{11,31,794},{11,31,794},{7,31,4},{14,27,1513},{7,31,4},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{8,0,9376},{8,0,9376},{8,0,9376},{8,0,9376},{6,31,3074}, {6,31,3074},{6,31,3074},{4,31,260},{0,31,2642},{0,31,2642},{8,31,58848},{7,31,39683},{6,31,25130},{6,31,19007},{8,31,54849},{6,31,27132},{5,31,8569},{4,31,756},{4,31,51302},{0,31,5046},{13,31,1078},{13,31,806},{13,31,637},{12,31,365},{15,26,1473},{12,31,978},{12,31,617},{8,31,9},{14,29,1473},{8,31,9},{9,31,13604},{9,31,13604},{9,31,13604},{8,31,11184},{8,31,10433},{7,31,6339},{7,31,6339}, {5,31,424},{4,31,9713},{0,31,2930},{13,31,637},{13,31,637},{13,31,637},{12,31,365},{14,27,1105},{12,31,617},{12,31,617},{8,31,9},{13,29,1105},{8,31,9},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{15,31,0},{0,31,0},{15,31,0},{0,31,0},{8,0,9248},{8,0,9248},{8,0,9248},{8,0,9248},{6,31,3330},{6,31,3330},{6,31,3330},{5,31,424},{0,31,2930}, -{0,31,2930},
\ No newline at end of file +{0,31,2930}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc index 10c94153ad..3b9d7022e7 100644 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_tables_pvrtc2_alpha_33.inc @@ -478,4 +478,4 @@ {4,7,11305},{4,7,11209},{3,7,6489},{3,7,6489},{2,7,272},{1,7,10377},{0,7,2642},{6,7,1040},{6,7,1040},{6,7,1040},{6,7,416},{7,6,1537},{6,7,929},{6,7,929},{3,7,9},{7,6,1513},{3,7,9},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9280},{4,0,9280},{4,0,9280},{4,0,9280},{3,7,3125}, {3,7,3125},{3,7,3125},{2,7,272},{0,7,2642},{0,7,2642},{4,7,59414},{4,7,41414},{3,7,24952},{3,7,19100},{4,7,55014},{3,7,27085},{2,7,10021},{2,7,656},{1,7,52310},{0,7,5046},{7,7,1142},{7,7,1070},{7,7,1021},{6,7,416},{7,7,1538},{6,7,1025},{6,7,625},{4,7,4},{6,7,1529},{4,7,4},{5,7,13964},{5,7,13964},{5,7,13964},{4,7,11305},{4,7,10505},{3,7,6665},{3,7,6665}, {2,7,592},{2,7,9973},{0,7,2930},{7,7,1021},{7,7,1021},{7,7,1021},{6,7,416},{7,6,1105},{6,7,625},{6,7,625},{4,7,4},{6,7,1129},{4,7,4},{7,7,242},{7,7,170},{7,7,121},{7,7,49},{7,7,242},{7,7,98},{7,7,49},{0,7,0},{7,7,98},{0,7,0},{4,0,9280},{4,0,9280},{4,0,9280},{4,0,9280},{3,7,3301},{3,7,3301},{3,7,3301},{2,7,592},{0,7,2930}, -{0,7,2930},
\ No newline at end of file +{0,7,2930}, diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h new file mode 100644 index 0000000000..d501a2af6e --- /dev/null +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_uastc.h @@ -0,0 +1,297 @@ +// basisu_transcoder_uastc.h +#pragma once +#include "basisu_transcoder_internal.h" + +namespace basist +{ + struct color_quad_u8 + { + uint8_t m_c[4]; + }; + + const uint32_t TOTAL_UASTC_MODES = 19; + const uint32_t UASTC_MODE_INDEX_SOLID_COLOR = 8; + + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS2 = 30; + const uint32_t TOTAL_ASTC_BC7_COMMON_PARTITIONS3 = 11; + const uint32_t TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS = 19; + + extern const uint8_t g_uastc_mode_weight_bits[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_weight_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_endpoint_ranges[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_subsets[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_planes[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_comps[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_etc1_bias[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint0[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_bc1_hint1[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_has_alpha[TOTAL_UASTC_MODES]; + extern const uint8_t g_uastc_mode_is_la[TOTAL_UASTC_MODES]; + + struct astc_bc7_common_partition2_desc + { + uint8_t m_bc7; + uint16_t m_astc; + bool m_invert; + }; + + extern const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2]; + + struct bc73_astc2_common_partition_desc + { + uint8_t m_bc73; + uint16_t m_astc2; + uint8_t k; // 0-5 - how to modify the BC7 3-subset pattern to match the ASTC pattern (LSB=invert) + }; + + extern const bc73_astc2_common_partition_desc g_bc7_3_astc2_common_partitions[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS]; + + struct astc_bc7_common_partition3_desc + { + uint8_t m_bc7; + uint16_t m_astc; + uint8_t m_astc_to_bc7_perm; // converts ASTC to BC7 partition using g_astc_bc7_partition_index_perm_tables[][] + }; + + extern const astc_bc7_common_partition3_desc g_astc_bc7_common_partitions3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3]; + + extern const uint8_t g_astc_bc7_patterns2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][16]; + extern const uint8_t g_astc_bc7_patterns3[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][16]; + extern const uint8_t g_bc7_3_astc2_patterns2[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][16]; + + extern const uint8_t g_astc_bc7_pattern2_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS2][3]; + extern const uint8_t g_astc_bc7_pattern3_anchors[TOTAL_ASTC_BC7_COMMON_PARTITIONS3][3]; + extern const uint8_t g_bc7_3_astc2_patterns2_anchors[TOTAL_BC7_3_ASTC2_COMMON_PARTITIONS][3]; + + extern const uint32_t g_uastc_mode_huff_codes[TOTAL_UASTC_MODES + 1][2]; + + extern const uint8_t g_astc_to_bc7_partition_index_perm_tables[6][3]; + extern const uint8_t g_bc7_to_astc_partition_index_perm_tables[6][3]; // inverse of g_astc_to_bc7_partition_index_perm_tables + + extern const uint8_t* s_uastc_to_bc1_weights[6]; + + uint32_t bc7_convert_partition_index_3_to_2(uint32_t p, uint32_t k); + + inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w, bool srgb) + { + if (srgb) + { + l = (l << 8) | 0x80; + h = (h << 8) | 0x80; + } + else + { + l = (l << 8) | l; + h = (h << 8) | h; + } + + uint32_t k = (l * (64 - w) + h * w + 32) >> 6; + + return k >> 8; + } + + struct astc_block_desc + { + int m_weight_range; // weight BISE range + + int m_subsets; // number of ASTC partitions + int m_partition_seed; // partition pattern seed + int m_cem; // color endpoint mode used by all subsets + + int m_ccs; // color component selector (dual plane only) + bool m_dual_plane; // true if dual plane + + // Weight and endpoint BISE values. + // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order + }; + + const uint32_t BC7ENC_TOTAL_ASTC_RANGES = 21; + + // See tables 81, 93, 18.13.Endpoint Unquantization + const uint32_t TOTAL_ASTC_RANGES = 21; + extern const int g_astc_bise_range_table[TOTAL_ASTC_RANGES][3]; + + struct astc_quant_bin + { + uint8_t m_unquant; // unquantized value + uint8_t m_index; // sorted index + }; + + extern astc_quant_bin g_astc_unquant[BC7ENC_TOTAL_ASTC_RANGES][256]; // [ASTC encoded endpoint index] + + int astc_get_levels(int range); + bool astc_is_valid_endpoint_range(uint32_t range); + uint32_t unquant_astc_endpoint(uint32_t packed_bits, uint32_t packed_trits, uint32_t packed_quints, uint32_t range); + uint32_t unquant_astc_endpoint_val(uint32_t packed_val, uint32_t range); + + const uint8_t* get_anchor_indices(uint32_t subsets, uint32_t mode, uint32_t common_pattern, const uint8_t*& pPartition_pattern); + + // BC7 + const uint32_t BC7ENC_BLOCK_SIZE = 16; + + struct bc7_block + { + uint64_t m_qwords[2]; + }; + + struct bc7_optimization_results + { + uint32_t m_mode; + uint32_t m_partition; + uint8_t m_selectors[16]; + uint8_t m_alpha_selectors[16]; + color_quad_u8 m_low[3]; + color_quad_u8 m_high[3]; + uint32_t m_pbits[3][2]; + uint32_t m_index_selector; + uint32_t m_rotation; + }; + + extern const uint32_t g_bc7_weights1[2]; + extern const uint32_t g_bc7_weights2[4]; + extern const uint32_t g_bc7_weights3[8]; + extern const uint32_t g_bc7_weights4[16]; + extern const uint32_t g_astc_weights4[16]; + extern const uint32_t g_astc_weights5[32]; + extern const uint32_t g_astc_weights_3levels[3]; + extern const uint8_t g_bc7_partition1[16]; + extern const uint8_t g_bc7_partition2[64 * 16]; + extern const uint8_t g_bc7_partition3[64 * 16]; + extern const uint8_t g_bc7_table_anchor_index_second_subset[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_1[64]; + extern const uint8_t g_bc7_table_anchor_index_third_subset_2[64]; + extern const uint8_t g_bc7_num_subsets[8]; + extern const uint8_t g_bc7_partition_bits[8]; + extern const uint8_t g_bc7_color_index_bitcount[8]; + extern const uint8_t g_bc7_mode_has_p_bits[8]; + extern const uint8_t g_bc7_mode_has_shared_p_bits[8]; + extern const uint8_t g_bc7_color_precision_table[8]; + extern const int8_t g_bc7_alpha_precision_table[8]; + extern const uint8_t g_bc7_alpha_index_bitcount[8]; + + inline bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || (mode == 5); } + inline int get_bc7_color_index_size(int mode, int index_selection_bit) { return g_bc7_color_index_bitcount[mode] + index_selection_bit; } + inline int get_bc7_alpha_index_size(int mode, int index_selection_bit) { return g_bc7_alpha_index_bitcount[mode] - index_selection_bit; } + + struct endpoint_err + { + uint16_t m_error; uint8_t m_lo; uint8_t m_hi; + }; + + extern endpoint_err g_bc7_mode_6_optimal_endpoints[256][2]; // [c][pbit] + const uint32_t BC7ENC_MODE_6_OPTIMAL_INDEX = 5; + + extern endpoint_err g_bc7_mode_5_optimal_endpoints[256]; // [c] + const uint32_t BC7ENC_MODE_5_OPTIMAL_INDEX = 1; + + // Packs a BC7 block from a high-level description. Handles all BC7 modes. + void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults); + + // Packs an ASTC block + // Constraints: Always 4x4, all subset CEM's must be equal, only tested with LDR CEM's. + bool pack_astc_block(uint32_t* pDst, const astc_block_desc* pBlock, uint32_t mode); + + void pack_astc_solid_block(void* pDst_block, const color32& color); + +#ifdef _DEBUG + int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); +#endif + + struct uastc_block + { + union + { + uint8_t m_bytes[16]; + uint32_t m_dwords[4]; + +#ifndef __EMSCRIPTEN__ + uint64_t m_qwords[2]; +#endif + }; + }; + + struct unpacked_uastc_block + { + astc_block_desc m_astc; + + uint32_t m_mode; + uint32_t m_common_pattern; + + color32 m_solid_color; + + bool m_bc1_hint0; + bool m_bc1_hint1; + + bool m_etc1_flip; + bool m_etc1_diff; + uint32_t m_etc1_inten0; + uint32_t m_etc1_inten1; + + uint32_t m_etc1_bias; + + uint32_t m_etc2_hints; + + uint32_t m_etc1_selector; + uint32_t m_etc1_r, m_etc1_g, m_etc1_b; + }; + + color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); + + struct decoder_etc_block; + struct eac_block; + + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); + bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); + + bool unpack_uastc(const uastc_block& blk, color32* pPixels, bool srgb); + bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool undo_blue_contract, bool read_hints = true); + + bool transcode_uastc_to_astc(const uastc_block& src_blk, void* pDst); + + bool transcode_uastc_to_bc7(const unpacked_uastc_block& unpacked_src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, bc7_optimization_results& dst_blk); + bool transcode_uastc_to_bc7(const uastc_block& src_blk, void* pDst); + + void transcode_uastc_to_etc1(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst); + bool transcode_uastc_to_etc1(const uastc_block& src_blk, void* pDst, uint32_t channel); + + void transcode_uastc_to_etc2_eac_a8(unpacked_uastc_block& unpacked_src_blk, color32 block_pixels[4][4], void* pDst); + bool transcode_uastc_to_etc2_rgba(const uastc_block& src_blk, void* pDst); + + // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); + + enum + { + cEncodeBC1HighQuality = 1, + cEncodeBC1HigherQuality = 2, + cEncodeBC1UseSelectors = 4, + }; + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); + + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); + + void transcode_uastc_to_bc1_hint0(const unpacked_uastc_block& unpacked_src_blk, void* pDst); + void transcode_uastc_to_bc1_hint1(const unpacked_uastc_block& unpacked_src_blk, const color32 block_pixels[4][4], void* pDst, bool high_quality); + + bool transcode_uastc_to_bc1(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc3(const uastc_block& src_blk, void* pDst, bool high_quality); + bool transcode_uastc_to_bc4(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_bc5(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_etc2_eac_r11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0); + bool transcode_uastc_to_etc2_eac_rg11(const uastc_block& src_blk, void* pDst, bool high_quality, uint32_t chan0, uint32_t chan1); + + bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); + bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality); + + // uastc_init() MUST be called before using this module. + void uastc_init(); + +} // namespace basist diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_filter.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_filter.cpp deleted file mode 100644 index acddc0ff81..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_filter.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_filter.h" -#include "../sys/regression.h" -#include <map> - -namespace embree -{ - struct parallel_filter_regression_test : public RegressionTest - { - parallel_filter_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - auto pred = [&]( uint32_t v ) { return (v & 0x3) == 0; }; - - for (size_t N=10; N<1000000; N=size_t(2.1*N)) - { - size_t N0 = rand() % N; - - /* initialize array with random numbers */ - std::vector<uint32_t> src(N); - std::map<uint32_t,int> m; - for (size_t i=0; i<N; i++) src[i] = rand(); - - /* count elements up */ - for (size_t i=N0; i<N; i++) - if (pred(src[i])) - m[src[i]] = 0; - for (size_t i=N0; i<N; i++) - if (pred(src[i])) - m[src[i]]++; - - /* filter array */ - //size_t M = sequential_filter(src.data(),N0,N,pred); - size_t M = parallel_filter(src.data(),N0,N,size_t(1024),pred); - - /* check if filtered data is correct */ - for (size_t i=N0; i<M; i++) { - passed &= pred(src[i]); - m[src[i]]--; - } - for (size_t i=N0; i<M; i++) - passed &= (m[src[i]] == 0); - } - - return passed; - } - }; - - parallel_filter_regression_test parallel_filter_regression("parallel_filter_regression"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_for.cpp deleted file mode 100644 index ef070ebc4d..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_for.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_for.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_for_regression_test : public RegressionTest - { - parallel_for_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - const size_t M = 10; - for (size_t N=10; N<10000000; N=size_t(2.1*N)) - { - /* sequentially calculate sum of squares */ - size_t sum0 = 0; - for (size_t i=0; i<N; i++) { - sum0 += i*i; - } - - /* parallel calculation of sum of squares */ - for (size_t m=0; m<M; m++) - { - std::atomic<size_t> sum1(0); - parallel_for( size_t(0), size_t(N), size_t(1024), [&](const range<size_t>& r) - { - size_t s = 0; - for (size_t i=r.begin(); i<r.end(); i++) - s += i*i; - sum1 += s; - }); - passed = sum0 == sum1; - } - } - - return passed; - } - }; - - parallel_for_regression_test parallel_for_regression("parallel_for_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_for_for.cpp deleted file mode 100644 index 0337611b35..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_for_for.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_for_for_regression_test : public RegressionTest - { - parallel_for_for_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - /* create vector with random numbers */ - size_t sum0 = 0; - size_t K = 0; - const size_t M = 1000; - std::vector<std::vector<size_t>* > array2(M); - for (size_t i=0; i<M; i++) { - const size_t N = rand() % 1024; - K+=N; - array2[i] = new std::vector<size_t>(N); - for (size_t j=0; j<N; j++) - sum0 += (*array2[i])[j] = rand(); - } - - /* array to test global index */ - std::vector<atomic<size_t>> verify_k(K); - for (size_t i=0; i<K; i++) verify_k[i].store(0); - - /* add all numbers using parallel_for_for */ - std::atomic<size_t> sum1(0); - parallel_for_for( array2, size_t(1), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k) -> size_t - { - size_t s = 0; - for (size_t i=r.begin(); i<r.end(); i++) { - s += (*v)[i]; - verify_k[k++]++; - } - sum1 += s; - return sum1; - }); - passed &= (sum0 == sum1); - - /* check global index */ - for (size_t i=0; i<K; i++) - passed &= (verify_k[i] == 1); - - /* delete vectors again */ - for (size_t i=0; i<array2.size(); i++) - delete array2[i]; - - return passed; - } - }; - - parallel_for_for_regression_test parallel_for_for_regression("parallel_for_for_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.cpp deleted file mode 100644 index 0169d8e481..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_for_for_prefix_sum.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_for_for_prefix_sum_regression_test : public RegressionTest - { - parallel_for_for_prefix_sum_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - /* create vector with random numbers */ - const size_t M = 10; - std::vector<atomic<size_t>> flattened; - typedef std::vector<std::vector<size_t>* > ArrayArray; - ArrayArray array2(M); - size_t K = 0; - for (size_t i=0; i<M; i++) { - const size_t N = rand() % 10; - K += N; - array2[i] = new std::vector<size_t>(N); - for (size_t j=0; j<N; j++) - (*array2[i])[j] = rand() % 10; - } - - /* array to test global index */ - std::vector<atomic<size_t>> verify_k(K); - for (size_t i=0; i<K; i++) verify_k[i].store(0); - - ParallelForForPrefixSumState<size_t> state(array2,size_t(1)); - - /* dry run only counts */ - size_t S = parallel_for_for_prefix_sum0( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i) -> size_t - { - size_t s = 0; - for (size_t i=r.begin(); i<r.end(); i++) { - s += (*v)[i]; - verify_k[k++]++; - } - return s; - }, [](size_t v0, size_t v1) { return v0+v1; }); - - /* create properly sized output array */ - flattened.resize(S); - for (auto& a : flattened) a.store(0); - - /* now we actually fill the flattened array */ - parallel_for_for_prefix_sum1( state, array2, size_t(0), [&](std::vector<size_t>* v, const range<size_t>& r, size_t k, size_t i, const size_t base) -> size_t - { - size_t s = 0; - for (size_t i=r.begin(); i<r.end(); i++) { - for (size_t j=0; j<(*v)[i]; j++) { - flattened[base+s+j]++; - } - s += (*v)[i]; - verify_k[k++]++; - } - return s; - }, [](size_t v0, size_t v1) { return v0+v1; }); - - /* check global index */ - for (size_t i=0; i<K; i++) - passed &= (verify_k[i] == 2); - - /* check if each element was assigned exactly once */ - for (size_t i=0; i<flattened.size(); i++) - passed &= (flattened[i] == 1); - - /* delete arrays again */ - for (size_t i=0; i<array2.size(); i++) - delete array2[i]; - - return passed; - } - }; - - parallel_for_for_prefix_sum_regression_test parallel_for_for_prefix_sum_regression("parallel_for_for_prefix_sum_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_map.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_map.cpp deleted file mode 100644 index 09dc303f81..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_map.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_map.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_map_regression_test : public RegressionTest - { - parallel_map_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - /* create key/value vectors with random numbers */ - const size_t N = 10000; - std::vector<uint32_t> keys(N); - std::vector<uint32_t> vals(N); - for (size_t i=0; i<N; i++) keys[i] = 2*unsigned(i)*647382649; - for (size_t i=0; i<N; i++) std::swap(keys[i],keys[rand()%N]); - for (size_t i=0; i<N; i++) vals[i] = 2*rand(); - - /* create map */ - parallel_map<uint32_t,uint32_t> map; - map.init(keys,vals); - - /* check that all keys are properly mapped */ - for (size_t i=0; i<N; i++) { - const uint32_t* val = map.lookup(keys[i]); - passed &= val && (*val == vals[i]); - } - - /* check that these keys are not in the map */ - for (size_t i=0; i<N; i++) { - passed &= !map.lookup(keys[i]+1); - } - - return passed; - } - }; - - parallel_map_regression_test parallel_map_regression("parallel_map_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_partition.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_partition.cpp deleted file mode 100644 index eb20c4465d..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_partition.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_partition.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_partition_regression_test : public RegressionTest - { - parallel_partition_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - for (size_t i=0; i<100; i++) - { - /* create random permutation */ - size_t N = std::rand() % 1000000; - std::vector<unsigned> array(N); - for (unsigned i=0; i<N; i++) array[i] = i; - for (auto& v : array) std::swap(v,array[std::rand()%array.size()]); - size_t split = std::rand() % (N+1); - - /* perform parallel partitioning */ - size_t left_sum = 0, right_sum = 0; - size_t mid = parallel_partitioning(array.data(),0,array.size(),0,left_sum,right_sum, - [&] ( size_t i ) { return i < split; }, - [] ( size_t& sum, unsigned v) { sum += v; }, - [] ( size_t& sum, size_t v) { sum += v; }, - 128); - - /*serial_partitioning(array.data(),0,array.size(),left_sum,right_sum, - [&] ( size_t i ) { return i < split; }, - [] ( size_t& left_sum, int v) { left_sum += v; });*/ - - /* verify result */ - passed &= mid == split; - passed &= left_sum == split*(split-1)/2; - passed &= right_sum == N*(N-1)/2-left_sum; - for (size_t i=0; i<split; i++) passed &= array[i] < split; - for (size_t i=split; i<N; i++) passed &= array[i] >= split; - } - - return passed; - } - }; - - parallel_partition_regression_test parallel_partition_regression("parallel_partition_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.cpp deleted file mode 100644 index 685952c3dc..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_prefix_sum.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_prefix_sum_regression_test : public RegressionTest - { - parallel_prefix_sum_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - const size_t M = 10; - - for (size_t N=10; N<10000000; N=size_t(2.1*N)) - { - /* initialize array with random numbers */ - uint32_t sum0 = 0; - std::vector<uint32_t> src(N); - for (size_t i=0; i<N; i++) { - sum0 += src[i] = rand(); - } - - /* calculate parallel prefix sum */ - std::vector<uint32_t> dst(N); - for (auto& v : dst) v = 0; - - for (size_t i=0; i<M; i++) { - uint32_t sum1 = parallel_prefix_sum(src,dst,N,0,std::plus<uint32_t>()); - passed &= (sum0 == sum1); - } - - /* check if prefix sum is correct */ - for (size_t i=0, sum=0; i<N; sum+=src[i++]) - passed &= ((uint32_t)sum == dst[i]); - } - - return passed; - } - }; - - parallel_prefix_sum_regression_test parallel_prefix_sum_regression("parallel_prefix_sum_regression"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.cpp deleted file mode 100644 index 331fe4288e..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_reduce.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_reduce_regression_test : public RegressionTest - { - parallel_reduce_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - const size_t M = 10; - for (size_t N=10; N<10000000; N=size_t(2.1*N)) - { - /* sequentially calculate sum of squares */ - size_t sum0 = 0; - for (size_t i=0; i<N; i++) { - sum0 += i*i; - } - - /* parallel calculation of sum of squares */ - for (size_t m=0; m<M; m++) - { - size_t sum1 = parallel_reduce( size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r) -> size_t - { - size_t s = 0; - for (size_t i=r.begin(); i<r.end(); i++) - s += i*i; - return s; - }, - [](const size_t v0, const size_t v1) { - return v0+v1; - }); - passed = sum0 == sum1; - } - } - return passed; - } - }; - - parallel_reduce_regression_test parallel_reduce_regression("parallel_reduce_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_set.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_set.cpp deleted file mode 100644 index 20b639c1c9..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_set.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_set.h" -#include "../sys/regression.h" - -namespace embree -{ - struct parallel_set_regression_test : public RegressionTest - { - parallel_set_regression_test(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - - /* create vector with random numbers */ - const size_t N = 10000; - std::vector<uint32_t> unsorted(N); - for (size_t i=0; i<N; i++) unsorted[i] = 2*rand(); - - /* created set from numbers */ - parallel_set<uint32_t> sorted; - sorted.init(unsorted); - - /* check that all elements are in the set */ - for (size_t i=0; i<N; i++) { - passed &= sorted.lookup(unsorted[i]); - } - - /* check that these elements are not in the set */ - for (size_t i=0; i<N; i++) { - passed &= !sorted.lookup(unsorted[i]+1); - } - - return passed; - } - }; - - parallel_set_regression_test parallel_set_regression("parallel_set_regression_test"); -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_sort.cpp b/thirdparty/embree-aarch64/common/algorithms/parallel_sort.cpp deleted file mode 100644 index 5e7ec79ac1..0000000000 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_sort.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "parallel_sort.h" -#include "../sys/regression.h" - -namespace embree -{ - template<typename Key> - struct RadixSortRegressionTest : public RegressionTest - { - RadixSortRegressionTest(const char* name) : RegressionTest(name) { - registerRegressionTest(this); - } - - bool run () - { - bool passed = true; - const size_t M = 10; - - for (size_t N=10; N<1000000; N=size_t(2.1*N)) - { - std::vector<Key> src(N); memset(src.data(),0,N*sizeof(Key)); - std::vector<Key> tmp(N); memset(tmp.data(),0,N*sizeof(Key)); - for (size_t i=0; i<N; i++) src[i] = uint64_t(rand())*uint64_t(rand()); - - /* calculate checksum */ - Key sum0 = 0; for (size_t i=0; i<N; i++) sum0 += src[i]; - - /* sort numbers */ - for (size_t i=0; i<M; i++) { - radix_sort<Key>(src.data(),tmp.data(),N); - } - - /* calculate checksum */ - Key sum1 = 0; for (size_t i=0; i<N; i++) sum1 += src[i]; - if (sum0 != sum1) passed = false; - - /* check if numbers are sorted */ - for (size_t i=1; i<N; i++) - passed &= src[i-1] <= src[i]; - } - - return passed; - } - }; - - RadixSortRegressionTest<uint32_t> test_u32("RadixSortRegressionTestU32"); - RadixSortRegressionTest<uint64_t> test_u64("RadixSortRegressionTestU64"); -} diff --git a/thirdparty/embree-aarch64/common/math/AVX2NEON.h b/thirdparty/embree-aarch64/common/math/AVX2NEON.h deleted file mode 100644 index e8698ac56d..0000000000 --- a/thirdparty/embree-aarch64/common/math/AVX2NEON.h +++ /dev/null @@ -1,986 +0,0 @@ -#pragma once - -#include "SSE2NEON.h" - - -#define AVX2NEON_ABI static inline __attribute__((always_inline)) - - -struct __m256d; - -struct __m256 { - __m128 lo,hi; - __m256() {} -}; - - - - -struct __m256i { - __m128i lo,hi; - explicit __m256i(const __m256 a) : lo(__m128i(a.lo)),hi(__m128i(a.hi)) {} - operator __m256() const {__m256 res; res.lo = __m128(lo);res.hi = __m128(hi); return res;} - __m256i() {} -}; - - - - -struct __m256d { - float64x2_t lo,hi; - __m256d() {} - __m256d(const __m256& a) : lo(float64x2_t(a.lo)),hi(float64x2_t(a.hi)) {} - __m256d(const __m256i& a) : lo(float64x2_t(a.lo)),hi(float64x2_t(a.hi)) {} -}; - -#define UNARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a) {type res;res.lo=basic_func(a.lo);res.hi=basic_func(a.hi);return res;} - - -#define BINARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a,const type& b) {type res;res.lo=basic_func(a.lo,b.lo);res.hi=basic_func(a.hi,b.hi);return res;} -#define BINARY_AVX_OP_CAST(type,func,basic_func,bdst,bsrc) AVX2NEON_ABI type func(const type& a,const type& b) {type res;res.lo=bdst(basic_func(bsrc(a.lo),bsrc(b.lo)));res.hi=bdst(basic_func(bsrc(a.hi),bsrc(b.hi)));return res;} - -#define TERNARY_AVX_OP(type,func,basic_func) AVX2NEON_ABI type func(const type& a,const type& b,const type& c) {type res;res.lo=basic_func(a.lo,b.lo,c.lo);res.hi=basic_func(a.hi,b.hi,c.hi);return res;} - - -#define CAST_SIMD_TYPE(to,name,from,basic_dst) AVX2NEON_ABI to name(const from& a) { to res; res.lo = basic_dst(a.lo); res.hi=basic_dst(a.hi); return res;} - - - -#define _mm_stream_load_si128 _mm_load_si128 -#define _mm256_stream_load_si256 _mm256_load_si256 - - -AVX2NEON_ABI -__m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8) -{ - __m128 res; - for (int i=0;i<4;i++) - { - if (imm8 & (1<<i)) - { - res[i] = b[i]; - } - else{ - res[i] = a[i]; - } - } - - return res; -} - -AVX2NEON_ABI -__m128i _mm_blend_epi32 (__m128i a, __m128i b, const int imm8) -{ - __m128i res; - for (int i=0;i<4;i++) - { - if (imm8 & (1<<i)) - { - res[i] = b[i]; - } - else{ - res[i] = a[i]; - } - } - return res; -} - -AVX2NEON_ABI -__m128 _mm_cmpngt_ps (__m128 a, __m128 b) -{ - return __m128(vmvnq_s32(__m128i(_mm_cmpgt_ps(a,b)))); -} - - -AVX2NEON_ABI -__m128i _mm_loadl_epi64 (__m128i const* mem_addr) -{ - int64x2_t y; - y[0] = *(int64_t *)mem_addr; - y[1] = 0; - return __m128i(y); -} - -AVX2NEON_ABI -int _mm_movemask_popcnt(__m128 a) -{ - return __builtin_popcount(_mm_movemask_ps(a)); -} - -AVX2NEON_ABI -__m128 _mm_maskload_ps (float const * mem_addr, __m128i mask) -{ - __m128 res; - for (int i=0;i<4;i++) { - if (mask[i] & 0x80000000) res[i] = mem_addr[i]; else res[i] = 0; - } - return res; -} - -AVX2NEON_ABI -void _mm_maskstore_ps (float * mem_addr, __m128i mask, __m128 a) -{ - for (int i=0;i<4;i++) { - if (mask[i] & 0x80000000) mem_addr[i] = a[i]; - } -} - -AVX2NEON_ABI -void _mm_maskstore_epi32 (int * mem_addr, __m128i mask, __m128i a) -{ - for (int i=0;i<4;i++) { - if (mask[i] & 0x80000000) mem_addr[i] = a[i]; - } -} - -AVX2NEON_ABI -__m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c) -{ - return vnegq_f32(vfmaq_f32(c,a,b)); -} - -#define _mm_fnmsub_ss _mm_fnmsub_ps - -AVX2NEON_ABI -__m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c) -{ - return vfmsq_f32(c,a,b); -} - -#define _mm_fnmadd_ss _mm_fnmadd_ps - - -AVX2NEON_ABI -__m128 _mm_broadcast_ss (float const * mem_addr) -{ - return vdupq_n_f32(*mem_addr); -} - - -AVX2NEON_ABI -__m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c) -{ - return vfmaq_f32(vnegq_f32(c),a,b); -} - -#define _mm_fmsub_ss _mm_fmsub_ps -#define _mm_fmadd_ps _mm_madd_ps -#define _mm_fmadd_ss _mm_madd_ps - - - -template<int code> -AVX2NEON_ABI float32x4_t dpps_neon(const float32x4_t& a,const float32x4_t& b) -{ - float v; - v = 0; - v += (code & 0x10) ? a[0]*b[0] : 0; - v += (code & 0x20) ? a[1]*b[1] : 0; - v += (code & 0x40) ? a[2]*b[2] : 0; - v += (code & 0x80) ? a[3]*b[3] : 0; - float32x4_t res; - res[0] = (code & 0x1) ? v : 0; - res[1] = (code & 0x2) ? v : 0; - res[2] = (code & 0x4) ? v : 0; - res[3] = (code & 0x8) ? v : 0; - return res; -} - -template<> -inline float32x4_t dpps_neon<0x7f>(const float32x4_t& a,const float32x4_t& b) -{ - float v; - float32x4_t m = _mm_mul_ps(a,b); - m[3] = 0; - v = vaddvq_f32(m); - return _mm_set1_ps(v); -} - -template<> -inline float32x4_t dpps_neon<0xff>(const float32x4_t& a,const float32x4_t& b) -{ - float v; - float32x4_t m = _mm_mul_ps(a,b); - v = vaddvq_f32(m); - return _mm_set1_ps(v); -} - -#define _mm_dp_ps(a,b,c) dpps_neon<c>((a),(b)) - - - -AVX2NEON_ABI -__m128 _mm_cmpnge_ps (__m128 a, __m128 b) -{ - return __m128(vmvnq_s32(__m128i(_mm_cmpge_ps(a,b)))); -} - - -AVX2NEON_ABI -__m128 _mm_permutevar_ps (__m128 a, __m128i b) -{ - __m128 x; - for (int i=0;i<4;i++) - { - x[i] = a[b[i&3]]; - } - return x; -} - -AVX2NEON_ABI -__m256i _mm256_setzero_si256() -{ - __m256i res; - res.lo = res.hi = vdupq_n_s32(0); - return res; -} - -AVX2NEON_ABI -__m256 _mm256_setzero_ps() -{ - __m256 res; - res.lo = res.hi = vdupq_n_f32(0.0f); - return res; -} - -AVX2NEON_ABI -__m256i _mm256_undefined_si256() -{ - return _mm256_setzero_si256(); -} - -AVX2NEON_ABI -__m256 _mm256_undefined_ps() -{ - return _mm256_setzero_ps(); -} - -CAST_SIMD_TYPE(__m256d,_mm256_castps_pd,__m256,float64x2_t) -CAST_SIMD_TYPE(__m256i,_mm256_castps_si256,__m256,__m128i) -CAST_SIMD_TYPE(__m256, _mm256_castsi256_ps, __m256i,__m128) -CAST_SIMD_TYPE(__m256, _mm256_castpd_ps ,__m256d,__m128) -CAST_SIMD_TYPE(__m256d, _mm256_castsi256_pd, __m256i,float64x2_t) -CAST_SIMD_TYPE(__m256i, _mm256_castpd_si256, __m256d,__m128i) - - - - -AVX2NEON_ABI -__m128 _mm256_castps256_ps128 (__m256 a) -{ - return a.lo; -} - -AVX2NEON_ABI -__m256i _mm256_castsi128_si256 (__m128i a) -{ - __m256i res; - res.lo = a ; - res.hi = vdupq_n_s32(0); - return res; -} - -AVX2NEON_ABI -__m128i _mm256_castsi256_si128 (__m256i a) -{ - return a.lo; -} - -AVX2NEON_ABI -__m256 _mm256_castps128_ps256 (__m128 a) -{ - __m256 res; - res.lo = a; - res.hi = vdupq_n_f32(0); - return res; -} - - -AVX2NEON_ABI -__m256 _mm256_broadcast_ss (float const * mem_addr) -{ - __m256 res; - res.lo = res.hi = vdupq_n_f32(*mem_addr); - return res; -} - - - -AVX2NEON_ABI -__m256i _mm256_set_epi32 (int e7, int e6, int e5, int e4, int e3, int e2, int e1, int e0) -{ - __m128i lo = {e0,e1,e2,e3}, hi = {e4,e5,e6,e7}; - __m256i res; - res.lo = lo; res.hi = hi; - return res; - -} - -AVX2NEON_ABI -__m256i _mm256_set1_epi32 (int a) -{ - __m256i res; - res.lo = res.hi = vdupq_n_s32(a); - return res; -} - - - - -AVX2NEON_ABI -int _mm256_movemask_ps(const __m256& v) -{ - return (_mm_movemask_ps(v.hi) << 4) | _mm_movemask_ps(v.lo); -} - -template<int imm8> -AVX2NEON_ABI -__m256 __mm256_permute_ps (const __m256& a) -{ - __m256 res; - res.lo = _mm_shuffle_ps(a.lo,a.lo,imm8); - res.hi = _mm_shuffle_ps(a.hi,a.hi,imm8); - return res; - -} - -#define _mm256_permute_ps(a,c) __mm256_permute_ps<c>(a) - - -template<int imm8> -AVX2NEON_ABI -__m256 __mm256_shuffle_ps (const __m256 a,const __m256& b) -{ - __m256 res; - res.lo = _mm_shuffle_ps(a.lo,b.lo,imm8); - res.hi = _mm_shuffle_ps(a.hi,b.hi,imm8); - return res; - -} - -#define _mm256_shuffle_ps(a,b,c) __mm256_shuffle_ps<c>(a,b) - -AVX2NEON_ABI -__m256i _mm256_set1_epi64x (long long a) -{ - __m256i res; - int64x2_t t = vdupq_n_s64(a); - res.lo = res.hi = __m128i(t); - return res; -} - - -AVX2NEON_ABI -__m256 _mm256_permute2f128_ps (__m256 a, __m256 b, int imm8) -{ - __m256 res; - __m128 tmp; - switch (imm8 & 0x7) - { - case 0: tmp = a.lo; break; - case 1: tmp = a.hi; break; - case 2: tmp = b.lo; break; - case 3: tmp = b.hi; break; - } - if (imm8 & 0x8) - tmp = _mm_setzero_ps(); - - - - res.lo = tmp; - imm8 >>= 4; - - switch (imm8 & 0x7) - { - case 0: tmp = a.lo; break; - case 1: tmp = a.hi; break; - case 2: tmp = b.lo; break; - case 3: tmp = b.hi; break; - } - if (imm8 & 0x8) - tmp = _mm_setzero_ps(); - - res.hi = tmp; - - return res; -} - -AVX2NEON_ABI -__m256 _mm256_moveldup_ps (__m256 a) -{ - __m256 res; - res.lo[0] = res.lo[1] = a.lo[0]; - res.lo[2] = res.lo[3] = a.lo[2]; - res.hi[0] = res.hi[1] = a.hi[0]; - res.hi[2] = res.hi[3] = a.hi[2]; - return res; - -} - -AVX2NEON_ABI -__m256 _mm256_movehdup_ps (__m256 a) -{ - __m256 res; - res.lo[0] = res.lo[1] = a.lo[1]; - res.lo[2] = res.lo[3] = a.lo[3]; - res.hi[0] = res.hi[1] = a.hi[1]; - res.hi[2] = res.hi[3] = a.hi[3]; - return res; -} - -AVX2NEON_ABI -__m256 _mm256_insertf128_ps (__m256 a, __m128 b, int imm8) -{ - __m256 res = a; - if (imm8 & 1) res.hi = b; - else res.lo = b; - return res; -} - - -AVX2NEON_ABI -__m128 _mm256_extractf128_ps (__m256 a, const int imm8) -{ - if (imm8 & 1) return a.hi; - return a.lo; -} - - -AVX2NEON_ABI -__m256d _mm256_movedup_pd (__m256d a) -{ - __m256d res; - res.hi = a.hi; - res.lo[0] = res.lo[1] = a.lo[0]; - return res; -} - -AVX2NEON_ABI -__m256i _mm256_abs_epi32(__m256i a) -{ - __m256i res; - res.lo = vabsq_s32(a.lo); - res.hi = vabsq_s32(a.hi); - return res; -} - -UNARY_AVX_OP(__m256,_mm256_sqrt_ps,_mm_sqrt_ps) -UNARY_AVX_OP(__m256,_mm256_rsqrt_ps,_mm_rsqrt_ps) -UNARY_AVX_OP(__m256,_mm256_rcp_ps,_mm_rcp_ps) -UNARY_AVX_OP(__m256,_mm256_floor_ps,vrndmq_f32) -UNARY_AVX_OP(__m256,_mm256_ceil_ps,vrndpq_f32) - - -BINARY_AVX_OP(__m256i,_mm256_add_epi32,_mm_add_epi32) -BINARY_AVX_OP(__m256i,_mm256_sub_epi32,_mm_sub_epi32) -BINARY_AVX_OP(__m256i,_mm256_mullo_epi32,_mm_mullo_epi32) - -BINARY_AVX_OP(__m256i,_mm256_min_epi32,_mm_min_epi32) -BINARY_AVX_OP(__m256i,_mm256_max_epi32,_mm_max_epi32) -BINARY_AVX_OP_CAST(__m256i,_mm256_min_epu32,vminq_u32,__m128i,uint32x4_t) -BINARY_AVX_OP_CAST(__m256i,_mm256_max_epu32,vmaxq_u32,__m128i,uint32x4_t) - -BINARY_AVX_OP(__m256,_mm256_min_ps,_mm_min_ps) -BINARY_AVX_OP(__m256,_mm256_max_ps,_mm_max_ps) - -BINARY_AVX_OP(__m256,_mm256_add_ps,_mm_add_ps) -BINARY_AVX_OP(__m256,_mm256_mul_ps,_mm_mul_ps) -BINARY_AVX_OP(__m256,_mm256_sub_ps,_mm_sub_ps) -BINARY_AVX_OP(__m256,_mm256_div_ps,_mm_div_ps) - -BINARY_AVX_OP(__m256,_mm256_and_ps,_mm_and_ps) -BINARY_AVX_OP(__m256,_mm256_andnot_ps,_mm_andnot_ps) -BINARY_AVX_OP(__m256,_mm256_or_ps,_mm_or_ps) -BINARY_AVX_OP(__m256,_mm256_xor_ps,_mm_xor_ps) - -BINARY_AVX_OP_CAST(__m256d,_mm256_and_pd,vandq_s64,float64x2_t,int64x2_t) -BINARY_AVX_OP_CAST(__m256d,_mm256_or_pd,vorrq_s64,float64x2_t,int64x2_t) -BINARY_AVX_OP_CAST(__m256d,_mm256_xor_pd,veorq_s64,float64x2_t,int64x2_t) - - - -BINARY_AVX_OP(__m256i,_mm256_and_si256,_mm_and_si128) -BINARY_AVX_OP(__m256i,_mm256_or_si256,_mm_or_si128) -BINARY_AVX_OP(__m256i,_mm256_xor_si256,_mm_xor_si128) - - -BINARY_AVX_OP(__m256,_mm256_unpackhi_ps,_mm_unpackhi_ps) -BINARY_AVX_OP(__m256,_mm256_unpacklo_ps,_mm_unpacklo_ps) -TERNARY_AVX_OP(__m256,_mm256_blendv_ps,_mm_blendv_ps) - - -TERNARY_AVX_OP(__m256,_mm256_fmadd_ps,_mm_fmadd_ps) -TERNARY_AVX_OP(__m256,_mm256_fnmadd_ps,_mm_fnmadd_ps) -TERNARY_AVX_OP(__m256,_mm256_fmsub_ps,_mm_fmsub_ps) -TERNARY_AVX_OP(__m256,_mm256_fnmsub_ps,_mm_fnmsub_ps) - - -BINARY_AVX_OP(__m256i,_mm256_unpackhi_epi32,_mm_unpackhi_epi32) -BINARY_AVX_OP(__m256i,_mm256_unpacklo_epi32,_mm_unpacklo_epi32) - - -BINARY_AVX_OP(__m256i,_mm256_cmpeq_epi32,_mm_cmpeq_epi32) -BINARY_AVX_OP(__m256i,_mm256_cmpgt_epi32,_mm_cmpgt_epi32) -BINARY_AVX_OP(__m256,_mm256_cmpeq_ps,_mm_cmpeq_ps) -BINARY_AVX_OP(__m256,_mm256_cmpneq_ps,_mm_cmpneq_ps) -BINARY_AVX_OP(__m256,_mm256_cmpnlt_ps,_mm_cmpnlt_ps) -BINARY_AVX_OP(__m256,_mm256_cmpngt_ps,_mm_cmpngt_ps) -BINARY_AVX_OP(__m256,_mm256_cmpge_ps,_mm_cmpge_ps) -BINARY_AVX_OP(__m256,_mm256_cmpnge_ps,_mm_cmpnge_ps) -BINARY_AVX_OP(__m256,_mm256_cmplt_ps,_mm_cmplt_ps) -BINARY_AVX_OP(__m256,_mm256_cmple_ps,_mm_cmple_ps) -BINARY_AVX_OP(__m256,_mm256_cmpgt_ps,_mm_cmpgt_ps) -BINARY_AVX_OP(__m256,_mm256_cmpnle_ps,_mm_cmpnle_ps) - - -AVX2NEON_ABI -__m256i _mm256_cvtps_epi32 (__m256 a) -{ - __m256i res; - res.lo = _mm_cvtps_epi32(a.lo); - res.hi = _mm_cvtps_epi32(a.hi); - return res; - -} - -AVX2NEON_ABI -__m256i _mm256_cvttps_epi32 (__m256 a) -{ - __m256i res; - res.lo = _mm_cvttps_epi32(a.lo); - res.hi = _mm_cvttps_epi32(a.hi); - return res; - -} - -AVX2NEON_ABI -__m256 _mm256_loadu_ps (float const * mem_addr) -{ - __m256 res; - res.lo = *(__m128 *)(mem_addr + 0); - res.hi = *(__m128 *)(mem_addr + 4); - return res; -} -#define _mm256_load_ps _mm256_loadu_ps - - -AVX2NEON_ABI -int _mm256_testz_ps (const __m256& a, const __m256& b) -{ - __m256 t = a; - if (&a != &b) - t = _mm256_and_ps(a,b); - - __m128i l = vshrq_n_s32(__m128i(t.lo),31); - __m128i h = vshrq_n_s32(__m128i(t.hi),31); - return vaddvq_s32(vaddq_s32(l,h)) == 0; -} - - -AVX2NEON_ABI -__m256i _mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) -{ - __m256i res; - int64x2_t t0 = {e0,e1}; - int64x2_t t1 = {e2,e3}; - res.lo = __m128i(t0); - res.hi = __m128i(t1); - return res; -} - -AVX2NEON_ABI -__m256d _mm256_setzero_pd () -{ - __m256d res; - res.lo = res.hi = vdupq_n_f64(0); - return res; -} - -AVX2NEON_ABI -int _mm256_movemask_pd (__m256d a) -{ - int res = 0; - uint64x2_t x; - x = uint64x2_t(a.lo); - res |= (x[0] >> 63) ? 1 : 0; - res |= (x[0] >> 63) ? 2 : 0; - x = uint64x2_t(a.hi); - res |= (x[0] >> 63) ? 4 : 0; - res |= (x[0] >> 63) ? 8 : 0; - return res; -} - -AVX2NEON_ABI -__m256i _mm256_cmpeq_epi64 (__m256i a, __m256i b) -{ - __m256i res; - res.lo = __m128i(vceqq_s64(int64x2_t(a.lo),int64x2_t(b.lo))); - res.hi = __m128i(vceqq_s64(int64x2_t(a.hi),int64x2_t(b.hi))); - return res; -} - -AVX2NEON_ABI -__m256i _mm256_cmpeq_pd (__m256d a, __m256d b) -{ - __m256i res; - res.lo = __m128i(vceqq_f64(a.lo,b.lo)); - res.hi = __m128i(vceqq_f64(a.hi,b.hi)); - return res; -} - - -AVX2NEON_ABI -int _mm256_testz_pd (const __m256d& a, const __m256d& b) -{ - __m256d t = a; - - if (&a != &b) - t = _mm256_and_pd(a,b); - - return _mm256_movemask_pd(t) == 0; -} - -AVX2NEON_ABI -__m256d _mm256_blendv_pd (__m256d a, __m256d b, __m256d mask) -{ - __m256d res; - uint64x2_t t = uint64x2_t(mask.lo); - res.lo[0] = (t[0] >> 63) ? b.lo[0] : a.lo[0]; - res.lo[1] = (t[1] >> 63) ? b.lo[1] : a.lo[1]; - t = uint64x2_t(mask.hi); - res.hi[0] = (t[0] >> 63) ? b.hi[0] : a.hi[0]; - res.hi[1] = (t[1] >> 63) ? b.hi[1] : a.hi[1]; - return res; -} - -template<int imm8> -__m256 __mm256_dp_ps (__m256 a, __m256 b) -{ - __m256 res; - res.lo = _mm_dp_ps(a.lo,b.lo,imm8); - res.hi = _mm_dp_ps(a.hi,b.hi,imm8); - return res; -} - -#define _mm256_dp_ps(a,b,c) __mm256_dp_ps<c>(a,b) - -AVX2NEON_ABI -double _mm256_permute4x64_pd_select(__m256d a, const int imm8) -{ - switch (imm8 & 3) { - case 0: - return a.lo[0]; - case 1: - return a.lo[1]; - case 2: - return a.hi[0]; - case 3: - return a.hi[1]; - } - __builtin_unreachable(); - return 0; -} - -AVX2NEON_ABI -__m256d _mm256_permute4x64_pd (__m256d a, const int imm8) -{ - __m256d res; - res.lo[0] = _mm256_permute4x64_pd_select(a,imm8 >> 0); - res.lo[1] = _mm256_permute4x64_pd_select(a,imm8 >> 2); - res.hi[0] = _mm256_permute4x64_pd_select(a,imm8 >> 4); - res.hi[1] = _mm256_permute4x64_pd_select(a,imm8 >> 6); - - return res; -} - -AVX2NEON_ABI -__m256i _mm256_insertf128_si256 (__m256i a, __m128i b, int imm8) -{ - return __m256i(_mm256_insertf128_ps((__m256)a,(__m128)b,imm8)); -} - - -AVX2NEON_ABI -__m256i _mm256_loadu_si256 (__m256i const * mem_addr) -{ - __m256i res; - res.lo = *(__m128i *)((int32_t *)mem_addr + 0); - res.hi = *(__m128i *)((int32_t *)mem_addr + 4); - return res; -} - -#define _mm256_load_si256 _mm256_loadu_si256 - -AVX2NEON_ABI -void _mm256_storeu_ps (float * mem_addr, __m256 a) -{ - *(__m128 *)(mem_addr + 0) = a.lo; - *(__m128 *)(mem_addr + 4) = a.hi; - -} - -#define _mm256_store_ps _mm256_storeu_ps -#define _mm256_stream_ps _mm256_storeu_ps - - -AVX2NEON_ABI -void _mm256_storeu_si256 (__m256i * mem_addr, __m256i a) -{ - *(__m128i *)((int *)mem_addr + 0) = a.lo; - *(__m128i *)((int *)mem_addr + 4) = a.hi; - -} - -#define _mm256_store_si256 _mm256_storeu_si256 - - - -AVX2NEON_ABI -__m256 _mm256_maskload_ps (float const * mem_addr, __m256i mask) -{ - __m256 res; - res.lo = _mm_maskload_ps(mem_addr,mask.lo); - res.hi = _mm_maskload_ps(mem_addr + 4,mask.hi); - return res; - -} - - -AVX2NEON_ABI -__m256i _mm256_cvtepu8_epi32 (__m128i a) -{ - __m256i res; - uint8x16_t x = uint8x16_t(a); - for (int i=0;i<4;i++) - { - res.lo[i] = x[i]; - res.hi[i] = x[i+4]; - } - return res; -} - - -AVX2NEON_ABI -__m256i _mm256_cvtepi8_epi32 (__m128i a) -{ - __m256i res; - int8x16_t x = int8x16_t(a); - for (int i=0;i<4;i++) - { - res.lo[i] = x[i]; - res.hi[i] = x[i+4]; - } - return res; -} - - -AVX2NEON_ABI -__m256i _mm256_cvtepu16_epi32 (__m128i a) -{ - __m256i res; - uint16x8_t x = uint16x8_t(a); - for (int i=0;i<4;i++) - { - res.lo[i] = x[i]; - res.hi[i] = x[i+4]; - } - return res; -} - -AVX2NEON_ABI -__m256i _mm256_cvtepi16_epi32 (__m128i a) -{ - __m256i res; - int16x8_t x = int16x8_t(a); - for (int i=0;i<4;i++) - { - res.lo[i] = x[i]; - res.hi[i] = x[i+4]; - } - return res; -} - - - -AVX2NEON_ABI -void _mm256_maskstore_epi32 (int* mem_addr, __m256i mask, __m256i a) -{ - _mm_maskstore_epi32(mem_addr,mask.lo,a.lo); - _mm_maskstore_epi32(mem_addr + 4,mask.hi,a.hi); -} - -AVX2NEON_ABI -__m256i _mm256_slli_epi32 (__m256i a, int imm8) -{ - __m256i res; - res.lo = _mm_slli_epi32(a.lo,imm8); - res.hi = _mm_slli_epi32(a.hi,imm8); - return res; -} - - -AVX2NEON_ABI -__m256i _mm256_srli_epi32 (__m256i a, int imm8) -{ - __m256i res; - res.lo = _mm_srli_epi32(a.lo,imm8); - res.hi = _mm_srli_epi32(a.hi,imm8); - return res; -} - -AVX2NEON_ABI -__m256i _mm256_srai_epi32 (__m256i a, int imm8) -{ - __m256i res; - res.lo = _mm_srai_epi32(a.lo,imm8); - res.hi = _mm_srai_epi32(a.hi,imm8); - return res; -} - - -AVX2NEON_ABI -__m256i _mm256_sllv_epi32 (__m256i a, __m256i count) -{ - __m256i res; - res.lo = vshlq_s32(a.lo,count.lo); - res.hi = vshlq_s32(a.hi,count.hi); - return res; - -} - - -AVX2NEON_ABI -__m256i _mm256_srav_epi32 (__m256i a, __m256i count) -{ - __m256i res; - res.lo = vshlq_s32(a.lo,vnegq_s32(count.lo)); - res.hi = vshlq_s32(a.hi,vnegq_s32(count.hi)); - return res; - -} - -AVX2NEON_ABI -__m256i _mm256_srlv_epi32 (__m256i a, __m256i count) -{ - __m256i res; - res.lo = __m128i(vshlq_u32(uint32x4_t(a.lo),vnegq_s32(count.lo))); - res.hi = __m128i(vshlq_u32(uint32x4_t(a.hi),vnegq_s32(count.hi))); - return res; - -} - - -AVX2NEON_ABI -__m256i _mm256_permute2f128_si256 (__m256i a, __m256i b, int imm8) -{ - return __m256i(_mm256_permute2f128_ps(__m256(a),__m256(b),imm8)); -} - - -AVX2NEON_ABI -__m128i _mm256_extractf128_si256 (__m256i a, const int imm8) -{ - if (imm8 & 1) return a.hi; - return a.lo; -} - -AVX2NEON_ABI -__m256 _mm256_set1_ps(float x) -{ - __m256 res; - res.lo = res.hi = vdupq_n_f32(x); - return res; -} - -AVX2NEON_ABI -__m256 _mm256_set_ps (float e7, float e6, float e5, float e4, float e3, float e2, float e1, float e0) -{ - __m256 res; - res.lo = _mm_set_ps(e3,e2,e1,e0); - res.hi = _mm_set_ps(e7,e6,e5,e4); - return res; -} - -AVX2NEON_ABI -__m256 _mm256_broadcast_ps (__m128 const * mem_addr) -{ - __m256 res; - res.lo = res.hi = *mem_addr; - return res; -} - -AVX2NEON_ABI -__m256 _mm256_cvtepi32_ps (__m256i a) -{ - __m256 res; - res.lo = _mm_cvtepi32_ps(a.lo); - res.hi = _mm_cvtepi32_ps(a.hi); - return res; -} -AVX2NEON_ABI -void _mm256_maskstore_ps (float * mem_addr, __m256i mask, __m256 a) -{ - for (int i=0;i<4;i++) { - if (mask.lo[i] & 0x80000000) mem_addr[i] = a.lo[i]; - if (mask.hi[i] & 0x80000000) mem_addr[i+4] = a.hi[i]; - } -} - -AVX2NEON_ABI -__m256d _mm256_andnot_pd (__m256d a, __m256d b) -{ - __m256d res; - res.lo = float64x2_t(_mm_andnot_ps(__m128(a.lo),__m128(b.lo))); - res.hi = float64x2_t(_mm_andnot_ps(__m128(a.hi),__m128(b.hi))); - return res; -} - -AVX2NEON_ABI -__m256 _mm256_blend_ps (__m256 a, __m256 b, const int imm8) -{ - __m256 res; - res.lo = _mm_blend_ps(a.lo,b.lo,imm8 & 0xf); - res.hi = _mm_blend_ps(a.hi,b.hi,imm8 >> 4); - return res; - -} - - -AVX2NEON_ABI -__m256i _mm256_blend_epi32 (__m256i a, __m256i b, const int imm8) -{ - __m256i res; - res.lo = _mm_blend_epi32(a.lo,b.lo,imm8 & 0xf); - res.hi = _mm_blend_epi32(a.hi,b.hi,imm8 >> 4); - return res; - -} - -AVX2NEON_ABI -__m256i _mm256_i32gather_epi32 (int const* base_addr, __m256i vindex, const int scale) -{ - __m256i res; - for (int i=0;i<4;i++) - { - res.lo[i] = *(int *)((char *) base_addr + (vindex.lo[i]*scale)); - res.hi[i] = *(int *)((char *) base_addr + (vindex.hi[i]*scale)); - } - return res; -} - - -AVX2NEON_ABI -__m256i _mm256_mask_i32gather_epi32 (__m256i src, int const* base_addr, __m256i vindex, __m256i mask, const int scale) -{ - __m256i res = _mm256_setzero_si256(); - for (int i=0;i<4;i++) - { - if (mask.lo[i] >> 31) res.lo[i] = *(int *)((char *) base_addr + (vindex.lo[i]*scale)); - if (mask.hi[i] >> 31) res.hi[i] = *(int *)((char *) base_addr + (vindex.hi[i]*scale)); - } - - return res; - -} - - diff --git a/thirdparty/embree-aarch64/common/math/SSE2NEON.h b/thirdparty/embree-aarch64/common/math/SSE2NEON.h deleted file mode 100644 index 2013151d31..0000000000 --- a/thirdparty/embree-aarch64/common/math/SSE2NEON.h +++ /dev/null @@ -1,1753 +0,0 @@ -#ifndef SSE2NEON_H -#define SSE2NEON_H - -// This header file provides a simple API translation layer -// between SSE intrinsics to their corresponding ARM NEON versions -// -// This header file does not (yet) translate *all* of the SSE intrinsics. -// Since this is in support of a specific porting effort, I have only -// included the intrinsics I needed to get my port to work. -// -// Questions/Comments/Feedback send to: jratcliffscarab@gmail.com -// -// If you want to improve or add to this project, send me an -// email and I will probably approve your access to the depot. -// -// Project is located here: -// -// https://github.com/jratcliff63367/sse2neon -// -// Show your appreciation for open source by sending me a bitcoin tip to the following -// address. -// -// TipJar: 1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p : -// https://blockchain.info/address/1PzgWDSyq4pmdAXRH8SPUtta4SWGrt4B1p -// -// -// Contributors to this project are: -// -// John W. Ratcliff : jratcliffscarab@gmail.com -// Brandon Rowlett : browlett@nvidia.com -// Ken Fast : kfast@gdeb.com -// Eric van Beurden : evanbeurden@nvidia.com -// -// -// ********************************************************************************************************************* -// Release notes for January 20, 2017 version: -// -// The unit tests have been refactored. They no longer assert on an error, instead they return a pass/fail condition -// The unit-tests now test 10,000 random float and int values against each intrinsic. -// -// SSE2NEON now supports 95 SSE intrinsics. 39 of them have formal unit tests which have been implemented and -// fully tested on NEON/ARM. The remaining 56 still need unit tests implemented. -// -// A struct is now defined in this header file called 'SIMDVec' which can be used by applications which -// attempt to access the contents of an _m128 struct directly. It is important to note that accessing the __m128 -// struct directly is bad coding practice by Microsoft: @see: https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx -// -// However, some legacy source code may try to access the contents of an __m128 struct directly so the developer -// can use the SIMDVec as an alias for it. Any casting must be done manually by the developer, as you cannot -// cast or otherwise alias the base NEON data type for intrinsic operations. -// -// A bug was found with the _mm_shuffle_ps intrinsic. If the shuffle permutation was not one of the ones with -// a custom/unique implementation causing it to fall through to the default shuffle implementation it was failing -// to return the correct value. This is now fixed. -// -// A bug was found with the _mm_cvtps_epi32 intrinsic. This converts floating point values to integers. -// It was not honoring the correct rounding mode. In SSE the default rounding mode when converting from float to int -// is to use 'round to even' otherwise known as 'bankers rounding'. ARMv7 did not support this feature but ARMv8 does. -// As it stands today, this header file assumes ARMv8. If you are trying to target really old ARM devices, you may get -// a build error. -// -// Support for a number of new intrinsics was added, however, none of them yet have unit-tests to 100% confirm they are -// producing the correct results on NEON. These unit tests will be added as soon as possible. -// -// Here is the list of new instrinsics which have been added: -// -// _mm_cvtss_f32 : extracts the lower order floating point value from the parameter -// _mm_add_ss : adds the scalar single - precision floating point values of a and b -// _mm_div_ps : Divides the four single - precision, floating - point values of a and b. -// _mm_div_ss : Divides the scalar single - precision floating point value of a by b. -// _mm_sqrt_ss : Computes the approximation of the square root of the scalar single - precision floating point value of in. -// _mm_rsqrt_ps : Computes the approximations of the reciprocal square roots of the four single - precision floating point values of in. -// _mm_comilt_ss : Compares the lower single - precision floating point scalar values of a and b using a less than operation -// _mm_comigt_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than operation. -// _mm_comile_ss : Compares the lower single - precision floating point scalar values of a and b using a less than or equal operation. -// _mm_comige_ss : Compares the lower single - precision floating point scalar values of a and b using a greater than or equal operation. -// _mm_comieq_ss : Compares the lower single - precision floating point scalar values of a and b using an equality operation. -// _mm_comineq_s : Compares the lower single - precision floating point scalar values of a and b using an inequality operation -// _mm_unpackhi_epi8 : Interleaves the upper 8 signed or unsigned 8 - bit integers in a with the upper 8 signed or unsigned 8 - bit integers in b. -// _mm_unpackhi_epi16: Interleaves the upper 4 signed or unsigned 16 - bit integers in a with the upper 4 signed or unsigned 16 - bit integers in b. -// -// ********************************************************************************************************************* -/* -** The MIT license: -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and associated documentation files (the "Software"), to deal -** in the Software without restriction, including without limitation the rights -** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -** copies of the Software, and to permit persons to whom the Software is furnished -** to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in all -** copies or substantial portions of the Software. - -** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -** WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -** CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -#pragma once - -#define GCC 1 -#define ENABLE_CPP_VERSION 0 - -// enable precise emulation of _mm_min_ps and _mm_max_ps? -// This would slow down the computation a bit, but gives consistent result with x86 SSE2. -// (e.g. would solve a hole or NaN pixel in the rendering result) -#define USE_PRECISE_MINMAX_IMPLEMENTATION (1) - -#if GCC -#define FORCE_INLINE inline __attribute__((always_inline)) -#define ALIGN_STRUCT(x) __attribute__((aligned(x))) -#else -#define FORCE_INLINE inline -#define ALIGN_STRUCT(x) __declspec(align(x)) -#endif - -#include <stdint.h> -#include "arm_neon.h" -#if defined(__aarch64__) -#include "constants.h" -#endif - - -#if !defined(__has_builtin) -#define __has_builtin(x) (0) -#endif - -/*******************************************************/ -/* MACRO for shuffle parameter for _mm_shuffle_ps(). */ -/* Argument fp3 is a digit[0123] that represents the fp*/ -/* from argument "b" of mm_shuffle_ps that will be */ -/* placed in fp3 of result. fp2 is the same for fp2 in */ -/* result. fp1 is a digit[0123] that represents the fp */ -/* from argument "a" of mm_shuffle_ps that will be */ -/* places in fp1 of result. fp0 is the same for fp0 of */ -/* result */ -/*******************************************************/ -#if defined(__aarch64__) -#define _MN_SHUFFLE(fp3,fp2,fp1,fp0) ( (uint8x16_t){ (((fp3)*4)+0), (((fp3)*4)+1), (((fp3)*4)+2), (((fp3)*4)+3), (((fp2)*4)+0), (((fp2)*4)+1), (((fp2)*4)+2), (((fp2)*4)+3), (((fp1)*4)+0), (((fp1)*4)+1), (((fp1)*4)+2), (((fp1)*4)+3), (((fp0)*4)+0), (((fp0)*4)+1), (((fp0)*4)+2), (((fp0)*4)+3) } ) -#define _MF_SHUFFLE(fp3,fp2,fp1,fp0) ( (uint8x16_t){ (((fp3)*4)+0), (((fp3)*4)+1), (((fp3)*4)+2), (((fp3)*4)+3), (((fp2)*4)+0), (((fp2)*4)+1), (((fp2)*4)+2), (((fp2)*4)+3), (((fp1)*4)+16+0), (((fp1)*4)+16+1), (((fp1)*4)+16+2), (((fp1)*4)+16+3), (((fp0)*4)+16+0), (((fp0)*4)+16+1), (((fp0)*4)+16+2), (((fp0)*4)+16+3) } ) -#endif - -#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ - ((fp1) << 2) | ((fp0))) - -typedef float32x4_t __m128; -typedef int32x4_t __m128i; - -// union intended to allow direct access to an __m128 variable using the names that the MSVC -// compiler provides. This union should really only be used when trying to access the members -// of the vector as integer values. GCC/clang allow native access to the float members through -// a simple array access operator (in C since 4.6, in C++ since 4.8). -// -// Ideally direct accesses to SIMD vectors should not be used since it can cause a performance -// hit. If it really is needed however, the original __m128 variable can be aliased with a -// pointer to this union and used to access individual components. The use of this union should -// be hidden behind a macro that is used throughout the codebase to access the members instead -// of always declaring this type of variable. -typedef union ALIGN_STRUCT(16) SIMDVec -{ - float m128_f32[4]; // as floats - do not to use this. Added for convenience. - int8_t m128_i8[16]; // as signed 8-bit integers. - int16_t m128_i16[8]; // as signed 16-bit integers. - int32_t m128_i32[4]; // as signed 32-bit integers. - int64_t m128_i64[2]; // as signed 64-bit integers. - uint8_t m128_u8[16]; // as unsigned 8-bit integers. - uint16_t m128_u16[8]; // as unsigned 16-bit integers. - uint32_t m128_u32[4]; // as unsigned 32-bit integers. - uint64_t m128_u64[2]; // as unsigned 64-bit integers. - double m128_f64[2]; // as signed double -} SIMDVec; - -// ****************************************** -// CPU stuff -// ****************************************** - -typedef SIMDVec __m128d; - -#include <stdlib.h> - -#ifndef _MM_MASK_MASK -#define _MM_MASK_MASK 0x1f80 -#define _MM_MASK_DIV_ZERO 0x200 -#define _MM_FLUSH_ZERO_ON 0x8000 -#define _MM_DENORMALS_ZERO_ON 0x40 -#define _MM_MASK_DENORM 0x100 -#endif -#define _MM_SET_EXCEPTION_MASK(x) -#define _MM_SET_FLUSH_ZERO_MODE(x) -#define _MM_SET_DENORMALS_ZERO_MODE(x) - -FORCE_INLINE void _mm_pause() -{ -} - -FORCE_INLINE void _mm_mfence() -{ - __sync_synchronize(); -} - -#define _MM_HINT_T0 3 -#define _MM_HINT_T1 2 -#define _MM_HINT_T2 1 -#define _MM_HINT_NTA 0 - -FORCE_INLINE void _mm_prefetch(const void* ptr, unsigned int level) -{ - __builtin_prefetch(ptr); - -} - -FORCE_INLINE void* _mm_malloc(int size, int align) -{ - void *ptr; - // align must be multiple of sizeof(void *) for posix_memalign. - if (align < sizeof(void *)) { - align = sizeof(void *); - } - - if ((align % sizeof(void *)) != 0) { - // fallback to malloc - ptr = malloc(size); - } else { - if (posix_memalign(&ptr, align, size)) { - return 0; - } - } - - return ptr; -} - -FORCE_INLINE void _mm_free(void* ptr) -{ - free(ptr); -} - -FORCE_INLINE int _mm_getcsr() -{ - return 0; -} - -FORCE_INLINE void _mm_setcsr(int val) -{ - return; -} - -// ****************************************** -// Set/get methods -// ****************************************** - -// extracts the lower order floating point value from the parameter : https://msdn.microsoft.com/en-us/library/bb514059%28v=vs.120%29.aspx?f=255&MSPPError=-2147217396 -#if defined(__aarch64__) -FORCE_INLINE float _mm_cvtss_f32(const __m128& x) -{ - return x[0]; -} -#else -FORCE_INLINE float _mm_cvtss_f32(__m128 a) -{ - return vgetq_lane_f32(a, 0); -} -#endif - -// Sets the 128-bit value to zero https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx -FORCE_INLINE __m128i _mm_setzero_si128() -{ - return vdupq_n_s32(0); -} - -// Clears the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setzero_ps(void) -{ - return vdupq_n_f32(0); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set1_ps(float _w) -{ - return vdupq_n_f32(_w); -} - -// Sets the four single-precision, floating-point values to w. https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx -FORCE_INLINE __m128 _mm_set_ps1(float _w) -{ - return vdupq_n_f32(_w); -} - -// Sets the four single-precision, floating-point values to the four inputs. https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx -#if defined(__aarch64__) -FORCE_INLINE __m128 _mm_set_ps(const float w, const float z, const float y, const float x) -{ - float32x4_t t = { x, y, z, w }; - return t; -} - -// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setr_ps(const float w, const float z , const float y , const float x ) -{ - float32x4_t t = { w, z, y, x }; - return t; -} -#else -FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) -{ - float __attribute__((aligned(16))) data[4] = { x, y, z, w }; - return vld1q_f32(data); -} - -// Sets the four single-precision, floating-point values to the four inputs in reverse order. https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx -FORCE_INLINE __m128 _mm_setr_ps(float w, float z , float y , float x ) -{ - float __attribute__ ((aligned (16))) data[4] = { w, z, y, x }; - return vld1q_f32(data); -} -#endif - -// Sets the 4 signed 32-bit integer values to i. https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set1_epi32(int _i) -{ - return vdupq_n_s32(_i); -} - -//Set the first lane to of 4 signed single-position, floating-point number to w -#if defined(__aarch64__) -FORCE_INLINE __m128 _mm_set_ss(float _w) -{ - float32x4_t res = {_w, 0, 0, 0}; - return res; -} - -// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) -{ - int32x4_t t = {i0,i1,i2,i3}; - return t; -} -#else -FORCE_INLINE __m128 _mm_set_ss(float _w) -{ - __m128 val = _mm_setzero_ps(); - return vsetq_lane_f32(_w, val, 0); -} - -// Sets the 4 signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx -FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) -{ - int32_t __attribute__((aligned(16))) data[4] = { i0, i1, i2, i3 }; - return vld1q_s32(data); -} -#endif - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx -FORCE_INLINE void _mm_store_ps(float *p, __m128 a) -{ - vst1q_f32(p, a); -} - -// Stores four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx -FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) -{ - vst1q_f32(p, a); -} - -FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a) -{ - vst1q_s32((int32_t*) p,a); -} - -// Stores four 32-bit integer values as (as a __m128i value) at the address p. https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx -FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a ) -{ - vst1q_s32((int32_t*) p,a); -} - -// Stores the lower single - precision, floating - point value. https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx -FORCE_INLINE void _mm_store_ss(float *p, __m128 a) -{ - vst1q_lane_f32(p, a, 0); -} - -// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx -FORCE_INLINE void _mm_storel_epi64(__m128i* a, __m128i b) -{ - *a = (__m128i)vsetq_lane_s64((int64_t)vget_low_s32(b), *(int64x2_t*)a, 0); -} - -// Loads a single single-precision, floating-point value, copying it into all four words https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load1_ps(const float * p) -{ - return vld1q_dup_f32(p); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx -FORCE_INLINE __m128 _mm_load_ps(const float * p) -{ - return vld1q_f32(p); -} - -// Loads four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_loadu_ps(const float * p) -{ - // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are equivalent for neon - return vld1q_f32(p); -} - -// Loads an single - precision, floating - point value into the low word and clears the upper three words. https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_load_ss(const float * p) -{ - __m128 result = vdupq_n_f32(0); - return vsetq_lane_f32(*p, result, 0); -} - -FORCE_INLINE __m128i _mm_loadu_si128(__m128i *p) -{ - return (__m128i)vld1q_s32((const int32_t*) p); -} - - -// ****************************************** -// Logic/Binary operations -// ****************************************** - -// Compares for inequality. https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) -{ - return (__m128)vmvnq_s32((__m128i)vceqq_f32(a, b)); -} - -// Computes the bitwise AND-NOT of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx -FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) -{ - return (__m128)vbicq_s32((__m128i)b, (__m128i)a); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the 128-bit value in a. https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) -{ - return (__m128i)vbicq_s32(b, a); // *NOTE* argument swap -} - -// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx -FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) -{ - return (__m128i)vandq_s32(a, b); -} - -// Computes the bitwise AND of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) -{ - return (__m128)vandq_s32((__m128i)a, (__m128i)b); -} - -// Computes the bitwise OR of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx -FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) -{ - return (__m128)vorrq_s32((__m128i)a, (__m128i)b); -} - -// Computes bitwise EXOR (exclusive-or) of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) -{ - return (__m128)veorq_s32((__m128i)a, (__m128i)b); -} - -// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx -FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) -{ - return (__m128i)vorrq_s32(a, b); -} - -// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx -FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) -{ - return veorq_s32(a, b); -} - -// NEON does not provide this method -// Creates a 4-bit mask from the most significant bits of the four single-precision, floating-point values. https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_ps(__m128 a) -{ -#if ENABLE_CPP_VERSION // I am not yet convinced that the NEON version is faster than the C version of this - uint32x4_t &ia = *(uint32x4_t *)&a; - return (ia[0] >> 31) | ((ia[1] >> 30) & 2) | ((ia[2] >> 29) & 4) | ((ia[3] >> 28) & 8); -#else - -#if defined(__aarch64__) - uint32x4_t t2 = vandq_u32(vreinterpretq_u32_f32(a), embree::movemask_mask); - return vaddvq_u32(t2); -#else - static const uint32x4_t movemask = { 1, 2, 4, 8 }; - static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - uint32x4_t t0 = vreinterpretq_u32_f32(a); - uint32x4_t t1 = vtstq_u32(t0, highbit); - uint32x4_t t2 = vandq_u32(t1, movemask); - uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); - return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); -#endif - -#endif -} - -#if defined(__aarch64__) -FORCE_INLINE int _mm_movemask_popcnt_ps(__m128 a) -{ - uint32x4_t t2 = vandq_u32(vreinterpretq_u32_f32(a), embree::movemask_mask); - t2 = vreinterpretq_u32_u8(vcntq_u8(vreinterpretq_u8_u32(t2))); - return vaddvq_u32(t2); - -} -#endif - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of b and places it into the high end of the result. -FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) -{ - return vcombine_f32(vget_high_f32(a), vget_low_f32(b)); -} - -// takes the lower two 32-bit values from a and swaps them and places in high end of result -// takes the higher two 32 bit values from b and swaps them and places in low end of result. -FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) -{ - return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(b))); -} - -// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the high -FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) -{ - return vcombine_f32(vget_low_f32(a), vget_high_f32(b)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 1)), vdup_n_f32(vgetq_lane_f32(b, 0))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 2)), vdup_n_f32(vgetq_lane_f32(b, 0))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 0)), vdup_n_f32(vgetq_lane_f32(b, 2))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) -{ - float32_t a0 = vgetq_lane_f32(a, 0); - float32_t a2 = vgetq_lane_f32(a, 2); - float32x2_t aVal = vdup_n_f32(a2); - aVal = vset_lane_f32(a0, aVal, 1); - return vcombine_f32(aVal, vget_high_f32(b)); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) -{ - return vcombine_f32(vdup_n_f32(vgetq_lane_f32(a, 3)), vdup_n_f32(vgetq_lane_f32(b, 1))); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(b, 0); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t bVal = vdup_n_f32(b0); - bVal = vset_lane_f32(b2, bVal, 1); - return vcombine_f32(vget_low_f32(a), bVal); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2001(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(b, 0); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t bVal = vdup_n_f32(b0); - bVal = vset_lane_f32(b2, bVal, 1); - return vcombine_f32(vrev64_f32(vget_low_f32(a)), bVal); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) -{ - float32_t b0 = vgetq_lane_f32(b, 0); - float32_t b2 = vgetq_lane_f32(b, 2); - float32x2_t bVal = vdup_n_f32(b0); - bVal = vset_lane_f32(b2, bVal, 1); - return vcombine_f32(vget_high_f32(a), bVal); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0321(__m128 a, __m128 b) -{ - float32x2_t a21 = vget_high_f32(vextq_f32(a, a, 3)); - float32x2_t b03 = vget_low_f32(vextq_f32(b, b, 3)); - return vcombine_f32(a21, b03); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_2103(__m128 a, __m128 b) -{ - float32x2_t a03 = vget_low_f32(vextq_f32(a, a, 3)); - float32x2_t b21 = vget_high_f32(vextq_f32(b, b, 3)); - return vcombine_f32(a03, b21); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1010(__m128 a, __m128 b) -{ - float32x2_t a10 = vget_low_f32(a); - float32x2_t b10 = vget_low_f32(b); - return vcombine_f32(a10, b10); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_1001(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(a)); - float32x2_t b10 = vget_low_f32(b); - return vcombine_f32(a01, b10); -} - -FORCE_INLINE __m128 _mm_shuffle_ps_0101(__m128 a, __m128 b) -{ - float32x2_t a01 = vrev64_f32(vget_low_f32(a)); - float32x2_t b01 = vrev64_f32(vget_low_f32(b)); - return vcombine_f32(a01, b01); -} - -// NEON does not support a general purpose permute intrinsic -// Currently I am not sure whether the C implementation is faster or slower than the NEON version. -// Note, this has to be expanded as a template because the shuffle value must be an immediate value. -// The same is true on SSE as well. -// Selects four specific single-precision, floating-point values from a and b, based on the mask i. https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx -template <int i> -FORCE_INLINE __m128 _mm_shuffle_ps_default(const __m128& a, const __m128& b) -{ -#if ENABLE_CPP_VERSION // I am not convinced that the NEON version is faster than the C version yet. - __m128 ret; - ret[0] = a[i & 0x3]; - ret[1] = a[(i >> 2) & 0x3]; - ret[2] = b[(i >> 4) & 0x03]; - ret[3] = b[(i >> 6) & 0x03]; - return ret; -#else -# if __has_builtin(__builtin_shufflevector) - return __builtin_shufflevector( \ - a, b, (i) & (0x3), ((i) >> 2) & 0x3, - (((i) >> 4) & 0x3) + 4, (((i) >> 6) & 0x3) + 4); -# else - const int i0 = (i >> 0)&0x3; - const int i1 = (i >> 2)&0x3; - const int i2 = (i >> 4)&0x3; - const int i3 = (i >> 6)&0x3; - - if (&a == &b) - { - if (i0 == i1 && i0 == i2 && i0 == i3) - { - return (float32x4_t)vdupq_laneq_f32(a,i0); - } - static const uint8_t tbl[16] = { - (i0*4) + 0,(i0*4) + 1,(i0*4) + 2,(i0*4) + 3, - (i1*4) + 0,(i1*4) + 1,(i1*4) + 2,(i1*4) + 3, - (i2*4) + 0,(i2*4) + 1,(i2*4) + 2,(i2*4) + 3, - (i3*4) + 0,(i3*4) + 1,(i3*4) + 2,(i3*4) + 3 - }; - - return (float32x4_t)vqtbl1q_s8(int8x16_t(b),*(uint8x16_t *)tbl); - - } - else - { - - static const uint8_t tbl[16] = { - (i0*4) + 0,(i0*4) + 1,(i0*4) + 2,(i0*4) + 3, - (i1*4) + 0,(i1*4) + 1,(i1*4) + 2,(i1*4) + 3, - (i2*4) + 0 + 16,(i2*4) + 1 + 16,(i2*4) + 2 + 16,(i2*4) + 3 + 16, - (i3*4) + 0 + 16,(i3*4) + 1 + 16,(i3*4) + 2 + 16,(i3*4) + 3 + 16 - }; - - return float32x4_t(vqtbl2q_s8((int8x16x2_t){int8x16_t(a),int8x16_t(b)},*(uint8x16_t *)tbl)); - } -# endif //builtin(shufflevector) -#endif -} - -template <int i > -FORCE_INLINE __m128 _mm_shuffle_ps_function(const __m128& a, const __m128& b) -{ - switch (i) - { - case _MM_SHUFFLE(1, 0, 3, 2): - return _mm_shuffle_ps_1032(a, b); - break; - case _MM_SHUFFLE(2, 3, 0, 1): - return _mm_shuffle_ps_2301(a, b); - break; - case _MM_SHUFFLE(3, 2, 1, 0): - return _mm_shuffle_ps_3210(a, b); - break; - case _MM_SHUFFLE(0, 0, 1, 1): - return _mm_shuffle_ps_0011(a, b); - break; - case _MM_SHUFFLE(0, 0, 2, 2): - return _mm_shuffle_ps_0022(a, b); - break; - case _MM_SHUFFLE(2, 2, 0, 0): - return _mm_shuffle_ps_2200(a, b); - break; - case _MM_SHUFFLE(3, 2, 0, 2): - return _mm_shuffle_ps_3202(a, b); - break; - case _MM_SHUFFLE(1, 1, 3, 3): - return _mm_shuffle_ps_1133(a, b); - break; - case _MM_SHUFFLE(2, 0, 1, 0): - return _mm_shuffle_ps_2010(a, b); - break; - case _MM_SHUFFLE(2, 0, 0, 1): - return _mm_shuffle_ps_2001(a, b); - break; - case _MM_SHUFFLE(2, 0, 3, 2): - return _mm_shuffle_ps_2032(a, b); - break; - case _MM_SHUFFLE(0, 3, 2, 1): - return _mm_shuffle_ps_0321(a, b); - break; - case _MM_SHUFFLE(2, 1, 0, 3): - return _mm_shuffle_ps_2103(a, b); - break; - case _MM_SHUFFLE(1, 0, 1, 0): - return _mm_shuffle_ps_1010(a, b); - break; - case _MM_SHUFFLE(1, 0, 0, 1): - return _mm_shuffle_ps_1001(a, b); - break; - case _MM_SHUFFLE(0, 1, 0, 1): - return _mm_shuffle_ps_0101(a, b); - break; - } - return _mm_shuffle_ps_default<i>(a, b); -} - -# if __has_builtin(__builtin_shufflevector) -#define _mm_shuffle_ps(a,b,i) _mm_shuffle_ps_default<i>(a,b) -# else -#define _mm_shuffle_ps(a,b,i) _mm_shuffle_ps_function<i>(a,b) -#endif - -// Takes the upper 64 bits of a and places it in the low end of the result -// Takes the lower 64 bits of b and places it into the high end of the result. -FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a, __m128i b) -{ - return vcombine_s32(vget_high_s32(a), vget_low_s32(b)); -} - -// takes the lower two 32-bit values from a and swaps them and places in low end of result -// takes the higher two 32 bit values from b and swaps them and places in high end of result. -FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a, __m128i b) -{ - return vcombine_s32(vrev64_s32(vget_low_s32(a)), vrev64_s32(vget_high_s32(b))); -} - -// shift a right by 32 bits, and put the lower 32 bits of a into the upper 32 bits of b -// when a and b are the same, rotates the least significant 32 bits into the most signficant 32 bits, and shifts the rest down -FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a, __m128i b) -{ - return vextq_s32(a, b, 1); -} - -// shift a left by 32 bits, and put the upper 32 bits of b into the lower 32 bits of a -// when a and b are the same, rotates the most significant 32 bits into the least signficant 32 bits, and shifts the rest up -FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a, __m128i b) -{ - return vextq_s32(a, b, 3); -} - -// gets the lower 64 bits of a, and places it in the upper 64 bits -// gets the lower 64 bits of b and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a, __m128i b) -{ - return vcombine_s32(vget_low_s32(a), vget_low_s32(a)); -} - -// gets the lower 64 bits of a, and places it in the upper 64 bits -// gets the lower 64 bits of b, swaps the 0 and 1 elements, and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a, __m128i b) -{ - return vcombine_s32(vrev64_s32(vget_low_s32(a)), vget_low_s32(b)); -} - -// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the upper 64 bits -// gets the lower 64 bits of b, swaps the 0 and 1 elements, and places it in the lower 64 bits -FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a, __m128i b) -{ - return vcombine_s32(vrev64_s32(vget_low_s32(a)), vrev64_s32(vget_low_s32(b))); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a, __m128i b) -{ - return vcombine_s32(vdup_n_s32(vgetq_lane_s32(a, 1)), vdup_n_s32(vgetq_lane_s32(b, 2))); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a, __m128i b) -{ - return vcombine_s32(vdup_n_s32(vgetq_lane_s32(a, 2)), vrev64_s32(vget_low_s32(b))); -} - -FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a, __m128i b) -{ - return vcombine_s32(vget_high_s32(a), vdup_n_s32(vgetq_lane_s32(b, 3))); -} - -template <int i > -FORCE_INLINE __m128i _mm_shuffle_epi32_default(__m128i a, __m128i b) -{ -#if ENABLE_CPP_VERSION - __m128i ret; - ret[0] = a[i & 0x3]; - ret[1] = a[(i >> 2) & 0x3]; - ret[2] = b[(i >> 4) & 0x03]; - ret[3] = b[(i >> 6) & 0x03]; - return ret; -#else - __m128i ret = vmovq_n_s32(vgetq_lane_s32(a, i & 0x3)); - ret = vsetq_lane_s32(vgetq_lane_s32(a, (i >> 2) & 0x3), ret, 1); - ret = vsetq_lane_s32(vgetq_lane_s32(b, (i >> 4) & 0x3), ret, 2); - ret = vsetq_lane_s32(vgetq_lane_s32(b, (i >> 6) & 0x3), ret, 3); - return ret; -#endif -} - -template <int i > -FORCE_INLINE __m128i _mm_shuffle_epi32_function(__m128i a, __m128i b) -{ - switch (i) - { - case _MM_SHUFFLE(1, 0, 3, 2): return _mm_shuffle_epi_1032(a, b); break; - case _MM_SHUFFLE(2, 3, 0, 1): return _mm_shuffle_epi_2301(a, b); break; - case _MM_SHUFFLE(0, 3, 2, 1): return _mm_shuffle_epi_0321(a, b); break; - case _MM_SHUFFLE(2, 1, 0, 3): return _mm_shuffle_epi_2103(a, b); break; - case _MM_SHUFFLE(1, 0, 1, 0): return _mm_shuffle_epi_1010(a, b); break; - case _MM_SHUFFLE(1, 0, 0, 1): return _mm_shuffle_epi_1001(a, b); break; - case _MM_SHUFFLE(0, 1, 0, 1): return _mm_shuffle_epi_0101(a, b); break; - case _MM_SHUFFLE(2, 2, 1, 1): return _mm_shuffle_epi_2211(a, b); break; - case _MM_SHUFFLE(0, 1, 2, 2): return _mm_shuffle_epi_0122(a, b); break; - case _MM_SHUFFLE(3, 3, 3, 2): return _mm_shuffle_epi_3332(a, b); break; - default: return _mm_shuffle_epi32_default<i>(a, b); - } -} - -template <int i > -FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a) -{ - return vdupq_n_s32(vgetq_lane_s32(a, i)); -} - -template <int i> -FORCE_INLINE __m128i _mm_shuffle_epi32_single(__m128i a) -{ - switch (i) - { - case _MM_SHUFFLE(0, 0, 0, 0): return _mm_shuffle_epi32_splat<0>(a); break; - case _MM_SHUFFLE(1, 1, 1, 1): return _mm_shuffle_epi32_splat<1>(a); break; - case _MM_SHUFFLE(2, 2, 2, 2): return _mm_shuffle_epi32_splat<2>(a); break; - case _MM_SHUFFLE(3, 3, 3, 3): return _mm_shuffle_epi32_splat<3>(a); break; - default: return _mm_shuffle_epi32_function<i>(a, a); - } -} - -// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx -#define _mm_shuffle_epi32(a,i) _mm_shuffle_epi32_single<i>(a) - -template <int i> -FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a) -{ - int16x8_t ret = (int16x8_t)a; - int16x4_t highBits = vget_high_s16(ret); - ret = vsetq_lane_s16(vget_lane_s16(highBits, i & 0x3), ret, 4); - ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 2) & 0x3), ret, 5); - ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 4) & 0x3), ret, 6); - ret = vsetq_lane_s16(vget_lane_s16(highBits, (i >> 6) & 0x3), ret, 7); - return (__m128i)ret; -} - -// Shuffles the upper 4 signed or unsigned 16 - bit integers in a as specified by imm. https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx -#define _mm_shufflehi_epi16(a,i) _mm_shufflehi_epi16_function<i>(a) - -// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while shifting in zeros. : https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx -//#define _mm_slli_epi32(a, imm) (__m128i)vshlq_n_s32(a,imm) - -// Based on SIMDe -FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, const int imm8) -{ -#if defined(__aarch64__) - const int32x4_t s = vdupq_n_s32(imm8); - return vshlq_s32(a, s); -#else - int32_t __attribute__((aligned(16))) data[4]; - vst1q_s32(data, a); - const int s = (imm8 > 31) ? 0 : imm8; - data[0] = data[0] << s; - data[1] = data[1] << s; - data[2] = data[2] << s; - data[3] = data[3] << s; - - return vld1q_s32(data); -#endif -} - - -//Shifts the 4 signed or unsigned 32-bit integers in a right by count bits while shifting in zeros. https://msdn.microsoft.com/en-us/library/w486zcfa(v=vs.100).aspx -//#define _mm_srli_epi32( a, imm ) (__m128i)vshrq_n_u32((uint32x4_t)a, imm) - -// Based on SIMDe -FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, const int imm8) -{ -#if defined(__aarch64__) - const int shift = (imm8 > 31) ? 0 : imm8; // Unfortunately, we need to check for this case for embree. - const int32x4_t s = vdupq_n_s32(-shift); - return vreinterpretq_s32_u32(vshlq_u32(vreinterpretq_u32_s32(a), s)); -#else - int32_t __attribute__((aligned(16))) data[4]; - vst1q_s32(data, a); - - const int s = (imm8 > 31) ? 0 : imm8; - - data[0] = data[0] >> s; - data[1] = data[1] >> s; - data[2] = data[2] >> s; - data[3] = data[3] >> s; - - return vld1q_s32(data); -#endif -} - - -// Shifts the 4 signed 32 - bit integers in a right by count bits while shifting in the sign bit. https://msdn.microsoft.com/en-us/library/z1939387(v=vs.100).aspx -//#define _mm_srai_epi32( a, imm ) vshrq_n_s32(a, imm) - -// Based on SIMDe -FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, const int imm8) -{ -#if defined(__aarch64__) - const int32x4_t s = vdupq_n_s32(-imm8); - return vshlq_s32(a, s); -#else - int32_t __attribute__((aligned(16))) data[4]; - vst1q_s32(data, a); - const uint32_t m = (uint32_t) ((~0U) << (32 - imm8)); - - for (int i = 0; i < 4; i++) { - uint32_t is_neg = ((uint32_t) (((data[i]) >> 31))); - data[i] = (data[i] >> imm8) | (m * is_neg); - } - - return vld1q_s32(data); -#endif -} - -// Shifts the 128 - bit value in a right by imm bytes while shifting in zeros.imm must be an immediate. https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx -//#define _mm_srli_si128( a, imm ) (__m128i)vmaxq_s8((int8x16_t)a, vextq_s8((int8x16_t)a, vdupq_n_s8(0), imm)) -#define _mm_srli_si128( a, imm ) (__m128i)vextq_s8((int8x16_t)a, vdupq_n_s8(0), (imm)) - -// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm must be an immediate. https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx -#define _mm_slli_si128( a, imm ) (__m128i)vextq_s8(vdupq_n_s8(0), (int8x16_t)a, 16 - (imm)) - -// NEON does not provide a version of this function, here is an article about some ways to repro the results. -// http://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon -// Creates a 16-bit mask from the most significant bits of the 16 signed or unsigned 8-bit integers in a and zero extends the upper bits. https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx -FORCE_INLINE int _mm_movemask_epi8(__m128i _a) -{ - uint8x16_t input = (uint8x16_t)_a; - const int8_t __attribute__((aligned(16))) xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 }; - uint8x8_t mask_and = vdup_n_u8(0x80); - int8x8_t mask_shift = vld1_s8(xr); - - uint8x8_t lo = vget_low_u8(input); - uint8x8_t hi = vget_high_u8(input); - - lo = vand_u8(lo, mask_and); - lo = vshl_u8(lo, mask_shift); - - hi = vand_u8(hi, mask_and); - hi = vshl_u8(hi, mask_shift); - - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - lo = vpadd_u8(lo, lo); - - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - hi = vpadd_u8(hi, hi); - - return ((hi[0] << 8) | (lo[0] & 0xFF)); -} - - -// ****************************************** -// Math operations -// ****************************************** - -// Subtracts the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) -{ - return vsubq_f32(a, b); -} - -FORCE_INLINE __m128 _mm_sub_ss(__m128 a, __m128 b) -{ - return vsubq_f32(a, b); -} - -// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or unsigned 32-bit integers of a. https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx -FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) -{ - return vsubq_s32(a, b); -} - -// Adds the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) -{ - return vaddq_f32(a, b); -} - -// adds the scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx -FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) -{ - const float32_t b0 = vgetq_lane_f32(b, 0); - float32x4_t value = vdupq_n_f32(0); - - //the upper values in the result must be the remnants of <a>. - value = vsetq_lane_f32(b0, value, 0); - return vaddq_f32(a, value); -} - -// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) -{ - return vaddq_s32(a, b); -} - -// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) -{ - return (__m128i)vaddq_s16((int16x8_t)a, (int16x8_t)b); -} - -// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or unsigned 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) -{ - return (__m128i)vmulq_s16((int16x8_t)a, (int16x8_t)b); -} - -// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or unsigned 32-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mullo_epi32 (__m128i a, __m128i b) -{ - return (__m128i)vmulq_s32((int32x4_t)a,(int32x4_t)b); -} - -// Multiplies the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx -FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) -{ - return vmulq_f32(a, b); -} - -FORCE_INLINE __m128 _mm_mul_ss(__m128 a, __m128 b) -{ - return vmulq_f32(a, b); -} - -// Computes the approximations of reciprocals of the four single-precision, floating-point values of a. https://msdn.microsoft.com/en-us/library/vstudio/796k1tty(v=vs.100).aspx -FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) -{ -#if defined(BUILD_IOS) - return vdivq_f32(vdupq_n_f32(1.0f),in); - -#endif - // Get an initial estimate of 1/in. - float32x4_t reciprocal = vrecpeq_f32(in); - - // We only return estimated 1/in. - // Newton-Raphon iteration shold be done in the outside of _mm_rcp_ps(). - - // TODO(LTE): We could delete these ifdef? - reciprocal = vmulq_f32(vrecpsq_f32(in, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(in, reciprocal), reciprocal); - return reciprocal; - -} - -FORCE_INLINE __m128 _mm_rcp_ss(__m128 in) -{ - float32x4_t value; - float32x4_t result = in; - - value = _mm_rcp_ps(in); - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - -// Divides the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) -{ -#if defined(BUILD_IOS) - return vdivq_f32(a,b); -#else - float32x4_t reciprocal = _mm_rcp_ps(b); - - reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal); - - // Add one more round of newton-raphson since NEON's reciprocal estimation has less accuracy compared to SSE2's rcp. - reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal); - - // Another round for safety - reciprocal = vmulq_f32(vrecpsq_f32(b, reciprocal), reciprocal); - - - return vmulq_f32(a, reciprocal); -#endif -} - -// Divides the scalar single-precision floating point value of a by b. https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx -FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) -{ - float32x4_t value; - float32x4_t result = a; - value = _mm_div_ps(a, b); - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - -// Computes the approximations of the reciprocal square roots of the four single-precision floating point values of in. https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx -FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) -{ - - float32x4_t value = vrsqrteq_f32(in); - - // TODO: We must debug and ensure that rsqrt(0) and rsqrt(-0) yield proper values. - // Related code snippets can be found here: https://cpp.hotexamples.com/examples/-/-/vrsqrteq_f32/cpp-vrsqrteq_f32-function-examples.html - // If we adapt this function, we might be able to avoid special zero treatment in _mm_sqrt_ps - - value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(in, value), value)); - value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(in, value), value)); - - // one more round to get better precision - value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(in, value), value)); - - // another round for safety - value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(in, value), value)); - - return value; -} - -FORCE_INLINE __m128 _mm_rsqrt_ss(__m128 in) -{ - float32x4_t result = in; - - __m128 value = _mm_rsqrt_ps(in); - - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - - -// Computes the approximations of square roots of the four single-precision, floating-point values of a. First computes reciprocal square roots and then reciprocals of the four values. https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) -{ -#if defined(BUILD_IOS) - return vsqrtq_f32(in); -#else - __m128 reciprocal = _mm_rsqrt_ps(in); - - // We must treat sqrt(in == 0) in a special way. At this point reciprocal contains gargabe due to vrsqrteq_f32(0) returning +inf. - // We assign 0 to reciprocal wherever required. - const float32x4_t vzero = vdupq_n_f32(0.0f); - const uint32x4_t mask = vceqq_f32(in, vzero); - reciprocal = vbslq_f32(mask, vzero, reciprocal); - - // sqrt(x) = x * (1 / sqrt(x)) - return vmulq_f32(in, reciprocal); -#endif -} - -// Computes the approximation of the square root of the scalar single-precision floating point value of in. https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx -FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) -{ - float32x4_t value; - float32x4_t result = in; - - value = _mm_sqrt_ps(in); - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - - -// Computes the maximums of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx -FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) -{ -#if USE_PRECISE_MINMAX_IMPLEMENTATION - return vbslq_f32(vcltq_f32(b,a),a,b); -#else - // Faster, but would give inconsitent rendering(e.g. holes, NaN pixels) - return vmaxq_f32(a, b); -#endif -} - -// Computes the minima of the four single-precision, floating-point values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) -{ -#if USE_PRECISE_MINMAX_IMPLEMENTATION - return vbslq_f32(vcltq_f32(a,b),a,b); -#else - // Faster, but would give inconsitent rendering(e.g. holes, NaN pixels) - return vminq_f32(a, b); -#endif -} - -// Computes the maximum of the two lower scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx -FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) -{ - float32x4_t value; - float32x4_t result = a; - - value = _mm_max_ps(a, b); - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - -// Computes the minimum of the two lower scalar single-precision floating point values of a and b. https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx -FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) -{ - float32x4_t value; - float32x4_t result = a; - - - value = _mm_min_ps(a, b); - return vsetq_lane_f32(vgetq_lane_f32(value, 0), result, 0); -} - -// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 signed 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx -FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b) -{ - return (__m128i)vminq_s16((int16x8_t)a, (int16x8_t)b); -} - -// epi versions of min/max -// Computes the pariwise maximums of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx -FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b ) -{ - return vmaxq_s32(a,b); -} - -// Computes the pariwise minima of the four signed 32-bit integer values of a and b. https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx -FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b ) -{ - return vminq_s32(a,b); -} - -// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit integers from b. https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) -{ - int16x8_t ret = vqdmulhq_s16((int16x8_t)a, (int16x8_t)b); - ret = vshrq_n_s16(ret, 1); - return (__m128i)ret; -} - -// Computes pairwise add of each argument as single-precision, floating-point values a and b. -//https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx -FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b ) -{ -#if defined(__aarch64__) - return vpaddq_f32(a,b); -#else -// This does not work, no vpaddq... -// return (__m128) vpaddq_f32(a,b); - // - // get two f32x2_t values from a - // do vpadd - // put result in low half of f32x4 result - // - // get two f32x2_t values from b - // do vpadd - // put result in high half of f32x4 result - // - // combine - return vcombine_f32( vpadd_f32( vget_low_f32(a), vget_high_f32(a) ), vpadd_f32( vget_low_f32(b), vget_high_f32(b) ) ); -#endif -} - -// ****************************************** -// Compare operations -// ****************************************** - -// Compares for less than https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b) -{ - return (__m128)vcltq_f32(a, b); -} - -FORCE_INLINE __m128 _mm_cmpnlt_ps(__m128 a, __m128 b) -{ - return (__m128) vmvnq_s32((__m128i)_mm_cmplt_ps(a,b)); -} - -// Compares for greater than. https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b) -{ - return (__m128)vcgtq_f32(a, b); -} - -FORCE_INLINE __m128 _mm_cmpnle_ps(__m128 a, __m128 b) -{ - return (__m128) _mm_cmpgt_ps(a,b); -} - - -// Compares for greater than or equal. https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b) -{ - return (__m128)vcgeq_f32(a, b); -} - -// Compares for less than or equal. https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b) -{ - return (__m128)vcleq_f32(a, b); -} - -// Compares for equality. https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b) -{ - return (__m128)vceqq_f32(a, b); -} - -// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for less than. https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b) -{ - return (__m128i)vcltq_s32(a, b); -} - -FORCE_INLINE __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) -{ - return (__m128i) vceqq_s32(a,b); -} - -// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers in b for greater than. https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b) -{ - return (__m128i)vcgtq_s32(a, b); -} - -// Compares the four 32-bit floats in a and b to check if any values are NaN. Ordered compare between each value returns true for "orderable" and false for "not orderable" (NaN). https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx -// see also: -// http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean -// http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics -FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b ) -{ - // Note: NEON does not have ordered compare builtin - // Need to compare a eq a and b eq b to check for NaN - // Do AND of results to get final - return (__m128) vreinterpretq_f32_u32( vandq_u32( vceqq_f32(a,a), vceqq_f32(b,b) ) ); -} - -// Compares the lower single-precision floating point scalar values of a and b using a less than operation. : https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx -FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b) -{ - uint32x4_t value; - - value = vcltq_f32(a, b); - return vgetq_lane_u32(value, 0); -} - -// Compares the lower single-precision floating point scalar values of a and b using a greater than operation. : https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx -FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b) -{ - uint32x4_t value; - - value = vcgtq_f32(a, b); - return vgetq_lane_u32(value, 0); -} - -// Compares the lower single-precision floating point scalar values of a and b using a less than or equal operation. : https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx -FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b) -{ - uint32x4_t value; - - value = vcleq_f32(a, b); - return vgetq_lane_u32(value, 0); -} - -// Compares the lower single-precision floating point scalar values of a and b using a greater than or equal operation. : https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx -FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b) -{ - uint32x4_t value; - - value = vcgeq_f32(a, b); - return vgetq_lane_u32(value, 0); -} - -// Compares the lower single-precision floating point scalar values of a and b using an equality operation. : https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx -FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b) -{ - uint32x4_t value; - - value = vceqq_f32(a, b); - return vgetq_lane_u32(value, 0); -} - -// Compares the lower single-precision floating point scalar values of a and b using an inequality operation. : https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx -FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) -{ - uint32x4_t value; - - value = vceqq_f32(a, b); - return !vgetq_lane_u32(value, 0); -} - -// according to the documentation, these intrinsics behave the same as the non-'u' versions. We'll just alias them here. -#define _mm_ucomilt_ss _mm_comilt_ss -#define _mm_ucomile_ss _mm_comile_ss -#define _mm_ucomigt_ss _mm_comigt_ss -#define _mm_ucomige_ss _mm_comige_ss -#define _mm_ucomieq_ss _mm_comieq_ss -#define _mm_ucomineq_ss _mm_comineq_ss - -// ****************************************** -// Conversions -// ****************************************** - -// Converts the four single-precision, floating-point values of a to signed 32-bit integer values using truncate. https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx -FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) -{ - return vcvtq_s32_f32(a); -} - -// Converts the four signed 32-bit integer values of a to single-precision, floating-point values https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx -FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) -{ - return vcvtq_f32_s32(a); -} - -// Converts the four single-precision, floating-point values of a to signed 32-bit integer values. https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx -// *NOTE*. The default rounding mode on SSE is 'round to even', which ArmV7 does not support! -// It is supported on ARMv8 however. -FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) -{ -#if 1 - return vcvtnq_s32_f32(a); -#else - __m128 half = vdupq_n_f32(0.5f); - const __m128 sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(a), 31))); - const __m128 aPlusHalf = vaddq_f32(a, half); - const __m128 aRound = vsubq_f32(aPlusHalf, sign); - return vcvtq_s32_f32(aRound); -#endif -} - -// Moves the least significant 32 bits of a to a 32-bit integer. https://msdn.microsoft.com/en-us/library/5z7a9642%28v=vs.90%29.aspx -FORCE_INLINE int _mm_cvtsi128_si32(__m128i a) -{ - return vgetq_lane_s32(a, 0); -} - -// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, zero extending the upper bits. https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) -{ - __m128i result = vdupq_n_s32(0); - return vsetq_lane_s32(a, result, 0); -} - - -// Applies a type cast to reinterpret four 32-bit floating point values passed in as a 128-bit parameter as packed 32-bit integers. https://msdn.microsoft.com/en-us/library/bb514099.aspx -FORCE_INLINE __m128i _mm_castps_si128(__m128 a) -{ -#if defined(__aarch64__) - return (__m128i)a; -#else - return *(const __m128i *)&a; -#endif -} - -// Applies a type cast to reinterpret four 32-bit integers passed in as a 128-bit parameter as packed 32-bit floating point values. https://msdn.microsoft.com/en-us/library/bb514029.aspx -FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) -{ -#if defined(__aarch64__) - return (__m128)a; -#else - return *(const __m128 *)&a; -#endif -} - -// Loads 128-bit value. : https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx -FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) -{ - return vld1q_s32((int32_t *)p); -} - -FORCE_INLINE __m128d _mm_castps_pd(const __m128 a) -{ - return *(const __m128d *)&a; -} - -FORCE_INLINE __m128d _mm_castsi128_pd(__m128i a) -{ - return *(const __m128d *)&a; -} -// ****************************************** -// Miscellaneous Operations -// ****************************************** - -// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and saturates. https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) -{ - return (__m128i)vcombine_s8(vqmovn_s16((int16x8_t)a), vqmovn_s16((int16x8_t)b)); -} - -// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned integers and saturates. https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx -FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) -{ - return (__m128i)vcombine_u8(vqmovun_s16((int16x8_t)a), vqmovun_s16((int16x8_t)b)); -} - -// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers and saturates. https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) -{ - return (__m128i)vcombine_s16(vqmovn_s32(a), vqmovn_s32(b)); -} - -// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower 8 signed or unsigned 8-bit integers in b. https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) -{ - int8x8_t a1 = (int8x8_t)vget_low_s16((int16x8_t)a); - int8x8_t b1 = (int8x8_t)vget_low_s16((int16x8_t)b); - - int8x8x2_t result = vzip_s8(a1, b1); - - return (__m128i)vcombine_s8(result.val[0], result.val[1]); -} - -// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the lower 4 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx -FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) -{ - int16x4_t a1 = vget_low_s16((int16x8_t)a); - int16x4_t b1 = vget_low_s16((int16x8_t)b); - - int16x4x2_t result = vzip_s16(a1, b1); - - return (__m128i)vcombine_s16(result.val[0], result.val[1]); -} - -// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the lower 2 signed or unsigned 32 - bit integers in b. https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) -{ - int32x2_t a1 = vget_low_s32(a); - int32x2_t b1 = vget_low_s32(b); - - int32x2x2_t result = vzip_s32(a1, b1); - - return vcombine_s32(result.val[0], result.val[1]); -} - -// Selects and interleaves the lower two single-precision, floating-point values from a and b. https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) -{ - float32x2x2_t result = vzip_f32(vget_low_f32(a), vget_low_f32(b)); - return vcombine_f32(result.val[0], result.val[1]); -} - -// Selects and interleaves the upper two single-precision, floating-point values from a and b. https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx -FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) -{ - float32x2x2_t result = vzip_f32(vget_high_f32(a), vget_high_f32(b)); - return vcombine_f32(result.val[0], result.val[1]); -} - -// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper 8 signed or unsigned 8-bit integers in b. https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) -{ - int8x8_t a1 = (int8x8_t)vget_high_s16((int16x8_t)a); - int8x8_t b1 = (int8x8_t)vget_high_s16((int16x8_t)b); - - int8x8x2_t result = vzip_s8(a1, b1); - - return (__m128i)vcombine_s8(result.val[0], result.val[1]); -} - -// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the upper 4 signed or unsigned 16-bit integers in b. https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) -{ - int16x4_t a1 = vget_high_s16((int16x8_t)a); - int16x4_t b1 = vget_high_s16((int16x8_t)b); - - int16x4x2_t result = vzip_s16(a1, b1); - - return (__m128i)vcombine_s16(result.val[0], result.val[1]); -} - -// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the upper 2 signed or unsigned 32-bit integers in b. https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx -FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) -{ - int32x2_t a1 = vget_high_s32(a); - int32x2_t b1 = vget_high_s32(b); - - int32x2x2_t result = vzip_s32(a1, b1); - - return vcombine_s32(result.val[0], result.val[1]); -} - -// Extracts the selected signed or unsigned 16-bit integer from a and zero extends. https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx -#define _mm_extract_epi16( a, imm ) vgetq_lane_s16((int16x8_t)a, imm) - -// ****************************************** -// Streaming Extensions -// ****************************************** - -// Guarantees that every preceding store is globally visible before any subsequent store. https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx -FORCE_INLINE void _mm_sfence(void) -{ - __sync_synchronize(); -} - -// Stores the data in a to the address p without polluting the caches. If the cache line containing address p is already in the cache, the cache will be updated.Address p must be 16 - byte aligned. https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx -FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a) -{ - *p = a; -} - -// Cache line containing p is flushed and invalidated from all caches in the coherency domain. : https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx -FORCE_INLINE void _mm_clflush(void const*p) -{ - // no corollary for Neon? -} - -FORCE_INLINE __m128i _mm_set_epi64x(int64_t a, int64_t b) -{ - // Stick to the flipped behavior of x86. - int64_t __attribute__((aligned(16))) data[2] = { b, a }; - return (__m128i)vld1q_s64(data); -} - -FORCE_INLINE __m128i _mm_set1_epi64x(int64_t _i) -{ - return (__m128i)vmovq_n_s64(_i); -} - -#if defined(__aarch64__) -FORCE_INLINE __m128 _mm_blendv_ps(__m128 a, __m128 b, __m128 c) -{ - int32x4_t mask = vshrq_n_s32(__m128i(c),31); - return vbslq_f32( uint32x4_t(mask), b, a); -} - -FORCE_INLINE __m128i _mm_load4epu8_epi32(__m128i *ptr) -{ - uint8x8_t t0 = vld1_u8((uint8_t*)ptr); - uint16x8_t t1 = vmovl_u8(t0); - uint32x4_t t2 = vmovl_u16(vget_low_u16(t1)); - return vreinterpretq_s32_u32(t2); -} - -FORCE_INLINE __m128i _mm_load4epu16_epi32(__m128i *ptr) -{ - uint16x8_t t0 = vld1q_u16((uint16_t*)ptr); - uint32x4_t t1 = vmovl_u16(vget_low_u16(t0)); - return vreinterpretq_s32_u32(t1); -} - -FORCE_INLINE __m128i _mm_load4epi8_f32(__m128i *ptr) -{ - int8x8_t t0 = vld1_s8((int8_t*)ptr); - int16x8_t t1 = vmovl_s8(t0); - int32x4_t t2 = vmovl_s16(vget_low_s16(t1)); - float32x4_t t3 = vcvtq_f32_s32(t2); - return vreinterpretq_s32_f32(t3); -} - -FORCE_INLINE __m128i _mm_load4epu8_f32(__m128i *ptr) -{ - uint8x8_t t0 = vld1_u8((uint8_t*)ptr); - uint16x8_t t1 = vmovl_u8(t0); - uint32x4_t t2 = vmovl_u16(vget_low_u16(t1)); - return vreinterpretq_s32_u32(t2); -} - -FORCE_INLINE __m128i _mm_load4epi16_f32(__m128i *ptr) -{ - int16x8_t t0 = vld1q_s16((int16_t*)ptr); - int32x4_t t1 = vmovl_s16(vget_low_s16(t0)); - float32x4_t t2 = vcvtq_f32_s32(t1); - return vreinterpretq_s32_f32(t2); -} - -FORCE_INLINE __m128i _mm_packus_epi32(__m128i a, __m128i b) -{ - return (__m128i)vcombine_u8(vqmovun_s16((int16x8_t)a), vqmovun_s16((int16x8_t)b)); -} - -FORCE_INLINE __m128i _mm_stream_load_si128(__m128i* ptr) -{ - // No non-temporal load on a single register on ARM. - return vreinterpretq_s32_u8(vld1q_u8((uint8_t*)ptr)); -} - -FORCE_INLINE void _mm_stream_ps(float* ptr, __m128i a) -{ - // No non-temporal store on a single register on ARM. - vst1q_f32((float*)ptr, vreinterpretq_f32_s32(a)); -} - -FORCE_INLINE __m128i _mm_min_epu32(__m128i a, __m128i b) -{ - return vreinterpretq_s32_u32(vminq_u32(vreinterpretq_u32_s32(a), vreinterpretq_u32_s32(b))); -} - -FORCE_INLINE __m128i _mm_max_epu32(__m128i a, __m128i b) -{ - return vreinterpretq_s32_u32(vmaxq_u32(vreinterpretq_u32_s32(a), vreinterpretq_u32_s32(b))); -} - -FORCE_INLINE __m128 _mm_abs_ps(__m128 a) -{ - return vabsq_f32(a); -} - -FORCE_INLINE __m128 _mm_madd_ps(__m128 a, __m128 b, __m128 c) -{ - return vmlaq_f32(c, a, b); -} - -FORCE_INLINE __m128 _mm_msub_ps(__m128 a, __m128 b, __m128 c) -{ - return vmlsq_f32(c, a, b); -} - -FORCE_INLINE __m128i _mm_abs_epi32(__m128i a) -{ - return vabsq_s32(a); -} -#endif //defined(__aarch64__) - -// Count the number of bits set to 1 in unsigned 32-bit integer a, and -// return that count in dst. -// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32 -FORCE_INLINE int _mm_popcnt_u32(unsigned int a) -{ - return (int)vaddlv_u8(vcnt_u8(vcreate_u8((uint64_t)a))); -} - -// Count the number of bits set to 1 in unsigned 64-bit integer a, and -// return that count in dst. -// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64 -FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a) -{ - return (int64_t)vaddlv_u8(vcnt_u8(vcreate_u8(a))); -} - -#endif diff --git a/thirdparty/embree-aarch64/common/math/constants.cpp b/thirdparty/embree-aarch64/common/math/constants.cpp deleted file mode 100644 index eeff131664..0000000000 --- a/thirdparty/embree-aarch64/common/math/constants.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#if defined(__aarch64__) -#include <arm_neon.h> -#endif - -#include "constants.h" - -namespace embree -{ - TrueTy True; - FalseTy False; - ZeroTy zero; - OneTy one; - NegInfTy neg_inf; - PosInfTy inf; - PosInfTy pos_inf; - NaNTy nan; - UlpTy ulp; - PiTy pi; - OneOverPiTy one_over_pi; - TwoPiTy two_pi; - OneOverTwoPiTy one_over_two_pi; - FourPiTy four_pi; - OneOverFourPiTy one_over_four_pi; - StepTy step; - ReverseStepTy reverse_step; - EmptyTy empty; - UndefinedTy undefined; - -#if defined(__aarch64__) -const uint32x4_t movemask_mask = { 1, 2, 4, 8 }; -const uint32x4_t vzero = { 0, 0, 0, 0 }; -const uint32x4_t v0x80000000 = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; -const uint32x4_t v0x7fffffff = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; -const uint32x4_t v000F = { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }; -const uint32x4_t v00F0 = { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 }; -const uint32x4_t v00FF = { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }; -const uint32x4_t v0F00 = { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 }; -const uint32x4_t v0F0F = { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }; -const uint32x4_t v0FF0 = { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }; -const uint32x4_t v0FFF = { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; -const uint32x4_t vF000 = { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 }; -const uint32x4_t vF00F = { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF }; -const uint32x4_t vF0F0 = { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 }; -const uint32x4_t vF0FF = { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }; -const uint32x4_t vFF00 = { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 }; -const uint32x4_t vFF0F = { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }; -const uint32x4_t vFFF0 = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }; -const uint32x4_t vFFFF = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; -const uint8x16_t v0022 = {0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11}; -const uint8x16_t v1133 = {4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15}; -const uint8x16_t v0101 = {0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7}; -const float32x4_t vOne = { 1.0f, 1.0f, 1.0f, 1.0f }; -const float32x4_t vmOne = { -1.0f, -1.0f, -1.0f, -1.0f }; -const float32x4_t vInf = { INFINITY, INFINITY, INFINITY, INFINITY }; -const float32x4_t vmInf = { -INFINITY, -INFINITY, -INFINITY, -INFINITY }; -#endif - -} diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h b/thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h deleted file mode 100644 index d31f8bb478..0000000000 --- a/thirdparty/embree-aarch64/common/tasking/taskschedulergcd.h +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include "../sys/platform.h" -#include "../sys/alloc.h" -#include "../sys/barrier.h" -#include "../sys/thread.h" -#include "../sys/mutex.h" -#include "../sys/condition.h" -#include "../sys/ref.h" - -#include <dispatch/dispatch.h> - -namespace embree -{ - struct TaskScheduler - { - /*! initializes the task scheduler */ - static void create(size_t numThreads, bool set_affinity, bool start_threads); - - /*! destroys the task scheduler again */ - static void destroy() {} - - /* returns the ID of the current thread */ - static __forceinline size_t threadID() - { - return threadIndex(); - } - - /* returns the index (0..threadCount-1) of the current thread */ - static __forceinline size_t threadIndex() - { - currentThreadIndex = (currentThreadIndex + 1) % GCDNumThreads; - return currentThreadIndex; - } - - /* returns the total number of threads */ - static __forceinline size_t threadCount() - { - return GCDNumThreads; - } - - private: - static size_t GCDNumThreads; - static size_t currentThreadIndex; - - }; - -}; - diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h b/thirdparty/embree-aarch64/kernels/builders/primrefgen.h deleted file mode 100644 index 9919c945c3..0000000000 --- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "../common/scene.h" -#include "../common/primref.h" -#include "../common/primref_mb.h" -#include "priminfo.h" -#include "bvh_builder_morton.h" - -namespace embree -{ - namespace isa - { - PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor); - - PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor); - - PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0); - - PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f)); - - template<typename Mesh> - size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor); - } -} - diff --git a/thirdparty/embree-aarch64/kernels/common/instance_stack.h b/thirdparty/embree-aarch64/kernels/common/instance_stack.h deleted file mode 100644 index d7e3637f7b..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/instance_stack.h +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "rtcore.h" - -namespace embree { -namespace instance_id_stack { - -static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0, - "RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0."); - -/******************************************************************************* - * Instance ID stack manipulation. - * This is used from the instance intersector. - ******************************************************************************/ - -/* - * Push an instance to the stack. - */ -RTC_FORCEINLINE bool push(RTCIntersectContext* context, - unsigned instanceId) -{ -#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 - const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT; - /* We assert here because instances are silently dropped when the stack is full. - This might be quite hard to find in production. */ - assert(spaceAvailable); - if (likely(spaceAvailable)) - context->instID[context->instStackSize++] = instanceId; - return spaceAvailable; -#else - const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID); - assert(spaceAvailable); - if (likely(spaceAvailable)) - context->instID[0] = instanceId; - return spaceAvailable; -#endif -} - - -/* - * Pop the last instance pushed to the stack. - * Do not call on an empty stack. - */ -RTC_FORCEINLINE void pop(RTCIntersectContext* context) -{ - assert(context); -#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 - assert(context->instStackSize > 0); - context->instID[--context->instStackSize] = RTC_INVALID_GEOMETRY_ID; -#else - assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID); - context->instID[0] = RTC_INVALID_GEOMETRY_ID; -#endif -} - -/******************************************************************************* - * Optimized instance id stack copy. - * The copy() function at the bottom of this block will either copy full - * stacks or copy only until the last valid element has been copied, depending - * on RTC_MAX_INSTANCE_LEVEL_COUNT. - ******************************************************************************/ - -/* - * Plain array assignment. This works for scalar->scalar, - * scalar->vector, and vector->vector. - */ -template <class Src, class Tgt> -RTC_FORCEINLINE void level_copy(unsigned level, Src* src, Tgt* tgt) -{ - tgt[level] = src[level]; -} - -/* - * Masked SIMD vector->vector store. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask) -{ - vuint<K>::storeu(mask, tgt + level, src[level]); -} - -/* - * Masked scalar->SIMD vector store. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const vbool<K>& mask) -{ - vuint<K>::store(mask, tgt + level, src[level]); -} - -/* - * Indexed assign from vector to scalar. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, unsigned* tgt, const size_t& idx) -{ - tgt[level] = src[level][idx]; -} - -/* - * Indexed assign from scalar to vector. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const unsigned* src, vuint<K>* tgt, const size_t& idx) -{ - tgt[level][idx] = src[level]; -} - -/* - * Indexed assign from vector to vector. - */ -template <int K> -RTC_FORCEINLINE void level_copy(unsigned level, const vuint<K>* src, vuint<K>* tgt, const size_t& i, const size_t& j) -{ - tgt[level][j] = src[level][i]; -} - -/* - * Check if the given stack level is valid. - * These are only used for large max stack sizes. - */ -RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack) -{ - return stack[level] != RTC_INVALID_GEOMETRY_ID; -} -RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const size_t& /*i*/) -{ - return stack[level] != RTC_INVALID_GEOMETRY_ID; -} -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const unsigned* stack, const vbool<K>& /*mask*/) -{ - return stack[level] != RTC_INVALID_GEOMETRY_ID; -} - -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack) -{ - return any(stack[level] != RTC_INVALID_GEOMETRY_ID); -} -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const vbool<K>& mask) -{ - return any(mask & (stack[level] != RTC_INVALID_GEOMETRY_ID)); -} - -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i) -{ - return stack[level][i] != RTC_INVALID_GEOMETRY_ID; -} -template <int K> -RTC_FORCEINLINE bool level_valid(unsigned level, const vuint<K>* stack, const size_t& i, const size_t& /*j*/) -{ - return stack[level][i] != RTC_INVALID_GEOMETRY_ID; -} - -/* - * Copy an instance ID stack. - * - * This function automatically selects a LevelFunctor from the above Assign - * structs. - */ -template <class Src, class Tgt, class... Args> -RTC_FORCEINLINE void copy(Src src, Tgt tgt, Args&&... args) -{ -#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) - /* - * Avoid all loops for only one level. - */ - level_copy(0, src, tgt, std::forward<Args>(args)...); - -#elif (RTC_MAX_INSTANCE_LEVEL_COUNT <= 4) - /* - * It is faster to avoid the valid test for low level counts. - * Just copy the whole stack. - */ - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) - level_copy(l, src, tgt, std::forward<Args>(args)...); - -#else - /* - * For general stack sizes, it pays off to test for validity. - */ - bool valid = true; - for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT && valid; ++l) - { - level_copy(l, src, tgt, std::forward<Args>(args)...); - valid = level_valid(l, src, std::forward<Args>(args)...); - } -#endif -} - -} // namespace instance_id_stack -} // namespace embree - diff --git a/thirdparty/embree-aarch64/kernels/common/scene_curves.h b/thirdparty/embree-aarch64/kernels/common/scene_curves.h deleted file mode 100644 index 2649ab0e3e..0000000000 --- a/thirdparty/embree-aarch64/kernels/common/scene_curves.h +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "default.h" -#include "geometry.h" -#include "buffer.h" - -namespace embree -{ - /*! represents an array of bicubic bezier curves */ - struct CurveGeometry : public Geometry - { - /*! type of this geometry */ - static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4; - - public: - - /*! bezier curve construction */ - CurveGeometry (Device* device, Geometry::GType gtype); - - public: - void setMask(unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void setTessellationRate(float N); - void setMaxRadiusScale(float s); - void addElementsToCount (GeometryCounts & counts) const; - - public: - - /*! returns the number of vertices */ - __forceinline size_t numVertices() const { - return vertices[0].size(); - } - - /*! returns the i'th curve */ - __forceinline const unsigned int& curve(size_t i) const { - return curves[i]; - } - - /*! returns i'th vertex of the first time step */ - __forceinline Vec3ff vertex(size_t i) const { - return vertices0[i]; - } - - /*! returns i'th normal of the first time step */ - __forceinline Vec3fa normal(size_t i) const { - return normals0[i]; - } - - /*! returns i'th tangent of the first time step */ - __forceinline Vec3ff tangent(size_t i) const { - return tangents0[i]; - } - - /*! returns i'th normal derivative of the first time step */ - __forceinline Vec3fa dnormal(size_t i) const { - return dnormals0[i]; - } - - /*! returns i'th radius of the first time step */ - __forceinline float radius(size_t i) const { - return vertices0[i].w; - } - - /*! returns i'th vertex of itime'th timestep */ - __forceinline Vec3ff vertex(size_t i, size_t itime) const { - return vertices[itime][i]; - } - - /*! returns i'th normal of itime'th timestep */ - __forceinline Vec3fa normal(size_t i, size_t itime) const { - return normals[itime][i]; - } - - /*! returns i'th tangent of itime'th timestep */ - __forceinline Vec3ff tangent(size_t i, size_t itime) const { - return tangents[itime][i]; - } - - /*! returns i'th normal derivative of itime'th timestep */ - __forceinline Vec3fa dnormal(size_t i, size_t itime) const { - return dnormals[itime][i]; - } - - /*! returns i'th radius of itime'th timestep */ - __forceinline float radius(size_t i, size_t itime) const { - return vertices[itime][i].w; - } - - /*! gathers the curve starting with i'th vertex */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const - { - p0 = vertex(i+0); - p1 = vertex(i+1); - p2 = vertex(i+2); - p3 = vertex(i+3); - } - - /*! gathers the curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const - { - p0 = vertex(i+0,itime); - p1 = vertex(i+1,itime); - p2 = vertex(i+2,itime); - p3 = vertex(i+3,itime); - } - - /*! gathers the curve starting with i'th vertex */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const - { - p0 = vertex(i+0); - p1 = vertex(i+1); - p2 = vertex(i+2); - p3 = vertex(i+3); - n0 = normal(i+0); - n1 = normal(i+1); - n2 = normal(i+2); - n3 = normal(i+3); - } - - /*! gathers the curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const - { - p0 = vertex(i+0,itime); - p1 = vertex(i+1,itime); - p2 = vertex(i+2,itime); - p3 = vertex(i+3,itime); - n0 = normal(i+0,itime); - n1 = normal(i+1,itime); - n2 = normal(i+2,itime); - n3 = normal(i+3,itime); - } - - /*! prefetches the curve starting with i'th vertex of itime'th timestep */ - __forceinline void prefetchL1_vertices(size_t i) const - { - prefetchL1(vertices0.getPtr(i)+0); - prefetchL1(vertices0.getPtr(i)+64); - } - - /*! prefetches the curve starting with i'th vertex of itime'th timestep */ - __forceinline void prefetchL2_vertices(size_t i) const - { - prefetchL2(vertices0.getPtr(i)+0); - prefetchL2(vertices0.getPtr(i)+64); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - - const float t0 = 1.0f - ftime; - const float t1 = ftime; - Vec3ff a0,a1,a2,a3; - gather(a0,a1,a2,a3,i,itime); - Vec3ff b0,b1,b2,b3; - gather(b0,b1,b2,b3,i,itime+1); - p0 = madd(Vec3ff(t0),a0,t1*b0); - p1 = madd(Vec3ff(t0),a1,t1*b1); - p2 = madd(Vec3ff(t0),a2,t1*b2); - p3 = madd(Vec3ff(t0),a3,t1*b3); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - - const float t0 = 1.0f - ftime; - const float t1 = ftime; - Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3; - gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime); - Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3; - gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1); - p0 = madd(Vec3ff(t0),a0,t1*b0); - p1 = madd(Vec3ff(t0),a1,t1*b1); - p2 = madd(Vec3ff(t0),a2,t1*b2); - p3 = madd(Vec3ff(t0),a3,t1*b3); - n0 = madd(Vec3ff(t0),an0,t1*bn0); - n1 = madd(Vec3ff(t0),an1,t1*bn1); - n2 = madd(Vec3ff(t0),an2,t1*bn2); - n3 = madd(Vec3ff(t0),an3,t1*bn3); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const - { - Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3; - unsigned int vertexID = curve(primID); - gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime); - SourceCurve3ff ccurve(v0,v1,v2,v3); - SourceCurve3fa ncurve(n0,n1,n2,n3); - ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve); - return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0); - const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1); - return clerp(curve0,curve1,ftime); - } - - /*! gathers the hermite curve starting with i'th vertex */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const - { - p0 = vertex (i+0); - p1 = vertex (i+1); - t0 = tangent(i+0); - t1 = tangent(i+1); - } - - /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const - { - p0 = vertex (i+0,itime); - p1 = vertex (i+1,itime); - t0 = tangent(i+0,itime); - t1 = tangent(i+1,itime); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const float f0 = 1.0f - ftime, f1 = ftime; - Vec3ff ap0,at0,ap1,at1; - gather_hermite(ap0,at0,ap1,at1,i,itime); - Vec3ff bp0,bt0,bp1,bt1; - gather_hermite(bp0,bt0,bp1,bt1,i,itime+1); - p0 = madd(Vec3ff(f0),ap0,f1*bp0); - t0 = madd(Vec3ff(f0),at0,f1*bt0); - p1 = madd(Vec3ff(f0),ap1,f1*bp1); - t1 = madd(Vec3ff(f0),at1,f1*bt1); - } - - /*! gathers the hermite curve starting with i'th vertex */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const - { - p0 = vertex (i+0); - p1 = vertex (i+1); - t0 = tangent(i+0); - t1 = tangent(i+1); - n0 = normal(i+0); - n1 = normal(i+1); - dn0 = dnormal(i+0); - dn1 = dnormal(i+1); - } - - /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const - { - p0 = vertex (i+0,itime); - p1 = vertex (i+1,itime); - t0 = tangent(i+0,itime); - t1 = tangent(i+1,itime); - n0 = normal(i+0,itime); - n1 = normal(i+1,itime); - dn0 = dnormal(i+0,itime); - dn1 = dnormal(i+1,itime); - } - - /*! loads curve vertices for specified time */ - __forceinline void gather_hermite(Vec3ff& p0, Vec3fa& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3fa& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const float f0 = 1.0f - ftime, f1 = ftime; - Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1; - gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime); - Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1; - gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1); - p0 = madd(Vec3ff(f0),ap0,f1*bp0); - t0 = madd(Vec3ff(f0),at0,f1*bt0); - n0 = madd(Vec3ff(f0),an0,f1*bn0); - dn0= madd(Vec3ff(f0),adn0,f1*bdn0); - p1 = madd(Vec3ff(f0),ap1,f1*bp1); - t1 = madd(Vec3ff(f0),at1,f1*bt1); - n1 = madd(Vec3ff(f0),an1,f1*bn1); - dn1= madd(Vec3ff(f0),adn1,f1*bdn1); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const - { - Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1; - unsigned int vertexID = curve(primID); - gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime); - - SourceCurve3ff ccurve(v0,t0,v1,t1); - SourceCurve3fa ncurve(n0,dn0,n1,dn1); - ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve); - return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); - } - - template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> - __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const - { - float ftime; - const size_t itime = timeSegment(time, ftime); - const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0); - const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1); - return clerp(curve0,curve1,ftime); - } - - private: - void resizeBuffers(unsigned int numSteps); - - public: - BufferView<unsigned int> curves; //!< array of curve indices - BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer - BufferView<Vec3fa> normals0; //!< fast access to first normal buffer - BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer - BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer - vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep - vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep - vector<BufferView<Vec3ff>> tangents; //!< tangent array for each timestep - vector<BufferView<Vec3fa>> dnormals; //!< normal derivative array for each timestep - BufferView<char> flags; //!< start, end flag per segment - vector<BufferView<char>> vertexAttribs; //!< user buffers - int tessellationRate; //!< tessellation rate for flat curve - float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii - }; - - DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType); -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h deleted file mode 100644 index 69cf612275..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bezier_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveBezierCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveBezierCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveBezierCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h deleted file mode 100644 index d37e41098e..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_bspline_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveBSplineCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveBSplineCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveBSplineCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h deleted file mode 100644 index a133a11d63..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_catmullrom_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveCatmullRomCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveCatmullRomCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveCatmullRomCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h deleted file mode 100644 index 9aec35da45..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_hermite_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveHermiteCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveHermiteCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveHermiteCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h deleted file mode 100644 index dd37d194f5..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_linear_curve.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurveLinearCurveInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector4iMB(VirtualCurveIntersector &prim); -#if defined(__AVX__) - void AddVirtualCurveLinearCurveInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurveLinearCurveInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h b/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h deleted file mode 100644 index fe5ceed840..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual_point.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2020 Light Transport Entertainment Inc. -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "curve_intersector_virtual.h" - -namespace embree -{ - namespace isa - { - void AddVirtualCurvePointInterector4i(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector4v(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector4iMB(VirtualCurveIntersector &prim); - -#if defined (__AVX__) - void AddVirtualCurvePointInterector8i(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector8v(VirtualCurveIntersector &prim); - void AddVirtualCurvePointInterector8iMB(VirtualCurveIntersector &prim); -#endif - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h deleted file mode 100644 index f65b4abf61..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_moeller.h +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subgrid.h" -#include "quad_intersector_moeller.h" - -namespace embree -{ - namespace isa - { - - /* ----------------------------- */ - /* -- single ray intersectors -- */ - /* ----------------------------- */ - - template<int M> - __forceinline void interpolateUV(MoellerTrumboreHitM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid) - { - /* correct U,V interpolation across the entire grid */ - const vint<M> sx((int)subgrid.x()); - const vint<M> sy((int)subgrid.y()); - const vint<M> sxM(sx + vint<M>(0,1,1,0)); - const vint<M> syM(sy + vint<M>(0,0,1,1)); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.U = (hit.U + (vfloat<M>)sxM * hit.absDen) * inv_resX; - hit.V = (hit.V + (vfloat<M>)syM * hit.absDen) * inv_resY; - } - - template<int M, bool filter> - struct SubGridQuadMIntersector1MoellerTrumbore; - - template<int M, bool filter> - struct SubGridQuadMIntersector1MoellerTrumbore - { - __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} - - __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - __forceinline void intersect(RayHit& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - MoellerTrumboreHitM<M> hit; - MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - - /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - MoellerTrumboreHitM<M> hit; - MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) - return true; - } - - /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) - return true; - } - return false; - } - }; - -#if defined (__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<bool filter> - struct SubGridQuadMIntersector1MoellerTrumbore<4,filter> - { - __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} - - __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - MoellerTrumboreHitM<8> hit; - MoellerTrumboreIntersector1<8> intersector(ray,nullptr); - const vbool8 flags(0,0,0,0,1,1,1,1); - if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) - { - vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; - -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - /* correct U,V interpolation across the entire grid */ - const vint8 sx((int)subgrid.x()); - const vint8 sy((int)subgrid.y()); - const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0)); - const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1)); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX; - hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY; - - if (unlikely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - }; - -#endif - - // ============================================================================================================================ - // ============================================================================================================================ - // ============================================================================================================================ - - - /* ----------------------------- */ - /* -- ray packet intersectors -- */ - /* ----------------------------- */ - - template<int K> - struct SubGridQuadHitK - { - __forceinline SubGridQuadHitK(const vfloat<K>& U, - const vfloat<K>& V, - const vfloat<K>& T, - const vfloat<K>& absDen, - const Vec3vf<K>& Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid& subgrid, - const unsigned int i) - : U(U), V(V), T(T), absDen(absDen), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vfloat<K> rcpAbsDen = rcp(absDen); - const vfloat<K> t = T * rcpAbsDen; - const vfloat<K> u0 = min(U * rcpAbsDen,1.0f); - const vfloat<K> v0 = min(V * rcpAbsDen,1.0f); - const vfloat<K> u1 = vfloat<K>(1.0f) - u0; - const vfloat<K> v1 = vfloat<K>(1.0f) - v0; - const vfloat<K> uu = select(flags,u1,u0); - const vfloat<K> vv = select(flags,v1,v0); - const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); - const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); - const float inv_resX = rcp((float)(int)(g.resX-1)); - const float inv_resY = rcp((float)(int)(g.resY-1)); - const vfloat<K> u = (uu + (float)(int)sx) * inv_resX; - const vfloat<K> v = (vv + (float)(int)sy) * inv_resY; - const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> T; - const vfloat<K> absDen; - const vbool<K> flags; - const Vec3vf<K> tri_Ng; - - const GridMesh::Grid &g; - const SubGrid& subgrid; - const size_t i; - }; - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKMoellerTrumboreBase - { - __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {} - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, - const Vec3vf<K>& tri_Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - /* calculate denominator */ - vbool<K> valid = valid0; - const Vec3vf<K> C = tri_v0 - ray.org; - const Vec3vf<K> R = cross(C,ray.dir); - const vfloat<K> den = dot(tri_Ng,ray.dir); - const vfloat<K> absDen = abs(den); - const vfloat<K> sgnDen = signmsk(den); - - /* test against edge p2 p0 */ - const vfloat<K> U = dot(R,tri_e2) ^ sgnDen; - valid &= U >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p0 p1 */ - const vfloat<K> V = dot(R,tri_e1) ^ sgnDen; - valid &= V >= 0.0f; - if (likely(none(valid))) return false; - - /* test against edge p1 p2 */ - const vfloat<K> W = absDen-U-V; - valid &= W >= 0.0f; - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<K> T = dot(tri_Ng,C) ^ sgnDen; - valid &= (absDen*ray.tnear() < T) & (T <= absDen*ray.tfar); - if (unlikely(none(valid))) return false; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - valid &= den < vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#else - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; -#endif - - /* calculate hit information */ - SubGridQuadHitK<K> hit(U,V,T,absDen,tri_Ng,flags,g,subgrid,i); - return epilog(valid,hit); - } - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = tri_v0-tri_v1; - const Vec3vf<K> e2 = tri_v2-tri_v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray,tri_v0,e1,e2,Ng,flags,g,subgrid,i,epilog); - } - - template<typename Epilog> - __forceinline bool intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const Vec3vf<K>& v3, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog); - if (none(valid0)) return true; - intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog); - return none(valid0); - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - const Vec3vf<M>& tri_Ng, - MoellerTrumboreHitM<M> &hit) - { - /* calculate denominator */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> C = Vec3vf<M>(tri_v0) - O; - const Vec3vf<M> R = cross(C,D); - const vfloat<M> den = dot(Vec3vf<M>(tri_Ng),D); - const vfloat<M> absDen = abs(den); - const vfloat<M> sgnDen = signmsk(den); - - /* perform edge tests */ - const vfloat<M> U = dot(R,Vec3vf<M>(tri_e2)) ^ sgnDen; - const vfloat<M> V = dot(R,Vec3vf<M>(tri_e1)) ^ sgnDen; - - /* perform backface culling */ -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = (den < vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#else - vbool<M> valid = (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); -#endif - if (likely(none(valid))) return false; - - /* perform depth test */ - const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; - valid &= (absDen*vfloat<M>(ray.tnear()[k]) < T) & (T <= absDen*vfloat<M>(ray.tfar[k])); - if (likely(none(valid))) return false; - - /* calculate hit information */ - new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); - return true; - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - MoellerTrumboreHitM<M> &hit) - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - const Vec3vf<M> Ng = cross(e2,e1); - return intersect1(ray,k,v0,e1,e2,Ng,hit); - } - - }; - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter> - { - __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {} - - __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - MoellerTrumboreHitM<4> hit; - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - epilog(hit.valid,hit); - } - - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - MoellerTrumboreHitM<4> hit; - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,hit)) - { - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) return true; - } - - if (SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,hit)) - { - hit.U = hit.absDen - hit.U; - hit.V = hit.absDen - hit.V; - interpolateUV<M>(hit,g,subgrid); - if (epilog(hit.valid,hit)) return true; - } - return false; - } - }; - - -#if defined (__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<int K, bool filter> - struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter> - { - __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - const vbool8 flags(0,0,0,0,1,1,1,1); - - MoellerTrumboreHitM<8> hit; - if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit)) - { - vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - - /* correct U,V interpolation across the entire grid */ - const vint8 sx((int)subgrid.x()); - const vint8 sy((int)subgrid.y()); - const vint8 sx8(sx + vint8(0,1,1,0,0,1,1,0)); - const vint8 sy8(sy + vint8(0,0,1,1,0,0,1,1)); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.U = (hit.U + (vfloat8)sx8 * absDen) * inv_resX; - hit.V = (hit.V + (vfloat8)sy8 * absDen) * inv_resY; - if (unlikely(epilog(hit.valid,hit))) - return true; - - } - return false; - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); - } - }; - -#endif - - - - } -} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h b/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h deleted file mode 100644 index 1cd88aa799..0000000000 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector_pluecker.h +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright 2009-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once - -#include "subgrid.h" -#include "quad_intersector_moeller.h" -#include "quad_intersector_pluecker.h" - -namespace embree -{ - namespace isa - { - - template<int M> - struct SubGridQuadHitPlueckerM - { - __forceinline SubGridQuadHitPlueckerM() {} - - __forceinline SubGridQuadHitPlueckerM(const vbool<M>& valid, - const vfloat<M>& U, - const vfloat<M>& V, - const vfloat<M>& UVW, - const vfloat<M>& t, - const Vec3vf<M>& Ng, - const vbool<M>& flags) : valid(valid), vt(t) - { - const vbool<M> invalid = abs(UVW) < min_rcp_input; - const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); - const vfloat<M> u = min(U * rcpUVW,1.0f); - const vfloat<M> v = min(V * rcpUVW,1.0f); - const vfloat<M> u1 = vfloat<M>(1.0f) - u; - const vfloat<M> v1 = vfloat<M>(1.0f) - v; -#if !defined(__AVX__) || defined(EMBREE_BACKFACE_CULLING) - vu = select(flags,u1,u); - vv = select(flags,v1,v); - vNg = Vec3vf<M>(Ng.x,Ng.y,Ng.z); -#else - const vfloat<M> flip = select(flags,vfloat<M>(-1.0f),vfloat<M>(1.0f)); - vv = select(flags,u1,v); - vu = select(flags,v1,u); - vNg = Vec3vf<M>(flip*Ng.x,flip*Ng.y,flip*Ng.z); -#endif - } - - __forceinline void finalize() - { - } - - __forceinline Vec2f uv(const size_t i) - { - const float u = vu[i]; - const float v = vv[i]; - return Vec2f(u,v); - } - - __forceinline float t(const size_t i) { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - - public: - vbool<M> valid; - vfloat<M> vu; - vfloat<M> vv; - vfloat<M> vt; - Vec3vf<M> vNg; - }; - - template<int M> - __forceinline void interpolateUV(SubGridQuadHitPlueckerM<M> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY) - { - /* correct U,V interpolation across the entire grid */ - const vint<M> sx((int)subgrid.x()); - const vint<M> sy((int)subgrid.y()); - const vint<M> sxM(sx + stepX); - const vint<M> syM(sy + stepY); - const float inv_resX = rcp((float)((int)g.resX-1)); - const float inv_resY = rcp((float)((int)g.resY-1)); - hit.vu = (hit.vu + vfloat<M>(sxM)) * inv_resX; - hit.vv = (hit.vv + vfloat<M>(syM)) * inv_resY; - } - - template<int M> - __forceinline static bool intersectPluecker(Ray& ray, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const vbool<M>& flags, - SubGridQuadHitPlueckerM<M> &hit) - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); - const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,Ng,flags); - return true; - } - - template<int M, bool filter> - struct SubGridQuadMIntersector1Pluecker; - - template<int M, bool filter> - struct SubGridQuadMIntersector1Pluecker - { - __forceinline SubGridQuadMIntersector1Pluecker() {} - - __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - __forceinline void intersect(RayHit& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - SubGridQuadHitPlueckerM<M> hit; - Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - - /* intersect second triangle */ - if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - SubGridQuadHitPlueckerM<M> hit; - Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); - - /* intersect first triangle */ - if (intersectPluecker(ray,v0,v1,v3,vbool<M>(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) - return true; - } - - /* intersect second triangle */ - if (intersectPluecker(ray,v2,v3,v1,vbool<M>(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) - return true; - } - - return false; - } - }; - -#if defined (__AVX__) - - /*! Intersects 4 quads with 1 ray using AVX */ - template<bool filter> - struct SubGridQuadMIntersector1Pluecker<4,filter> - { - __forceinline SubGridQuadMIntersector1Pluecker() {} - - __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const - { - const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); - const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); -#else - const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); - const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); -#endif - SubGridQuadHitPlueckerM<8> hit; - const vbool8 flags(0,0,0,0,1,1,1,1); - if (unlikely(intersectPluecker(ray,vtx0,vtx1,vtx2,flags,hit))) - { - /* correct U,V interpolation across the entire grid */ - interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); - if (unlikely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const GridMesh::Grid &g, const SubGrid& subgrid) const - { - return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); - } - }; - -#endif - - - /* ----------------------------- */ - /* -- ray packet intersectors -- */ - /* ----------------------------- */ - - template<int K> - struct SubGridQuadHitPlueckerK - { - __forceinline SubGridQuadHitPlueckerK(const vfloat<K>& U, - const vfloat<K>& V, - const vfloat<K>& UVW, - const vfloat<K>& t, - const Vec3vf<K>& Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid& subgrid, - const unsigned int i) - : U(U), V(V), UVW(UVW), t(t), flags(flags), tri_Ng(Ng), g(g), subgrid(subgrid), i(i) {} - - __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const - { - const vbool<K> invalid = abs(UVW) < min_rcp_input; - const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); - const vfloat<K> u0 = min(U * rcpUVW,1.0f); - const vfloat<K> v0 = min(V * rcpUVW,1.0f); - const vfloat<K> u1 = vfloat<K>(1.0f) - u0; - const vfloat<K> v1 = vfloat<K>(1.0f) - v0; - const vfloat<K> uu = select(flags,u1,u0); - const vfloat<K> vv = select(flags,v1,v0); - const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); - const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); - const float inv_resX = rcp((float)(int)(g.resX-1)); - const float inv_resY = rcp((float)(int)(g.resY-1)); - const vfloat<K> u = (uu + (float)(int)sx) * inv_resX; - const vfloat<K> v = (vv + (float)(int)sy) * inv_resY; - const Vec3vf<K> Ng(tri_Ng.x,tri_Ng.y,tri_Ng.z); - return std::make_tuple(u,v,t,Ng); - } - - private: - const vfloat<K> U; - const vfloat<K> V; - const vfloat<K> UVW; - const vfloat<K> t; - const vfloat<K> absDen; - const vbool<K> flags; - const Vec3vf<K> tri_Ng; - - const GridMesh::Grid &g; - const SubGrid& subgrid; - const size_t i; - }; - - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKPlueckerBase - { - __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const Vec3vf<K>& tri_Ng, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - /* calculate denominator */ - /* calculate vertices relative to ray origin */ - vbool<K> valid = valid0; - const Vec3vf<K> O = ray.org; - const Vec3vf<K> D = ray.dir; - const Vec3vf<K> v0 = tri_v0-O; - const Vec3vf<K> v1 = tri_v1-O; - const Vec3vf<K> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<K> e0 = v2-v0; - const Vec3vf<K> e1 = v0-v1; - const Vec3vf<K> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<K> U = dot(Vec3vf<K>(cross(e0,v2+v0)),D); - const vfloat<K> V = dot(Vec3vf<K>(cross(e1,v0+v1)),D); - const vfloat<K> W = dot(Vec3vf<K>(cross(e2,v1+v2)),D); - const vfloat<K> UVW = U+V+W; - const vfloat<K> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - valid &= max(U,V,W) <= eps; -#else - valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<K> den = twice(dot(Vec3vf<K>(Ng),D)); - - /* perform depth test */ - const vfloat<K> T = twice(dot(v0,Vec3vf<K>(Ng))); - const vfloat<K> t = rcp(den)*T; - valid &= ray.tnear() <= t & t <= ray.tfar; - valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; - - /* calculate hit information */ - SubGridQuadHitPlueckerK<K> hit(U,V,UVW,t,tri_Ng,flags,g,subgrid,i); - return epilog(valid,hit); - } - - template<typename Epilog> - __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const vbool<K>& flags, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - const Vec3vf<K> e1 = v0-v1; - const Vec3vf<K> e2 = v2-v0; - const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray,v0,v1,v2,Ng,flags,g,subgrid,i,epilog); - } - - template<typename Epilog> - __forceinline bool intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& v0, - const Vec3vf<K>& v1, - const Vec3vf<K>& v2, - const Vec3vf<K>& v3, - const GridMesh::Grid &g, - const SubGrid &subgrid, - const unsigned int i, - const Epilog& epilog) const - { - intersectK(valid0,ray,v0,v1,v3,vbool<K>(false),g,subgrid,i,epilog); - if (none(valid0)) return true; - intersectK(valid0,ray,v2,v3,v1,vbool<K>(true ),g,subgrid,i,epilog); - return none(valid0); - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& tri_v0, - const Vec3vf<M>& tri_v1, - const Vec3vf<M>& tri_v2, - const Vec3vf<M>& tri_Ng, - const vbool<M>& flags, - SubGridQuadHitPlueckerM<M> &hit) - { - /* calculate vertices relative to ray origin */ - const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); - const Vec3vf<M> D = broadcast<vfloat<M>>(ray.dir,k); - const Vec3vf<M> v0 = tri_v0-O; - const Vec3vf<M> v1 = tri_v1-O; - const Vec3vf<M> v2 = tri_v2-O; - - /* calculate triangle edges */ - const Vec3vf<M> e0 = v2-v0; - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v1-v2; - - /* perform edge tests */ - const vfloat<M> U = dot(cross(e0,v2+v0),D); - const vfloat<M> V = dot(cross(e1,v0+v1),D); - const vfloat<M> W = dot(cross(e2,v1+v2),D); - const vfloat<M> UVW = U+V+W; - const vfloat<M> eps = float(ulp)*abs(UVW); -#if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps ; -#else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif - if (unlikely(none(valid))) return false; - - /* calculate geometry normal and denominator */ - const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); - const vfloat<M> den = twice(dot(Ng,D)); - - /* perform depth test */ - const vfloat<M> T = twice(dot(v0,Ng)); - const vfloat<M> t = rcp(den)*T; - valid &= vfloat<M>(ray.tnear()[k]) <= t & t <= vfloat<M>(ray.tfar[k]); - if (unlikely(none(valid))) return false; - - /* avoid division by 0 */ - valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; - - /* update hit information */ - new (&hit) SubGridQuadHitPlueckerM<M>(valid,U,V,UVW,t,tri_Ng,flags); - return true; - } - - static __forceinline bool intersect1(RayK<K>& ray, - size_t k, - const Vec3vf<M>& v0, - const Vec3vf<M>& v1, - const Vec3vf<M>& v2, - const vbool<M>& flags, - SubGridQuadHitPlueckerM<M> &hit) - { - const Vec3vf<M> e1 = v0-v1; - const Vec3vf<M> e2 = v2-v0; - const Vec3vf<M> Ng = cross(e2,e1); // FIXME: optimize!!! - return intersect1(ray,k,v0,v1,v2,Ng,flags,hit); - } - - }; - - template<int M, int K, bool filter> - struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter> - { - __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) - : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} - - __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - SubGridQuadHitPlueckerM<4> hit; - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - epilog(hit.valid,hit); - } - - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const - { - Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); - - SubGridQuadHitPlueckerM<4> hit; - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v0,v1,v3,vboolf4(false),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) return true; - } - - if (SubGridQuadMIntersectorKPlueckerBase<4,K,filter>::intersect1(ray,k,v2,v3,v1,vboolf4(true),hit)) - { - interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); - if (epilog(hit.valid,hit)) return true; - } - return false; - } - }; - - } -} diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_any_of.h b/thirdparty/embree/common/algorithms/parallel_any_of.h index 01f1f80f6c..a64e4a1889 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_any_of.h +++ b/thirdparty/embree/common/algorithms/parallel_any_of.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_filter.h b/thirdparty/embree/common/algorithms/parallel_filter.h index 5823fc631f..090ef164c2 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_filter.h +++ b/thirdparty/embree/common/algorithms/parallel_filter.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for.h b/thirdparty/embree/common/algorithms/parallel_for.h index 51d296fb16..645681ac63 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_for.h +++ b/thirdparty/embree/common/algorithms/parallel_for.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -8,12 +8,6 @@ #include "../math/math.h" #include "../math/range.h" -#if defined(TASKING_GCD) && defined(BUILD_IOS) -#include <dispatch/dispatch.h> -#include <algorithm> -#include <type_traits> -#endif - namespace embree { /* parallel_for without range */ @@ -29,28 +23,10 @@ namespace embree if (!TaskScheduler::wait()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- } -#elif defined(TASKING_GCD) && defined(BUILD_IOS) - - const size_t baselineNumBlocks = (TaskScheduler::threadCount() > 1)? TaskScheduler::threadCount() : 1; - const size_t length = N; - const size_t blockSize = (length + baselineNumBlocks-1) / baselineNumBlocks; - const size_t numBlocks = (length + blockSize-1) / blockSize; - - dispatch_apply(numBlocks, DISPATCH_APPLY_AUTO, ^(size_t currentBlock) { - - const size_t start = (currentBlock * blockSize); - const size_t blockLength = std::min(length - start, blockSize); - const size_t end = start + blockLength; - - for(size_t i=start; i < end; i++) - { - func(i); - } - }); - + #elif defined(TASKING_TBB) #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; @@ -60,7 +36,7 @@ namespace embree if (context.is_group_execution_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { @@ -69,7 +45,7 @@ namespace embree if (tbb::task::self().is_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #endif @@ -92,28 +68,9 @@ namespace embree if (!TaskScheduler::wait()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- -#elif defined(TASKING_GCD) && defined(BUILD_IOS) - - const size_t baselineNumBlocks = (TaskScheduler::threadCount() > 1)? 4*TaskScheduler::threadCount() : 1; - const size_t length = last - first; - const size_t blockSizeByThreads = (length + baselineNumBlocks-1) / baselineNumBlocks; - size_t blockSize = std::max<size_t>(minStepSize,blockSizeByThreads); - blockSize += blockSize % 4; - - const size_t numBlocks = (length + blockSize-1) / blockSize; - - dispatch_apply(numBlocks, DISPATCH_APPLY_AUTO, ^(size_t currentBlock) { - - const size_t start = first + (currentBlock * blockSize); - const size_t end = std::min<size_t>(last, start + blockSize); - - func( embree::range<Index>(start,end) ); - }); - - #elif defined(TASKING_TBB) #if TBB_INTERFACE_VERSION >= 12002 tbb::task_group_context context; @@ -123,7 +80,7 @@ namespace embree if (context.is_group_execution_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #else tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) { @@ -132,7 +89,7 @@ namespace embree if (tbb::task::self().is_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #endif @@ -167,7 +124,7 @@ namespace embree if (context.is_group_execution_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { @@ -176,7 +133,7 @@ namespace embree if (tbb::task::self().is_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #endif } @@ -192,10 +149,10 @@ namespace embree func(i); },ap,context); if (context.is_group_execution_cancelled()) - // -- GODOT start -- - // throw std::runtime_error("task cancelled"); - abort(); - // -- GODOT end -- + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); + abort(); + // -- GODOT end -- #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); @@ -203,7 +160,7 @@ namespace embree if (tbb::task::self().is_cancelled()) // -- GODOT start -- // throw std::runtime_error("task cancelled"); - abort(); + abort(); // -- GODOT end -- #endif } diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for.h b/thirdparty/embree/common/algorithms/parallel_for_for.h index 852b8a0900..92c37a4a38 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for.h +++ b/thirdparty/embree/common/algorithms/parallel_for_for.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.h b/thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h index d2671d8a6a..b15b44a991 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_for_for_prefix_sum.h +++ b/thirdparty/embree/common/algorithms/parallel_for_for_prefix_sum.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_map.h b/thirdparty/embree/common/algorithms/parallel_map.h index 02e1a8f8d0..15c098fe20 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_map.h +++ b/thirdparty/embree/common/algorithms/parallel_map.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_partition.h b/thirdparty/embree/common/algorithms/parallel_partition.h index 3b3ad7c854..a1cbdc8e04 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_partition.h +++ b/thirdparty/embree/common/algorithms/parallel_partition.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h b/thirdparty/embree/common/algorithms/parallel_prefix_sum.h index 117c7a79b0..208bb4e480 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_prefix_sum.h +++ b/thirdparty/embree/common/algorithms/parallel_prefix_sum.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h b/thirdparty/embree/common/algorithms/parallel_reduce.h index 0daf94e50e..8271372ea4 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h +++ b/thirdparty/embree/common/algorithms/parallel_reduce.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,7 +43,7 @@ namespace embree template<typename Index, typename Value, typename Func, typename Reduction> __forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction ) { -#if defined(TASKING_INTERNAL) || (defined(TASKING_GCD) && defined(BUILD_IOS)) +#if defined(TASKING_INTERNAL) /* fast path for small number of iterations */ Index taskCount = (last-first+minStepSize-1)/minStepSize; diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_set.h b/thirdparty/embree/common/algorithms/parallel_set.h index 640beba7ec..7eae577457 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_set.h +++ b/thirdparty/embree/common/algorithms/parallel_set.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_sort.h b/thirdparty/embree/common/algorithms/parallel_sort.h index a758227c1b..30e56c2bfc 100644 --- a/thirdparty/embree-aarch64/common/algorithms/parallel_sort.h +++ b/thirdparty/embree/common/algorithms/parallel_sort.h @@ -1,13 +1,10 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../simd/simd.h" #include "parallel_for.h" -#if defined(TASKING_GCD) && defined(BUILD_IOS) -#include "../sys/alloc.h" -#endif #include <algorithm> namespace embree @@ -323,7 +320,7 @@ namespace embree #pragma nounroll #endif for (size_t i=startID; i<endID; i++) { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask; #else const Key index = ((Key)src[i] >> shift) & mask; @@ -385,7 +382,7 @@ namespace embree #endif for (size_t i=startID; i<endID; i++) { const Ty elt = src[i]; -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) const size_t index = ((size_t)(Key)src[i] >> (size_t)shift) & (size_t)mask; #else const size_t index = ((Key)src[i] >> shift) & mask; diff --git a/thirdparty/embree-aarch64/common/lexers/parsestream.h b/thirdparty/embree/common/lexers/parsestream.h index db46dc114f..f65a52cb47 100644 --- a/thirdparty/embree-aarch64/common/lexers/parsestream.h +++ b/thirdparty/embree/common/lexers/parsestream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/lexers/stream.h b/thirdparty/embree/common/lexers/stream.h index 3f75677e68..a40c15f8eb 100644 --- a/thirdparty/embree-aarch64/common/lexers/stream.h +++ b/thirdparty/embree/common/lexers/stream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/lexers/streamfilters.h b/thirdparty/embree/common/lexers/streamfilters.h index 25580a77b8..3592b77b03 100644 --- a/thirdparty/embree-aarch64/common/lexers/streamfilters.h +++ b/thirdparty/embree/common/lexers/streamfilters.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp index 98dc80ad59..a037869506 100644 --- a/thirdparty/embree-aarch64/common/lexers/stringstream.cpp +++ b/thirdparty/embree/common/lexers/stringstream.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "stringstream.h" diff --git a/thirdparty/embree-aarch64/common/lexers/stringstream.h b/thirdparty/embree/common/lexers/stringstream.h index e6dbd4aecc..6d9c27e3cd 100644 --- a/thirdparty/embree-aarch64/common/lexers/stringstream.h +++ b/thirdparty/embree/common/lexers/stringstream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/lexers/tokenstream.cpp b/thirdparty/embree/common/lexers/tokenstream.cpp index d05be65862..6ed6f2045a 100644 --- a/thirdparty/embree-aarch64/common/lexers/tokenstream.cpp +++ b/thirdparty/embree/common/lexers/tokenstream.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "tokenstream.h" diff --git a/thirdparty/embree-aarch64/common/lexers/tokenstream.h b/thirdparty/embree/common/lexers/tokenstream.h index 72a7b4f2f3..6e49dd0b39 100644 --- a/thirdparty/embree-aarch64/common/lexers/tokenstream.h +++ b/thirdparty/embree/common/lexers/tokenstream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/affinespace.h b/thirdparty/embree/common/math/affinespace.h index 32452fbe72..9d4a0f0846 100644 --- a/thirdparty/embree-aarch64/common/math/affinespace.h +++ b/thirdparty/embree/common/math/affinespace.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/bbox.h b/thirdparty/embree/common/math/bbox.h index 29bb13912b..bc43155358 100644 --- a/thirdparty/embree-aarch64/common/math/bbox.h +++ b/thirdparty/embree/common/math/bbox.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -77,7 +77,7 @@ namespace embree return lower > upper; } -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined(__SSE__) template<> __forceinline bool BBox<Vec3fa>::empty() const { return !all(le_mask(lower,upper)); } @@ -228,11 +228,11 @@ namespace embree /// SSE / AVX / MIC specializations //////////////////////////////////////////////////////////////////////////////// -#if defined (__SSE__) || defined(__ARM_NEON) +#if defined __SSE__ #include "../simd/sse.h" #endif -#if defined (__AVX__) +#if defined __AVX__ #include "../simd/avx.h" #endif diff --git a/thirdparty/embree-aarch64/common/math/col3.h b/thirdparty/embree/common/math/col3.h index f52015fb88..3f50c04393 100644 --- a/thirdparty/embree-aarch64/common/math/col3.h +++ b/thirdparty/embree/common/math/col3.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -42,6 +42,6 @@ namespace embree } /*! default template instantiations */ - typedef Col3<uint8_t > Col3uc; + typedef Col3<unsigned char> Col3uc; typedef Col3<float > Col3f; } diff --git a/thirdparty/embree-aarch64/common/math/col4.h b/thirdparty/embree/common/math/col4.h index 90df293f8e..788508516b 100644 --- a/thirdparty/embree-aarch64/common/math/col4.h +++ b/thirdparty/embree/common/math/col4.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -42,6 +42,6 @@ namespace embree } /*! default template instantiations */ - typedef Col4<uint8_t > Col4uc; + typedef Col4<unsigned char> Col4uc; typedef Col4<float > Col4f; } diff --git a/thirdparty/embree-aarch64/common/math/color.h b/thirdparty/embree/common/math/color.h index c3083e4fc0..529584ea16 100644 --- a/thirdparty/embree-aarch64/common/math/color.h +++ b/thirdparty/embree/common/math/color.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -52,17 +52,17 @@ namespace embree __forceinline void set(Col3uc& d) const { vfloat4 s = clamp(vfloat4(m128))*255.0f; - d.r = (uint8_t)(s[0]); - d.g = (uint8_t)(s[1]); - d.b = (uint8_t)(s[2]); + d.r = (unsigned char)(s[0]); + d.g = (unsigned char)(s[1]); + d.b = (unsigned char)(s[2]); } __forceinline void set(Col4uc& d) const { vfloat4 s = clamp(vfloat4(m128))*255.0f; - d.r = (uint8_t)(s[0]); - d.g = (uint8_t)(s[1]); - d.b = (uint8_t)(s[2]); - d.a = (uint8_t)(s[3]); + d.r = (unsigned char)(s[0]); + d.g = (unsigned char)(s[1]); + d.b = (unsigned char)(s[2]); + d.a = (unsigned char)(s[3]); } //////////////////////////////////////////////////////////////////////////////// @@ -114,16 +114,16 @@ namespace embree __forceinline void set(Col3uc& d) const { vfloat4 s = clamp(vfloat4(m128))*255.0f; - d.r = (uint8_t)(s[0]); - d.g = (uint8_t)(s[1]); - d.b = (uint8_t)(s[2]); + d.r = (unsigned char)(s[0]); + d.g = (unsigned char)(s[1]); + d.b = (unsigned char)(s[2]); } __forceinline void set(Col4uc& d) const { vfloat4 s = clamp(vfloat4(m128))*255.0f; - d.r = (uint8_t)(s[0]); - d.g = (uint8_t)(s[1]); - d.b = (uint8_t)(s[2]); + d.r = (unsigned char)(s[0]); + d.g = (unsigned char)(s[1]); + d.b = (unsigned char)(s[2]); d.a = 255; } @@ -152,37 +152,21 @@ namespace embree } __forceinline const Color rcp ( const Color& a ) { -#if defined(__aarch64__) && defined(BUILD_IOS) - __m128 reciprocal = _mm_rcp_ps(a.m128); - reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal); - return (const Color)reciprocal; -#else #if defined(__AVX512VL__) const Color r = _mm_rcp14_ps(a.m128); #else const Color r = _mm_rcp_ps(a.m128); #endif return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); -#endif //defined(__aarch64__) && defined(BUILD_IOS) } __forceinline const Color rsqrt( const Color& a ) { -#if defined(__aarch64__) && defined(BUILD_IOS) - __m128 r = _mm_rsqrt_ps(a.m128); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r)); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r)); - return r; -#else - #if defined(__AVX512VL__) __m128 r = _mm_rsqrt14_ps(a.m128); #else __m128 r = _mm_rsqrt_ps(a.m128); #endif return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); - -#endif //defined(__aarch64__) && defined(BUILD_IOS) } __forceinline const Color sqrt ( const Color& a ) { return _mm_sqrt_ps(a.m128); } diff --git a/thirdparty/embree/common/math/constants.cpp b/thirdparty/embree/common/math/constants.cpp new file mode 100644 index 0000000000..03919ae20c --- /dev/null +++ b/thirdparty/embree/common/math/constants.cpp @@ -0,0 +1,27 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "constants.h" + +namespace embree +{ + TrueTy True; + FalseTy False; + ZeroTy zero; + OneTy one; + NegInfTy neg_inf; + PosInfTy inf; + PosInfTy pos_inf; + NaNTy nan; + UlpTy ulp; + PiTy pi; + OneOverPiTy one_over_pi; + TwoPiTy two_pi; + OneOverTwoPiTy one_over_two_pi; + FourPiTy four_pi; + OneOverFourPiTy one_over_four_pi; + StepTy step; + ReverseStepTy reverse_step; + EmptyTy empty; + UndefinedTy undefined; +} diff --git a/thirdparty/embree-aarch64/common/math/constants.h b/thirdparty/embree/common/math/constants.h index e80abec80f..578473a8ab 100644 --- a/thirdparty/embree-aarch64/common/math/constants.h +++ b/thirdparty/embree/common/math/constants.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -12,19 +12,6 @@ #include <cfloat> #include <climits> -// Math constants may not be defined in libcxx + mingw + strict C++ standard -#if defined(__MINGW32__) - -// TODO(LTE): use constexpr -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif -#ifndef M_1_PI -#define M_1_PI 0.31830988618379067154 -#endif - -#endif // __MINGW32__ - namespace embree { static MAYBE_UNUSED const float one_over_255 = 1.0f/255.0f; @@ -57,8 +44,8 @@ namespace embree __forceinline operator unsigned int ( ) const { return 0; } __forceinline operator short ( ) const { return 0; } __forceinline operator unsigned short ( ) const { return 0; } - __forceinline operator int8_t ( ) const { return 0; } - __forceinline operator uint8_t ( ) const { return 0; } + __forceinline operator char ( ) const { return 0; } + __forceinline operator unsigned char ( ) const { return 0; } }; extern MAYBE_UNUSED ZeroTy zero; @@ -75,8 +62,8 @@ namespace embree __forceinline operator unsigned int ( ) const { return 1; } __forceinline operator short ( ) const { return 1; } __forceinline operator unsigned short ( ) const { return 1; } - __forceinline operator int8_t ( ) const { return 1; } - __forceinline operator uint8_t ( ) const { return 1; } + __forceinline operator char ( ) const { return 1; } + __forceinline operator unsigned char ( ) const { return 1; } }; extern MAYBE_UNUSED OneTy one; @@ -93,8 +80,8 @@ namespace embree __forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::min(); } __forceinline operator short ( ) const { return std::numeric_limits<short>::min(); } __forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::min(); } - __forceinline operator int8_t ( ) const { return std::numeric_limits<int8_t>::min(); } - __forceinline operator uint8_t ( ) const { return std::numeric_limits<uint8_t>::min(); } + __forceinline operator char ( ) const { return std::numeric_limits<char>::min(); } + __forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::min(); } }; @@ -112,8 +99,8 @@ namespace embree __forceinline operator unsigned int ( ) const { return std::numeric_limits<unsigned int>::max(); } __forceinline operator short ( ) const { return std::numeric_limits<short>::max(); } __forceinline operator unsigned short ( ) const { return std::numeric_limits<unsigned short>::max(); } - __forceinline operator int8_t ( ) const { return std::numeric_limits<int8_t>::max(); } - __forceinline operator uint8_t ( ) const { return std::numeric_limits<uint8_t>::max(); } + __forceinline operator char ( ) const { return std::numeric_limits<char>::max(); } + __forceinline operator unsigned char ( ) const { return std::numeric_limits<unsigned char>::max(); } }; extern MAYBE_UNUSED PosInfTy inf; @@ -207,33 +194,4 @@ namespace embree }; extern MAYBE_UNUSED UndefinedTy undefined; - -#if defined(__aarch64__) - extern const uint32x4_t movemask_mask; - extern const uint32x4_t vzero; - extern const uint32x4_t v0x80000000; - extern const uint32x4_t v0x7fffffff; - extern const uint32x4_t v000F; - extern const uint32x4_t v00F0; - extern const uint32x4_t v00FF; - extern const uint32x4_t v0F00; - extern const uint32x4_t v0F0F; - extern const uint32x4_t v0FF0; - extern const uint32x4_t v0FFF; - extern const uint32x4_t vF000; - extern const uint32x4_t vF00F; - extern const uint32x4_t vF0F0; - extern const uint32x4_t vF0FF; - extern const uint32x4_t vFF00; - extern const uint32x4_t vFF0F; - extern const uint32x4_t vFFF0; - extern const uint32x4_t vFFFF; - extern const uint8x16_t v0022; - extern const uint8x16_t v1133; - extern const uint8x16_t v0101; - extern const float32x4_t vOne; - extern const float32x4_t vmOne; - extern const float32x4_t vInf; - extern const float32x4_t vmInf; -#endif } diff --git a/thirdparty/embree-aarch64/common/math/interval.h b/thirdparty/embree/common/math/interval.h index f06478e881..310add2129 100644 --- a/thirdparty/embree-aarch64/common/math/interval.h +++ b/thirdparty/embree/common/math/interval.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/lbbox.h b/thirdparty/embree/common/math/lbbox.h index 95df4a918d..2b397a05c8 100644 --- a/thirdparty/embree-aarch64/common/math/lbbox.h +++ b/thirdparty/embree/common/math/lbbox.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/linearspace2.h b/thirdparty/embree/common/math/linearspace2.h index b9a382962c..184ee695fb 100644 --- a/thirdparty/embree-aarch64/common/math/linearspace2.h +++ b/thirdparty/embree/common/math/linearspace2.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/linearspace3.h b/thirdparty/embree/common/math/linearspace3.h index 12b5bb776b..9eaa2cc2bb 100644 --- a/thirdparty/embree-aarch64/common/math/linearspace3.h +++ b/thirdparty/embree/common/math/linearspace3.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/math.h b/thirdparty/embree/common/math/math.h index 6d54abd44d..4bc54c1a6a 100644 --- a/thirdparty/embree-aarch64/common/math/math.h +++ b/thirdparty/embree/common/math/math.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -9,18 +9,15 @@ #include <cmath> #if defined(__ARM_NEON) -#include "SSE2NEON.h" -#if defined(NEON_AVX2_EMULATION) -#include "AVX2NEON.h" -#endif +#include "../simd/arm/emulation.h" #else #include <emmintrin.h> #include <xmmintrin.h> #include <immintrin.h> #endif -#if defined(__WIN32__) && !defined(__MINGW32__) -#if (__MSV_VER <= 1700) +#if defined(__WIN32__) +#if defined(_MSC_VER) && (_MSC_VER <= 1700) namespace std { __forceinline bool isinf ( const float x ) { return _finite(x) == 0; } @@ -47,7 +44,7 @@ namespace embree __forceinline int toInt (const float& a) { return int(a); } __forceinline float toFloat(const int& a) { return float(a); } -#if defined(__WIN32__) && !defined(__MINGW32__) +#if defined(__WIN32__) __forceinline bool finite ( const float x ) { return _finite(x) != 0; } #endif @@ -56,16 +53,6 @@ namespace embree __forceinline float rcp ( const float x ) { -#if defined(__aarch64__) - // Move scalar to vector register and do rcp. - __m128 a; - a[0] = x; - float32x4_t reciprocal = vrecpeq_f32(a); - reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); - return reciprocal[0]; -#else - const __m128 a = _mm_set_ss(x); #if defined(__AVX512VL__) @@ -79,74 +66,33 @@ namespace embree #else return _mm_cvtss_f32(_mm_mul_ss(r,_mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(r, a)))); #endif - -#endif //defined(__aarch64__) } __forceinline float signmsk ( const float x ) { -#if defined(__aarch64__) - // FP and Neon shares same vector register in arm64 - __m128 a; - __m128i b; - a[0] = x; - b[0] = 0x80000000; - a = _mm_and_ps(a, vreinterpretq_f32_s32(b)); - return a[0]; -#else return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); -#endif } __forceinline float xorf( const float x, const float y ) { -#if defined(__aarch64__) - // FP and Neon shares same vector register in arm64 - __m128 a; - __m128 b; - a[0] = x; - b[0] = y; - a = _mm_xor_ps(a, b); - return a[0]; -#else return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y))); -#endif } __forceinline float andf( const float x, const unsigned y ) { -#if defined(__aarch64__) - // FP and Neon shares same vector register in arm64 - __m128 a; - __m128i b; - a[0] = x; - b[0] = y; - a = _mm_and_ps(a, vreinterpretq_f32_s32(b)); - return a[0]; -#else return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y)))); -#endif } __forceinline float rsqrt( const float x ) { -#if defined(__aarch64__) - // FP and Neon shares same vector register in arm64 - __m128 a; - a[0] = x; - __m128 value = _mm_rsqrt_ps(a); - value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value)); - value = vmulq_f32(value, vrsqrtsq_f32(vmulq_f32(a, value), value)); - return value[0]; -#else - const __m128 a = _mm_set_ss(x); #if defined(__AVX512VL__) - const __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a); + __m128 r = _mm_rsqrt14_ss(_mm_set_ss(0.0f),a); #else - const __m128 r = _mm_rsqrt_ss(a); + __m128 r = _mm_rsqrt_ss(a); #endif - const __m128 c = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), - _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r))); - return _mm_cvtss_f32(c); + r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r))); +#if defined(__ARM_NEON) + r = _mm_add_ss(_mm_mul_ss(_mm_set_ss(1.5f), r), _mm_mul_ss(_mm_mul_ss(_mm_mul_ss(a, _mm_set_ss(-0.5f)), r), _mm_mul_ss(r, r))); #endif + return _mm_cvtss_f32(r); } -#if defined(__WIN32__) && (__MSC_VER <= 1700) && !defined(__MINGW32__) +#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700) __forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); } __forceinline double nextafter(double x, double y) { return _nextafter(x, y); } __forceinline int roundf(float f) { return (int)(f + 0.5f); } @@ -200,17 +146,7 @@ namespace embree __forceinline double floor( const double x ) { return ::floor (x); } __forceinline double ceil ( const double x ) { return ::ceil (x); } -#if defined(__aarch64__) - __forceinline float mini(float a, float b) { - // FP and Neon shares same vector register in arm64 - __m128 x; - __m128 y; - x[0] = a; - y[0] = b; - x = _mm_min_ps(x, y); - return x[0]; - } -#elif defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline float mini(float a, float b) { const __m128i ai = _mm_castps_si128(_mm_set_ss(a)); const __m128i bi = _mm_castps_si128(_mm_set_ss(b)); @@ -219,17 +155,7 @@ namespace embree } #endif -#if defined(__aarch64__) - __forceinline float maxi(float a, float b) { - // FP and Neon shares same vector register in arm64 - __m128 x; - __m128 y; - x[0] = a; - y[0] = b; - x = _mm_max_ps(x, y); - return x[0]; - } -#elif defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline float maxi(float a, float b) { const __m128i ai = _mm_castps_si128(_mm_set_ss(a)); const __m128i bi = _mm_castps_si128(_mm_set_ss(b)); @@ -246,7 +172,7 @@ namespace embree __forceinline int64_t min(int64_t a, int64_t b) { return a<b ? a:b; } __forceinline float min(float a, float b) { return a<b ? a:b; } __forceinline double min(double a, double b) { return a<b ? a:b; } -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) __forceinline size_t min(size_t a, size_t b) { return a<b ? a:b; } #endif @@ -263,7 +189,7 @@ namespace embree __forceinline int64_t max(int64_t a, int64_t b) { return a<b ? b:a; } __forceinline float max(float a, float b) { return a<b ? b:a; } __forceinline double max(double a, double b) { return a<b ? b:a; } -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) __forceinline size_t max(size_t a, size_t b) { return a<b ? b:a; } #endif @@ -305,16 +231,6 @@ namespace embree __forceinline float msub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } __forceinline float nmadd ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmadd_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } __forceinline float nmsub ( const float a, const float b, const float c) { return _mm_cvtss_f32(_mm_fnmsub_ss(_mm_set_ss(a),_mm_set_ss(b),_mm_set_ss(c))); } -#elif defined (__aarch64__) && defined(__clang__) -#pragma clang fp contract(fast) - - -__forceinline float madd ( const float a, const float b, const float c) { return a*b + c; } -__forceinline float msub ( const float a, const float b, const float c) { return a*b - c; } -__forceinline float nmadd ( const float a, const float b, const float c) { return c - a*b; } -__forceinline float nmsub ( const float a, const float b, const float c) { return -(c + a*b); } - -#pragma clang fp contract(on) #else __forceinline float madd ( const float a, const float b, const float c) { return a*b+c; } __forceinline float msub ( const float a, const float b, const float c) { return a*b-c; } @@ -363,15 +279,17 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur /*! exchange */ template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; } - - template<typename T> __forceinline T prod_diff(const T& a,const T& b,const T& c,const T& d) { -#if 1//!defined(__aarch64__) - return msub(a,b,c*d); -#else - return nmadd(c,d,a*b); -#endif - } - + /* load/store */ + template<typename Ty> struct mem; + + template<> struct mem<float> { + static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } + static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; } + + static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } + static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; } + }; + /*! bit reverse operation */ template<class T> __forceinline T bitReverse(const T& vin) @@ -389,7 +307,7 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur template<class T> __forceinline T bitInterleave(const T& xin, const T& yin, const T& zin) { - T x = xin, y = yin, z = zin; + T x = xin, y = yin, z = zin; x = (x | (x << 16)) & 0x030000FF; x = (x | (x << 8)) & 0x0300F00F; x = (x | (x << 4)) & 0x030C30C3; @@ -408,7 +326,7 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur return x | (y << 1) | (z << 2); } -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) template<> __forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi) diff --git a/thirdparty/embree-aarch64/common/math/obbox.h b/thirdparty/embree/common/math/obbox.h index 032b56904e..2fe8bbf071 100644 --- a/thirdparty/embree-aarch64/common/math/obbox.h +++ b/thirdparty/embree/common/math/obbox.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/quaternion.h b/thirdparty/embree/common/math/quaternion.h index 20c69bc62f..080800efcd 100644 --- a/thirdparty/embree-aarch64/common/math/quaternion.h +++ b/thirdparty/embree/common/math/quaternion.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/range.h b/thirdparty/embree/common/math/range.h index 762d9cd9ea..909fadb995 100644 --- a/thirdparty/embree-aarch64/common/math/range.h +++ b/thirdparty/embree/common/math/range.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/transcendental.h b/thirdparty/embree/common/math/transcendental.h index 6855d82b53..fd16c26e81 100644 --- a/thirdparty/embree-aarch64/common/math/transcendental.h +++ b/thirdparty/embree/common/math/transcendental.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -418,7 +418,7 @@ __forceinline void __rangeReduceLog(const T &input, } template <typename T> struct ExponentType { }; -template <int N> struct ExponentType<vfloat<N>> { typedef vint<N> Ty; }; +template <int N> struct ExponentType<vfloat_impl<N>> { typedef vint<N> Ty; }; template <> struct ExponentType<float> { typedef int Ty; }; template <typename T> diff --git a/thirdparty/embree-aarch64/common/math/vec2.h b/thirdparty/embree/common/math/vec2.h index a619459e9c..d62aef51f3 100644 --- a/thirdparty/embree-aarch64/common/math/vec2.h +++ b/thirdparty/embree/common/math/vec2.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -205,11 +205,11 @@ namespace embree #include "vec2fa.h" -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined __SSE__ #include "../simd/sse.h" #endif -#if defined(__AVX__) +#if defined __AVX__ #include "../simd/avx.h" #endif @@ -221,7 +221,7 @@ namespace embree { template<> __forceinline Vec2<float>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {} -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined(__SSE__) template<> __forceinline Vec2<vfloat4>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {} #endif diff --git a/thirdparty/embree-aarch64/common/math/vec2fa.h b/thirdparty/embree/common/math/vec2fa.h index 451ecd556c..a51fb68fd0 100644 --- a/thirdparty/embree-aarch64/common/math/vec2fa.h +++ b/thirdparty/embree/common/math/vec2fa.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -97,12 +97,6 @@ namespace embree __forceinline Vec2fa rcp ( const Vec2fa& a ) { -#if defined(__aarch64__) - __m128 reciprocal = _mm_rcp_ps(a.m128); - reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal); - return (const Vec2fa)reciprocal; -#else #if defined(__AVX512VL__) const Vec2fa r = _mm_rcp14_ps(a.m128); #else @@ -117,7 +111,6 @@ namespace embree #endif return res; -#endif //defined(__aarch64__) } __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return _mm_sqrt_ps(a.m128); } @@ -125,21 +118,12 @@ namespace embree __forceinline Vec2fa rsqrt( const Vec2fa& a ) { -#if defined(__aarch64__) - __m128 r = _mm_rsqrt_ps(a.m128); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r)); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r)); - return r; -#else - #if defined(__AVX512VL__) __m128 r = _mm_rsqrt14_ps(a.m128); #else __m128 r = _mm_rsqrt_ps(a.m128); #endif return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); - -#endif } __forceinline Vec2fa zero_fix(const Vec2fa& a) { @@ -172,7 +156,7 @@ namespace embree __forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) { return _mm_min_ps(a.m128,b.m128); } __forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) { return _mm_max_ps(a.m128,b.m128); } -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) { const vint4 ai = _mm_castps_si128(a); const vint4 bi = _mm_castps_si128(b); @@ -181,7 +165,7 @@ namespace embree } #endif -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) { const vint4 ai = _mm_castps_si128(a); const vint4 bi = _mm_castps_si128(b); @@ -292,9 +276,9 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// #if defined(__aarch64__) -__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); } -__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); } -//__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); } + //__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); } + __forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); } + __forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); } #elif defined (__SSE4_1__) //__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); } __forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); } diff --git a/thirdparty/embree-aarch64/common/math/vec3.h b/thirdparty/embree/common/math/vec3.h index 1870321715..ce94eff327 100644 --- a/thirdparty/embree-aarch64/common/math/vec3.h +++ b/thirdparty/embree/common/math/vec3.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -206,7 +206,8 @@ namespace embree template<typename T> __forceinline T rcp_length( const Vec3<T>& a ) { return rsqrt(sqr(a)); } template<typename T> __forceinline Vec3<T> normalize( const Vec3<T>& a ) { return a*rsqrt(sqr(a)); } template<typename T> __forceinline T distance ( const Vec3<T>& a, const Vec3<T>& b ) { return length(a-b); } - template<typename T> __forceinline Vec3<T> cross ( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(prod_diff(a.y,b.z,a.z,b.y), prod_diff(a.z,b.x,a.x,b.z), prod_diff(a.x,b.y,a.y,b.x)); } + template<typename T> __forceinline Vec3<T> cross ( const Vec3<T>& a, const Vec3<T>& b ) { return Vec3<T>(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x)); } + template<typename T> __forceinline Vec3<T> stable_triangle_normal( const Vec3<T>& a, const Vec3<T>& b, const Vec3<T>& c ) { const T ab_x = a.z*b.y, ab_y = a.x*b.z, ab_z = a.y*b.x; @@ -265,11 +266,11 @@ namespace embree /// SSE / AVX / MIC specializations //////////////////////////////////////////////////////////////////////////////// -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined __SSE__ #include "../simd/sse.h" #endif -#if defined(__AVX__) +#if defined __AVX__ #include "../simd/avx.h" #endif @@ -290,18 +291,14 @@ namespace embree template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; } -#elif defined(__SSE__) || defined(__ARM_NEON) +#elif defined(__SSE__) template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) { const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); } #endif -#if defined(__SSE__) || defined(__ARM_NEON) - __forceinline Vec3<vfloat4> broadcast4f(const Vec3<vfloat4>& a, const size_t k) { - return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k])); - } - +#if defined(__SSE__) template<> __forceinline Vec3<vfloat4> broadcast<vfloat4,vfloat4>(const Vec3<vfloat4>& a, const size_t k) { return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k])); @@ -318,15 +315,6 @@ namespace embree __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; } - __forceinline Vec3<vfloat4> broadcast4f(const Vec3<vfloat8>& a, const size_t k) { - return Vec3<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k])); - } - __forceinline Vec3<vfloat8> broadcast8f(const Vec3<vfloat4>& a, const size_t k) { - return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k])); - } - __forceinline Vec3<vfloat8> broadcast8f(const Vec3<vfloat8>& a, const size_t k) { - return Vec3<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k])); - } template<> __forceinline Vec3<vfloat8> broadcast<vfloat8,vfloat4>(const Vec3<vfloat4>& a, const size_t k) { diff --git a/thirdparty/embree-aarch64/common/math/vec3ba.h b/thirdparty/embree/common/math/vec3ba.h index 90f31739c2..a021b522dc 100644 --- a/thirdparty/embree-aarch64/common/math/vec3ba.h +++ b/thirdparty/embree/common/math/vec3ba.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/math/vec3fa.h b/thirdparty/embree/common/math/vec3fa.h index 6163cfb596..586039741d 100644 --- a/thirdparty/embree-aarch64/common/math/vec3fa.h +++ b/thirdparty/embree/common/math/vec3fa.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -55,13 +55,7 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// static __forceinline Vec3fa load( const void* const a ) { -#if defined(__aarch64__) - __m128 t = _mm_load_ps((float*)a); - t[3] = 0.0f; - return Vec3fa(t); -#else return Vec3fa(_mm_and_ps(_mm_load_ps((float*)a),_mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)))); -#endif } static __forceinline Vec3fa loadu( const void* const a ) { @@ -95,42 +89,19 @@ namespace embree __forceinline Vec3fa operator +( const Vec3fa& a ) { return a; } __forceinline Vec3fa operator -( const Vec3fa& a ) { -#if defined(__aarch64__) - return vnegq_f32(a.m128); -#else const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - return _mm_xor_ps(a.m128, mask); -#endif } __forceinline Vec3fa abs ( const Vec3fa& a ) { -#if defined(__aarch64__) - return _mm_abs_ps(a.m128); -#else const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); return _mm_and_ps(a.m128, mask); -#endif } __forceinline Vec3fa sign ( const Vec3fa& a ) { -#if defined(__aarch64__) - Vec3fa r = blendv_ps(vOne, vmOne, _mm_cmplt_ps (a.m128,vdupq_n_f32(0.0f))); - return r; -#else return blendv_ps(Vec3fa(one).m128, (-Vec3fa(one)).m128, _mm_cmplt_ps (a.m128,Vec3fa(zero).m128)); -#endif } __forceinline Vec3fa rcp ( const Vec3fa& a ) { -#if defined(__aarch64__) && defined(BUILD_IOS) - return vdivq_f32(vdupq_n_f32(1.0f),a.m128); -#elif defined(__aarch64__) - __m128 reciprocal = _mm_rcp_ps(a.m128); - reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(a.m128, reciprocal), reciprocal); - return (const Vec3fa)reciprocal; -#else - #if defined(__AVX512VL__) const Vec3fa r = _mm_rcp14_ps(a.m128); #else @@ -145,7 +116,6 @@ namespace embree #endif return res; -#endif //defined(__aarch64__) } __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return _mm_sqrt_ps(a.m128); } @@ -153,20 +123,12 @@ namespace embree __forceinline Vec3fa rsqrt( const Vec3fa& a ) { -#if defined(__aarch64__) - __m128 r = _mm_rsqrt_ps(a.m128); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r)); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a.m128, r), r)); - return r; -#else - #if defined(__AVX512VL__) __m128 r = _mm_rsqrt14_ps(a.m128); #else __m128 r = _mm_rsqrt_ps(a.m128); #endif return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f),r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a.m128, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); -#endif } __forceinline Vec3fa zero_fix(const Vec3fa& a) { @@ -199,7 +161,7 @@ namespace embree __forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) { return _mm_min_ps(a.m128,b.m128); } __forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) { return _mm_max_ps(a.m128,b.m128); } -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) { const vint4 ai = _mm_castps_si128(a.m128); const vint4 bi = _mm_castps_si128(b.m128); @@ -208,7 +170,7 @@ namespace embree } #endif -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) { const vint4 ai = _mm_castps_si128(a.m128); const vint4 bi = _mm_castps_si128(b.m128); @@ -231,29 +193,10 @@ namespace embree __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmadd_ps(a.m128,b.m128,c.m128); } __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return _mm_fnmsub_ps(a.m128,b.m128,c.m128); } #else - -#if defined(__aarch64__) - __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { - return _mm_madd_ps(a.m128, b.m128, c.m128); //a*b+c; - } - __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { - return _mm_msub_ps(a.m128, b.m128, c.m128); //-a*b+c; - } - __forceinline Vec3fa nmsub( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { - Vec3fa t = _mm_madd_ps(a.m128, b.m128, c.m128); - return -t; - } - __forceinline Vec3fa msub( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { - return _mm_madd_ps(a.m128,b.m128,vnegq_f32(c.m128)); //a*b-c - } - -#else __forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b+c; } + __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; } __forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b+c;} __forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return -a*b-c; } - __forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return a*b-c; } -#endif - #endif __forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); } @@ -275,37 +218,18 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) && defined(BUILD_IOS) - __forceinline float reduce_add(const Vec3fa& v) { - float32x4_t t = v.m128; - t[3] = 0.0f; - return vaddvq_f32(t); - } - - __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; } - __forceinline float reduce_min(const Vec3fa& v) { - float32x4_t t = v.m128; - t[3] = t[2]; - return vminvq_f32(t); - } - __forceinline float reduce_max(const Vec3fa& v) { - float32x4_t t = v.m128; - t[3] = t[2]; - return vmaxvq_f32(t); - } -#else - __forceinline float reduce_add(const Vec3fa& v) { + + __forceinline float reduce_add(const Vec3fa& v) { const vfloat4 a(v.m128); const vfloat4 b = shuffle<1>(a); const vfloat4 c = shuffle<2>(a); - return _mm_cvtss_f32(a+b+c); + return _mm_cvtss_f32(a+b+c); } __forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; } __forceinline float reduce_min(const Vec3fa& v) { return min(v.x,v.y,v.z); } __forceinline float reduce_max(const Vec3fa& v) { return max(v.x,v.y,v.z); } -#endif - + //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// @@ -317,13 +241,8 @@ namespace embree __forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpneq_ps(a.m128, b.m128); } __forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmplt_ps (a.m128, b.m128); } __forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmple_ps (a.m128, b.m128); } - #if defined(__aarch64__) - __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpgt_ps (a.m128, b.m128); } - __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpge_ps (a.m128, b.m128); } -#else - __forceinline Vec3ba gt_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnle_ps(a.m128, b.m128); } - __forceinline Vec3ba ge_mask(const Vec3fa& a, const Vec3fa& b) { return _mm_cmpnlt_ps(a.m128, b.m128); } -#endif + __forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnle_ps(a.m128, b.m128); } + __forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return _mm_cmpnlt_ps(a.m128, b.m128); } __forceinline bool isvalid ( const Vec3fa& v ) { return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE))); @@ -361,7 +280,7 @@ namespace embree vfloat4 b0 = shuffle<1,2,0,3>(vfloat4(b.m128)); vfloat4 a1 = shuffle<1,2,0,3>(vfloat4(a.m128)); vfloat4 b1 = vfloat4(b.m128); - return Vec3fa(shuffle<1,2,0,3>(prod_diff(a0,b0,a1,b1))); + return Vec3fa(shuffle<1,2,0,3>(msub(a0,b0,a1*b1))); } __forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); } @@ -416,11 +335,7 @@ namespace embree /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) - __forceinline Vec3fa floor(const Vec3fa& a) { return vrndmq_f32(a.m128); } - __forceinline Vec3fa ceil (const Vec3fa& a) { return vrndpq_f32(a.m128); } - __forceinline Vec3fa trunc(const Vec3fa& a) { return vrndq_f32(a.m128); } -#elif defined (__SSE4_1__) +#if defined (__SSE4_1__) __forceinline Vec3fa trunc( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); } __forceinline Vec3fa floor( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); } __forceinline Vec3fa ceil ( const Vec3fa& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); } @@ -478,10 +393,8 @@ namespace embree __forceinline Vec3fx( const Vec3fa& other, const int a1) { m128 = other.m128; a = a1; } __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) { m128 = other.m128; u = a1; } - __forceinline Vec3fx( const Vec3fa& other, const float w1) { -#if defined (__aarch64__) - m128 = other.m128; m128[3] = w1; -#elif defined (__SSE4_1__) + __forceinline Vec3fx( const Vec3fa& other, const float w1) { +#if defined (__SSE4_1__) m128 = _mm_insert_ps(other.m128, _mm_set_ss(w1),3 << 4); #else const vint4 mask(-1,-1,-1,0); @@ -613,7 +526,7 @@ namespace embree __forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) { return _mm_min_ps(a.m128,b.m128); } __forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) { return _mm_max_ps(a.m128,b.m128); } -#if defined(__SSE4_1__) || defined(__aarch64__) +#if defined(__SSE4_1__) __forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) { const vint4 ai = _mm_castps_si128(a.m128); const vint4 bi = _mm_castps_si128(b.m128); @@ -622,7 +535,7 @@ namespace embree } #endif -#if defined(__SSE4_1__) || defined(__aarch64__) +#if defined(__SSE4_1__) __forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) { const vint4 ai = _mm_castps_si128(a.m128); const vint4 bi = _mm_castps_si128(b.m128); @@ -671,11 +584,11 @@ namespace embree /// Reductions //////////////////////////////////////////////////////////////////////////////// - __forceinline float reduce_add(const Vec3fx& v) { + __forceinline float reduce_add(const Vec3fx& v) { const vfloat4 a(v.m128); const vfloat4 b = shuffle<1>(a); const vfloat4 c = shuffle<2>(a); - return _mm_cvtss_f32(a+b+c); + return _mm_cvtss_f32(a+b+c); } __forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; } @@ -787,7 +700,11 @@ namespace embree /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// -#if defined (__SSE4_1__) && !defined(__aarch64__) +#if defined(__aarch64__) + __forceinline Vec3fx trunc(const Vec3fx& a) { return vrndq_f32(a.m128); } + __forceinline Vec3fx floor(const Vec3fx& a) { return vrndmq_f32(a.m128); } + __forceinline Vec3fx ceil (const Vec3fx& a) { return vrndpq_f32(a.m128); } +#elif defined (__SSE4_1__) __forceinline Vec3fx trunc( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEAREST_INT); } __forceinline Vec3fx floor( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_NEG_INF ); } __forceinline Vec3fx ceil ( const Vec3fx& a ) { return _mm_round_ps(a.m128, _MM_FROUND_TO_POS_INF ); } diff --git a/thirdparty/embree-aarch64/common/math/vec3ia.h b/thirdparty/embree/common/math/vec3ia.h index 737f67fd72..694804c40d 100644 --- a/thirdparty/embree-aarch64/common/math/vec3ia.h +++ b/thirdparty/embree/common/math/vec3ia.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -65,9 +65,7 @@ namespace embree __forceinline Vec3ia operator +( const Vec3ia& a ) { return a; } __forceinline Vec3ia operator -( const Vec3ia& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); } -#if (defined(__aarch64__)) - __forceinline Vec3ia abs ( const Vec3ia& a ) { return vabsq_s32(a.m128); } -#elif defined(__SSSE3__) +#if defined(__SSSE3__) __forceinline Vec3ia abs ( const Vec3ia& a ) { return _mm_abs_epi32(a.m128); } #endif @@ -83,7 +81,7 @@ namespace embree __forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); } __forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; } -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return _mm_mullo_epi32(a.m128, b.m128); } __forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); } __forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; } @@ -101,14 +99,12 @@ namespace embree __forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); } __forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; } -#if !defined(__ARM_NEON) __forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return _mm_slli_epi32(a.m128, n); } __forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return _mm_srai_epi32(a.m128, n); } __forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return _mm_slli_epi32(a.m128, b); } __forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return _mm_srai_epi32(a.m128, b); } __forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return _mm_srli_epi32(a.m128, b); } -#endif //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators @@ -120,7 +116,7 @@ namespace embree __forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; } __forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; } -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; } __forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; } #endif @@ -131,38 +127,18 @@ namespace embree __forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; } __forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; } -#if !defined(__ARM_NEON) __forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; } __forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; } -#endif //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) - __forceinline int reduce_add(const Vec3ia& v) { - int32x4_t t = v.m128; - t[3] = 0; - return vaddvq_s32(t); - - } - __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; } - __forceinline int reduce_min(const Vec3ia& v) { - int32x4_t t = (__m128i)blendv_ps((__m128)v0x7fffffff, (__m128)v.m128, (__m128)vFFF0); - return vminvq_s32(t); - - } - __forceinline int reduce_max(const Vec3ia& v) { - int32x4_t t = (__m128i)blendv_ps((__m128)v0x80000000, (__m128)v.m128, (__m128)vFFF0); - return vmaxvq_s32(t); - - } -#else + __forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; } __forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; } __forceinline int reduce_min(const Vec3ia& v) { return min(v.x,v.y,v.z); } __forceinline int reduce_max(const Vec3ia& v) { return max(v.x,v.y,v.z); } -#endif + //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// @@ -185,14 +161,14 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// __forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) { -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); #else return _mm_or_si128(_mm_and_si128(_mm_castps_si128(m), t), _mm_andnot_si128(_mm_castps_si128(m), f)); #endif } -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return _mm_min_epi32(a.m128,b.m128); } __forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return _mm_max_epi32(a.m128,b.m128); } #else diff --git a/thirdparty/embree-aarch64/common/math/vec4.h b/thirdparty/embree/common/math/vec4.h index d16542f507..0ed107928a 100644 --- a/thirdparty/embree-aarch64/common/math/vec4.h +++ b/thirdparty/embree/common/math/vec4.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -192,7 +192,7 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// typedef Vec4<bool > Vec4b; - typedef Vec4<uint8_t > Vec4uc; + typedef Vec4<unsigned char> Vec4uc; typedef Vec4<int > Vec4i; typedef Vec4<float > Vec4f; } @@ -205,7 +205,7 @@ namespace embree /// SSE / AVX / MIC specializations //////////////////////////////////////////////////////////////////////////////// -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined __SSE__ #include "../simd/sse.h" #endif @@ -225,31 +225,16 @@ namespace embree template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; } -#elif defined(__SSE__) || defined(__ARM_NEON) +#elif defined(__SSE__) template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) { const vfloat4 v = vfloat4(a.m128); x = shuffle<0,0,0,0>(v); y = shuffle<1,1,1,1>(v); z = shuffle<2,2,2,2>(v); w = shuffle<3,3,3,3>(v); } #endif -#if defined(__SSE__) || defined(__ARM_NEON) - __forceinline Vec4<vfloat4> broadcast4f( const Vec4<vfloat4>& a, const size_t k ) { - return Vec4<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]), vfloat4::broadcast(&a.w[k])); - } -#endif - #if defined(__AVX__) template<> __forceinline Vec4<vfloat8>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; } - __forceinline Vec4<vfloat4> broadcast4f( const Vec4<vfloat8>& a, const size_t k ) { - return Vec4<vfloat4>(vfloat4::broadcast(&a.x[k]), vfloat4::broadcast(&a.y[k]), vfloat4::broadcast(&a.z[k]), vfloat4::broadcast(&a.w[k])); - } - __forceinline Vec4<vfloat8> broadcast8f( const Vec4<vfloat4>& a, const size_t k ) { - return Vec4<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]), vfloat8::broadcast(&a.w[k])); - } - __forceinline Vec4<vfloat8> broadcast8f( const Vec4<vfloat8>& a, const size_t k ) { - return Vec4<vfloat8>(vfloat8::broadcast(&a.x[k]), vfloat8::broadcast(&a.y[k]), vfloat8::broadcast(&a.z[k]), vfloat8::broadcast(&a.w[k])); - } #endif #if defined(__AVX512F__) diff --git a/thirdparty/embree/common/simd/arm/emulation.h b/thirdparty/embree/common/simd/arm/emulation.h new file mode 100644 index 0000000000..1c3875fb27 --- /dev/null +++ b/thirdparty/embree/common/simd/arm/emulation.h @@ -0,0 +1,50 @@ +// Copyright 2009-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +/* Make precision match SSE, at the cost of some performance */ +#if !defined(__aarch64__) +# define SSE2NEON_PRECISE_DIV 1 +# define SSE2NEON_PRECISE_SQRT 1 +#endif + +#include "sse2neon.h" + +__forceinline __m128 _mm_fmsub_ps(__m128 a, __m128 b, __m128 c) { + __m128 neg_c = vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(c))); + return _mm_fmadd_ps(a, b, neg_c); +} + +__forceinline __m128 _mm_fnmadd_ps(__m128 a, __m128 b, __m128 c) { +#if defined(__aarch64__) + return vreinterpretq_m128_f32(vfmsq_f32(vreinterpretq_f32_m128(c), + vreinterpretq_f32_m128(b), + vreinterpretq_f32_m128(a))); +#else + return _mm_sub_ps(c, _mm_mul_ps(a, b)); +#endif +} + +__forceinline __m128 _mm_fnmsub_ps(__m128 a, __m128 b, __m128 c) { + return vreinterpretq_m128_f32(vnegq_f32(vreinterpretq_f32_m128(_mm_fmadd_ps(a,b,c)))); +} + + +/* Dummy defines for floating point control */ +#define _MM_MASK_MASK 0x1f80 +#define _MM_MASK_DIV_ZERO 0x200 +#define _MM_FLUSH_ZERO_ON 0x8000 +#define _MM_MASK_DENORM 0x100 +#define _MM_SET_EXCEPTION_MASK(x) +#define _MM_SET_FLUSH_ZERO_MODE(x) + +__forceinline int _mm_getcsr() +{ + return 0; +} + +__forceinline void _mm_mfence() +{ + __sync_synchronize(); +} diff --git a/thirdparty/embree/common/simd/arm/sse2neon.h b/thirdparty/embree/common/simd/arm/sse2neon.h new file mode 100644 index 0000000000..7eb25cf2c5 --- /dev/null +++ b/thirdparty/embree/common/simd/arm/sse2neon.h @@ -0,0 +1,6996 @@ +#ifndef SSE2NEON_H +#define SSE2NEON_H + +// This header file provides a simple API translation layer +// between SSE intrinsics to their corresponding Arm/Aarch64 NEON versions +// +// This header file does not yet translate all of the SSE intrinsics. +// +// Contributors to this work are: +// John W. Ratcliff <jratcliffscarab@gmail.com> +// Brandon Rowlett <browlett@nvidia.com> +// Ken Fast <kfast@gdeb.com> +// Eric van Beurden <evanbeurden@nvidia.com> +// Alexander Potylitsin <apotylitsin@nvidia.com> +// Hasindu Gamaarachchi <hasindu2008@gmail.com> +// Jim Huang <jserv@biilabs.io> +// Mark Cheng <marktwtn@biilabs.io> +// Malcolm James MacLeod <malcolm@gulden.com> +// Devin Hussey (easyaspi314) <husseydevin@gmail.com> +// Sebastian Pop <spop@amazon.com> +// Developer Ecosystem Engineering <DeveloperEcosystemEngineering@apple.com> +// Danila Kutenin <danilak@google.com> +// François Turban (JishinMaster) <francois.turban@gmail.com> +// Pei-Hsuan Hung <afcidk@gmail.com> +// Yang-Hao Yuan <yanghau@biilabs.io> +// Syoyo Fujita <syoyo@lighttransport.com> +// Brecht Van Lommel <brecht@blender.org> + +/* + * sse2neon is freely redistributable under the MIT License. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Tunable configurations */ + +/* Enable precise implementation of math operations + * This would slow down the computation a bit, but gives consistent result with + * x86 SSE2. (e.g. would solve a hole or NaN pixel in the rendering result) + */ +/* _mm_min_ps and _mm_max_ps */ +#ifndef SSE2NEON_PRECISE_MINMAX +#define SSE2NEON_PRECISE_MINMAX (0) +#endif +/* _mm_rcp_ps and _mm_div_ps */ +#ifndef SSE2NEON_PRECISE_DIV +#define SSE2NEON_PRECISE_DIV (0) +#endif +/* _mm_sqrt_ps and _mm_rsqrt_ps */ +#ifndef SSE2NEON_PRECISE_SQRT +#define SSE2NEON_PRECISE_SQRT (0) +#endif +#ifndef SSE2NEON_PRECISE_RSQRT +#define SSE2NEON_PRECISE_RSQRT (0) +#endif + +#if defined(__GNUC__) || defined(__clang__) +#pragma push_macro("FORCE_INLINE") +#pragma push_macro("ALIGN_STRUCT") +#define FORCE_INLINE static inline __attribute__((always_inline)) +#define ALIGN_STRUCT(x) __attribute__((aligned(x))) +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif +#else +#error "Macro name collisions may happen with unsupported compiler." +#ifdef FORCE_INLINE +#undef FORCE_INLINE +#endif +#define FORCE_INLINE static inline +#ifndef ALIGN_STRUCT +#define ALIGN_STRUCT(x) __declspec(align(x)) +#endif +#endif +#ifndef likely +#define likely(x) (x) +#endif +#ifndef unlikely +#define unlikely(x) (x) +#endif + +#include <stdint.h> +#include <stdlib.h> + +/* Architecture-specific build options */ +/* FIXME: #pragma GCC push_options is only available on GCC */ +#if defined(__GNUC__) +#if defined(__arm__) && __ARM_ARCH == 7 +/* According to ARM C Language Extensions Architecture specification, + * __ARM_NEON is defined to a value indicating the Advanced SIMD (NEON) + * architecture supported. + */ +#if !defined(__ARM_NEON) || !defined(__ARM_NEON__) +#error "You must enable NEON instructions (e.g. -mfpu=neon) to use SSE2NEON." +#endif +#if !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("fpu=neon") +#endif +#elif defined(__aarch64__) +#if !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("+simd") +#endif +#else +#error "Unsupported target. Must be either ARMv7-A+NEON or ARMv8-A." +#endif +#endif + +#include <arm_neon.h> + +/* Rounding functions require either Aarch64 instructions or libm failback */ +#if !defined(__aarch64__) +#include <math.h> +#endif + +/* "__has_builtin" can be used to query support for built-in functions + * provided by gcc/clang and other compilers that support it. + */ +#ifndef __has_builtin /* GCC prior to 10 or non-clang compilers */ +/* Compatibility with gcc <= 9 */ +#if __GNUC__ <= 9 +#define __has_builtin(x) HAS##x +#define HAS__builtin_popcount 1 +#define HAS__builtin_popcountll 1 +#else +#define __has_builtin(x) 0 +#endif +#endif + +/** + * MACRO for shuffle parameter for _mm_shuffle_ps(). + * Argument fp3 is a digit[0123] that represents the fp from argument "b" + * of mm_shuffle_ps that will be placed in fp3 of result. fp2 is the same + * for fp2 in result. fp1 is a digit[0123] that represents the fp from + * argument "a" of mm_shuffle_ps that will be places in fp1 of result. + * fp0 is the same for fp0 of result. + */ +#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) + +/* Rounding mode macros. */ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 +#define _MM_FROUND_NO_EXC 0x08 +#define _MM_ROUND_NEAREST 0x0000 +#define _MM_ROUND_DOWN 0x2000 +#define _MM_ROUND_UP 0x4000 +#define _MM_ROUND_TOWARD_ZERO 0x6000 + +/* indicate immediate constant argument in a given range */ +#define __constrange(a, b) const + +/* A few intrinsics accept traditional data types like ints or floats, but + * most operate on data types that are specific to SSE. + * If a vector type ends in d, it contains doubles, and if it does not have + * a suffix, it contains floats. An integer vector type can contain any type + * of integer, from chars to shorts to unsigned long longs. + */ +typedef int64x1_t __m64; +typedef float32x4_t __m128; /* 128-bit vector containing 4 floats */ +// On ARM 32-bit architecture, the float64x2_t is not supported. +// The data type __m128d should be represented in a different way for related +// intrinsic conversion. +#if defined(__aarch64__) +typedef float64x2_t __m128d; /* 128-bit vector containing 2 doubles */ +#else +typedef float32x4_t __m128d; +#endif +typedef int64x2_t __m128i; /* 128-bit vector containing integers */ + +/* type-safe casting between types */ + +#define vreinterpretq_m128_f16(x) vreinterpretq_f32_f16(x) +#define vreinterpretq_m128_f32(x) (x) +#define vreinterpretq_m128_f64(x) vreinterpretq_f32_f64(x) + +#define vreinterpretq_m128_u8(x) vreinterpretq_f32_u8(x) +#define vreinterpretq_m128_u16(x) vreinterpretq_f32_u16(x) +#define vreinterpretq_m128_u32(x) vreinterpretq_f32_u32(x) +#define vreinterpretq_m128_u64(x) vreinterpretq_f32_u64(x) + +#define vreinterpretq_m128_s8(x) vreinterpretq_f32_s8(x) +#define vreinterpretq_m128_s16(x) vreinterpretq_f32_s16(x) +#define vreinterpretq_m128_s32(x) vreinterpretq_f32_s32(x) +#define vreinterpretq_m128_s64(x) vreinterpretq_f32_s64(x) + +#define vreinterpretq_f16_m128(x) vreinterpretq_f16_f32(x) +#define vreinterpretq_f32_m128(x) (x) +#define vreinterpretq_f64_m128(x) vreinterpretq_f64_f32(x) + +#define vreinterpretq_u8_m128(x) vreinterpretq_u8_f32(x) +#define vreinterpretq_u16_m128(x) vreinterpretq_u16_f32(x) +#define vreinterpretq_u32_m128(x) vreinterpretq_u32_f32(x) +#define vreinterpretq_u64_m128(x) vreinterpretq_u64_f32(x) + +#define vreinterpretq_s8_m128(x) vreinterpretq_s8_f32(x) +#define vreinterpretq_s16_m128(x) vreinterpretq_s16_f32(x) +#define vreinterpretq_s32_m128(x) vreinterpretq_s32_f32(x) +#define vreinterpretq_s64_m128(x) vreinterpretq_s64_f32(x) + +#define vreinterpretq_m128i_s8(x) vreinterpretq_s64_s8(x) +#define vreinterpretq_m128i_s16(x) vreinterpretq_s64_s16(x) +#define vreinterpretq_m128i_s32(x) vreinterpretq_s64_s32(x) +#define vreinterpretq_m128i_s64(x) (x) + +#define vreinterpretq_m128i_u8(x) vreinterpretq_s64_u8(x) +#define vreinterpretq_m128i_u16(x) vreinterpretq_s64_u16(x) +#define vreinterpretq_m128i_u32(x) vreinterpretq_s64_u32(x) +#define vreinterpretq_m128i_u64(x) vreinterpretq_s64_u64(x) + +#define vreinterpretq_f32_m128i(x) vreinterpretq_f32_s64(x) +#define vreinterpretq_f64_m128i(x) vreinterpretq_f64_s64(x) + +#define vreinterpretq_s8_m128i(x) vreinterpretq_s8_s64(x) +#define vreinterpretq_s16_m128i(x) vreinterpretq_s16_s64(x) +#define vreinterpretq_s32_m128i(x) vreinterpretq_s32_s64(x) +#define vreinterpretq_s64_m128i(x) (x) + +#define vreinterpretq_u8_m128i(x) vreinterpretq_u8_s64(x) +#define vreinterpretq_u16_m128i(x) vreinterpretq_u16_s64(x) +#define vreinterpretq_u32_m128i(x) vreinterpretq_u32_s64(x) +#define vreinterpretq_u64_m128i(x) vreinterpretq_u64_s64(x) + +#define vreinterpret_m64_s8(x) vreinterpret_s64_s8(x) +#define vreinterpret_m64_s16(x) vreinterpret_s64_s16(x) +#define vreinterpret_m64_s32(x) vreinterpret_s64_s32(x) +#define vreinterpret_m64_s64(x) (x) + +#define vreinterpret_m64_u8(x) vreinterpret_s64_u8(x) +#define vreinterpret_m64_u16(x) vreinterpret_s64_u16(x) +#define vreinterpret_m64_u32(x) vreinterpret_s64_u32(x) +#define vreinterpret_m64_u64(x) vreinterpret_s64_u64(x) + +#define vreinterpret_m64_f16(x) vreinterpret_s64_f16(x) +#define vreinterpret_m64_f32(x) vreinterpret_s64_f32(x) +#define vreinterpret_m64_f64(x) vreinterpret_s64_f64(x) + +#define vreinterpret_u8_m64(x) vreinterpret_u8_s64(x) +#define vreinterpret_u16_m64(x) vreinterpret_u16_s64(x) +#define vreinterpret_u32_m64(x) vreinterpret_u32_s64(x) +#define vreinterpret_u64_m64(x) vreinterpret_u64_s64(x) + +#define vreinterpret_s8_m64(x) vreinterpret_s8_s64(x) +#define vreinterpret_s16_m64(x) vreinterpret_s16_s64(x) +#define vreinterpret_s32_m64(x) vreinterpret_s32_s64(x) +#define vreinterpret_s64_m64(x) (x) + +#define vreinterpret_f32_m64(x) vreinterpret_f32_s64(x) + +#if defined(__aarch64__) +#define vreinterpretq_m128d_s32(x) vreinterpretq_f64_s32(x) +#define vreinterpretq_m128d_s64(x) vreinterpretq_f64_s64(x) + +#define vreinterpretq_m128d_u64(x) vreinterpretq_f64_u64(x) + +#define vreinterpretq_m128d_f32(x) vreinterpretq_f64_f32(x) +#define vreinterpretq_m128d_f64(x) (x) + +#define vreinterpretq_s64_m128d(x) vreinterpretq_s64_f64(x) + +#define vreinterpretq_u64_m128d(x) vreinterpretq_u64_f64(x) + +#define vreinterpretq_f64_m128d(x) (x) +#define vreinterpretq_f32_m128d(x) vreinterpretq_f32_f64(x) +#else +#define vreinterpretq_m128d_s32(x) vreinterpretq_f32_s32(x) +#define vreinterpretq_m128d_s64(x) vreinterpretq_f32_s64(x) + +#define vreinterpretq_m128d_u32(x) vreinterpretq_f32_u32(x) +#define vreinterpretq_m128d_u64(x) vreinterpretq_f32_u64(x) + +#define vreinterpretq_m128d_f32(x) (x) + +#define vreinterpretq_s64_m128d(x) vreinterpretq_s64_f32(x) + +#define vreinterpretq_u32_m128d(x) vreinterpretq_u32_f32(x) +#define vreinterpretq_u64_m128d(x) vreinterpretq_u64_f32(x) + +#define vreinterpretq_f32_m128d(x) (x) +#endif + +// A struct is defined in this header file called 'SIMDVec' which can be used +// by applications which attempt to access the contents of an _m128 struct +// directly. It is important to note that accessing the __m128 struct directly +// is bad coding practice by Microsoft: @see: +// https://msdn.microsoft.com/en-us/library/ayeb3ayc.aspx +// +// However, some legacy source code may try to access the contents of an __m128 +// struct directly so the developer can use the SIMDVec as an alias for it. Any +// casting must be done manually by the developer, as you cannot cast or +// otherwise alias the base NEON data type for intrinsic operations. +// +// union intended to allow direct access to an __m128 variable using the names +// that the MSVC compiler provides. This union should really only be used when +// trying to access the members of the vector as integer values. GCC/clang +// allow native access to the float members through a simple array access +// operator (in C since 4.6, in C++ since 4.8). +// +// Ideally direct accesses to SIMD vectors should not be used since it can cause +// a performance hit. If it really is needed however, the original __m128 +// variable can be aliased with a pointer to this union and used to access +// individual components. The use of this union should be hidden behind a macro +// that is used throughout the codebase to access the members instead of always +// declaring this type of variable. +typedef union ALIGN_STRUCT(16) SIMDVec { + float m128_f32[4]; // as floats - DON'T USE. Added for convenience. + int8_t m128_i8[16]; // as signed 8-bit integers. + int16_t m128_i16[8]; // as signed 16-bit integers. + int32_t m128_i32[4]; // as signed 32-bit integers. + int64_t m128_i64[2]; // as signed 64-bit integers. + uint8_t m128_u8[16]; // as unsigned 8-bit integers. + uint16_t m128_u16[8]; // as unsigned 16-bit integers. + uint32_t m128_u32[4]; // as unsigned 32-bit integers. + uint64_t m128_u64[2]; // as unsigned 64-bit integers. +} SIMDVec; + +// casting using SIMDVec +#define vreinterpretq_nth_u64_m128i(x, n) (((SIMDVec *) &x)->m128_u64[n]) +#define vreinterpretq_nth_u32_m128i(x, n) (((SIMDVec *) &x)->m128_u32[n]) +#define vreinterpretq_nth_u8_m128i(x, n) (((SIMDVec *) &x)->m128_u8[n]) + +/* Backwards compatibility for compilers with lack of specific type support */ + +// Older gcc does not define vld1q_u8_x4 type +#if defined(__GNUC__) && !defined(__clang__) && \ + ((__GNUC__ == 10 && (__GNUC_MINOR__ <= 1)) || \ + (__GNUC__ == 9 && (__GNUC_MINOR__ <= 3)) || \ + (__GNUC__ == 8 && (__GNUC_MINOR__ <= 4)) || __GNUC__ <= 7) +FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) +{ + uint8x16x4_t ret; + ret.val[0] = vld1q_u8(p + 0); + ret.val[1] = vld1q_u8(p + 16); + ret.val[2] = vld1q_u8(p + 32); + ret.val[3] = vld1q_u8(p + 48); + return ret; +} +#else +// Wraps vld1q_u8_x4 +FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) +{ + return vld1q_u8_x4(p); +} +#endif + +/* Function Naming Conventions + * The naming convention of SSE intrinsics is straightforward. A generic SSE + * intrinsic function is given as follows: + * _mm_<name>_<data_type> + * + * The parts of this format are given as follows: + * 1. <name> describes the operation performed by the intrinsic + * 2. <data_type> identifies the data type of the function's primary arguments + * + * This last part, <data_type>, is a little complicated. It identifies the + * content of the input values, and can be set to any of the following values: + * + ps - vectors contain floats (ps stands for packed single-precision) + * + pd - vectors cantain doubles (pd stands for packed double-precision) + * + epi8/epi16/epi32/epi64 - vectors contain 8-bit/16-bit/32-bit/64-bit + * signed integers + * + epu8/epu16/epu32/epu64 - vectors contain 8-bit/16-bit/32-bit/64-bit + * unsigned integers + * + si128 - unspecified 128-bit vector or 256-bit vector + * + m128/m128i/m128d - identifies input vector types when they are different + * than the type of the returned vector + * + * For example, _mm_setzero_ps. The _mm implies that the function returns + * a 128-bit vector. The _ps at the end implies that the argument vectors + * contain floats. + * + * A complete example: Byte Shuffle - pshufb (_mm_shuffle_epi8) + * // Set packed 16-bit integers. 128 bits, 8 short, per 16 bits + * __m128i v_in = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + * // Set packed 8-bit integers + * // 128 bits, 16 chars, per 8 bits + * __m128i v_perm = _mm_setr_epi8(1, 0, 2, 3, 8, 9, 10, 11, + * 4, 5, 12, 13, 6, 7, 14, 15); + * // Shuffle packed 8-bit integers + * __m128i v_out = _mm_shuffle_epi8(v_in, v_perm); // pshufb + * + * Data (Number, Binary, Byte Index): + +------+------+-------------+------+------+-------------+ + | 1 | 2 | 3 | 4 | Number + +------+------+------+------+------+------+------+------+ + | 0000 | 0001 | 0000 | 0010 | 0000 | 0011 | 0000 | 0100 | Binary + +------+------+------+------+------+------+------+------+ + | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Index + +------+------+------+------+------+------+------+------+ + + +------+------+------+------+------+------+------+------+ + | 5 | 6 | 7 | 8 | Number + +------+------+------+------+------+------+------+------+ + | 0000 | 0101 | 0000 | 0110 | 0000 | 0111 | 0000 | 1000 | Binary + +------+------+------+------+------+------+------+------+ + | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | Index + +------+------+------+------+------+------+------+------+ + * Index (Byte Index): + +------+------+------+------+------+------+------+------+ + | 1 | 0 | 2 | 3 | 8 | 9 | 10 | 11 | + +------+------+------+------+------+------+------+------+ + + +------+------+------+------+------+------+------+------+ + | 4 | 5 | 12 | 13 | 6 | 7 | 14 | 15 | + +------+------+------+------+------+------+------+------+ + * Result: + +------+------+------+------+------+------+------+------+ + | 1 | 0 | 2 | 3 | 8 | 9 | 10 | 11 | Index + +------+------+------+------+------+------+------+------+ + | 0001 | 0000 | 0000 | 0010 | 0000 | 0101 | 0000 | 0110 | Binary + +------+------+------+------+------+------+------+------+ + | 256 | 2 | 5 | 6 | Number + +------+------+------+------+------+------+------+------+ + + +------+------+------+------+------+------+------+------+ + | 4 | 5 | 12 | 13 | 6 | 7 | 14 | 15 | Index + +------+------+------+------+------+------+------+------+ + | 0000 | 0011 | 0000 | 0111 | 0000 | 0100 | 0000 | 1000 | Binary + +------+------+------+------+------+------+------+------+ + | 3 | 7 | 4 | 8 | Number + +------+------+------+------+------+------+-------------+ + */ + +/* Set/get methods */ + +/* Constants for use with _mm_prefetch. */ +enum _mm_hint { + _MM_HINT_NTA = 0, /* load data to L1 and L2 cache, mark it as NTA */ + _MM_HINT_T0 = 1, /* load data to L1 and L2 cache */ + _MM_HINT_T1 = 2, /* load data to L2 cache only */ + _MM_HINT_T2 = 3, /* load data to L2 cache only, mark it as NTA */ + _MM_HINT_ENTA = 4, /* exclusive version of _MM_HINT_NTA */ + _MM_HINT_ET0 = 5, /* exclusive version of _MM_HINT_T0 */ + _MM_HINT_ET1 = 6, /* exclusive version of _MM_HINT_T1 */ + _MM_HINT_ET2 = 7 /* exclusive version of _MM_HINT_T2 */ +}; + +// Loads one cache line of data from address p to a location closer to the +// processor. https://msdn.microsoft.com/en-us/library/84szxsww(v=vs.100).aspx +FORCE_INLINE void _mm_prefetch(const void *p, int i) +{ + (void) i; + __builtin_prefetch(p); +} + +// Pause the processor. This is typically used in spin-wait loops and depending +// on the x86 processor typical values are in the 40-100 cycle range. The +// 'yield' instruction isn't a good fit beacuse it's effectively a nop on most +// Arm cores. Experience with several databases has shown has shown an 'isb' is +// a reasonable approximation. +FORCE_INLINE void _mm_pause() +{ + __asm__ __volatile__("isb\n"); +} + +// Copy the lower single-precision (32-bit) floating-point element of a to dst. +// +// dst[31:0] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32 +FORCE_INLINE float _mm_cvtss_f32(__m128 a) +{ + return vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); +} + +// Convert the lower single-precision (32-bit) floating-point element in b to a +// double-precision (64-bit) floating-point element, store the result in the +// lower element of dst, and copy the upper element from a to the upper element +// of dst. +// +// dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd +FORCE_INLINE __m128d _mm_cvtss_sd(__m128d a, __m128 b) +{ + double d = (double) vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vsetq_lane_f64(d, vreinterpretq_f64_m128d(a), 0)); +#else + return vreinterpretq_m128d_s64( + vsetq_lane_s64(*(int64_t *) &d, vreinterpretq_s64_m128d(a), 0)); +#endif +} + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 32-bit integer, and store the result in dst. +// +// dst[31:0] := Convert_FP32_To_Int32(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si32 +#define _mm_cvtss_si32(a) _mm_cvt_ss2si(a) + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 64-bit integer, and store the result in dst. +// +// dst[63:0] := Convert_FP32_To_Int64(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64 +FORCE_INLINE int _mm_cvtss_si64(__m128 a) +{ +#if defined(__aarch64__) + return vgetq_lane_s64( + vreinterpretq_s64_s32(vcvtnq_s32_f32(vreinterpretq_f32_m128(a))), 0); +#else + float32_t data = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + float32_t diff = data - floor(data); + if (diff > 0.5) + return (int64_t) ceil(data); + if (unlikely(diff == 0.5)) { + int64_t f = (int64_t) floor(data); + int64_t c = (int64_t) ceil(data); + return c & 1 ? f : c; + } + return (int64_t) floor(data); +#endif +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed 32-bit integers with truncation, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ps2pi +FORCE_INLINE __m64 _mm_cvtt_ps2pi(__m128 a) +{ + return vreinterpret_m64_s32( + vget_low_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)))); +} + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 32-bit integer with truncation, and store the result in dst. +// +// dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ss2si +FORCE_INLINE int _mm_cvtt_ss2si(__m128 a) +{ + return vgetq_lane_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)), 0); +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed 32-bit integers with truncation, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_pi32 +#define _mm_cvttps_pi32(a) _mm_cvtt_ps2pi(a) + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 32-bit integer with truncation, and store the result in dst. +// +// dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si32 +#define _mm_cvttss_si32(a) _mm_cvtt_ss2si(a) + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 64-bit integer with truncation, and store the result in dst. +// +// dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64 +FORCE_INLINE int64_t _mm_cvttss_si64(__m128 a) +{ + return vgetq_lane_s64( + vmovl_s32(vget_low_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)))), 0); +} + +// Sets the 128-bit value to zero +// https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx +FORCE_INLINE __m128i _mm_setzero_si128(void) +{ + return vreinterpretq_m128i_s32(vdupq_n_s32(0)); +} + +// Clears the four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_setzero_ps(void) +{ + return vreinterpretq_m128_f32(vdupq_n_f32(0)); +} + +// Return vector of type __m128d with all elements set to zero. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd +FORCE_INLINE __m128d _mm_setzero_pd(void) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vdupq_n_f64(0)); +#else + return vreinterpretq_m128d_f32(vdupq_n_f32(0)); +#endif +} + +// Sets the four single-precision, floating-point values to w. +// +// r0 := r1 := r2 := r3 := w +// +// https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set1_ps(float _w) +{ + return vreinterpretq_m128_f32(vdupq_n_f32(_w)); +} + +// Sets the four single-precision, floating-point values to w. +// https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set_ps1(float _w) +{ + return vreinterpretq_m128_f32(vdupq_n_f32(_w)); +} + +// Sets the four single-precision, floating-point values to the four inputs. +// https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) +{ + float ALIGN_STRUCT(16) data[4] = {x, y, z, w}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +} + +// Copy single-precision (32-bit) floating-point element a to the lower element +// of dst, and zero the upper 3 elements. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss +FORCE_INLINE __m128 _mm_set_ss(float a) +{ + float ALIGN_STRUCT(16) data[4] = {a, 0, 0, 0}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +} + +// Sets the four single-precision, floating-point values to the four inputs in +// reverse order. +// https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx +FORCE_INLINE __m128 _mm_setr_ps(float w, float z, float y, float x) +{ + float ALIGN_STRUCT(16) data[4] = {w, z, y, x}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +} + +// Sets the 8 signed 16-bit integer values in reverse order. +// +// Return Value +// r0 := w0 +// r1 := w1 +// ... +// r7 := w7 +FORCE_INLINE __m128i _mm_setr_epi16(short w0, + short w1, + short w2, + short w3, + short w4, + short w5, + short w6, + short w7) +{ + int16_t ALIGN_STRUCT(16) data[8] = {w0, w1, w2, w3, w4, w5, w6, w7}; + return vreinterpretq_m128i_s16(vld1q_s16((int16_t *) data)); +} + +// Sets the 4 signed 32-bit integer values in reverse order +// https://technet.microsoft.com/en-us/library/security/27yb3ee5(v=vs.90).aspx +FORCE_INLINE __m128i _mm_setr_epi32(int i3, int i2, int i1, int i0) +{ + int32_t ALIGN_STRUCT(16) data[4] = {i3, i2, i1, i0}; + return vreinterpretq_m128i_s32(vld1q_s32(data)); +} + +// Set packed 64-bit integers in dst with the supplied values in reverse order. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi64 +FORCE_INLINE __m128i _mm_setr_epi64(__m64 e1, __m64 e0) +{ + return vreinterpretq_m128i_s64(vcombine_s64(e1, e0)); +} + +// Sets the 16 signed 8-bit integer values to b. +// +// r0 := b +// r1 := b +// ... +// r15 := b +// +// https://msdn.microsoft.com/en-us/library/6e14xhyf(v=vs.100).aspx +FORCE_INLINE __m128i _mm_set1_epi8(signed char w) +{ + return vreinterpretq_m128i_s8(vdupq_n_s8(w)); +} + +// Broadcast double-precision (64-bit) floating-point value a to all elements of +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd +FORCE_INLINE __m128d _mm_set1_pd(double d) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vdupq_n_f64(d)); +#else + return vreinterpretq_m128d_s64(vdupq_n_s64(*(int64_t *) &d)); +#endif +} + +// Sets the 8 signed 16-bit integer values to w. +// +// r0 := w +// r1 := w +// ... +// r7 := w +// +// https://msdn.microsoft.com/en-us/library/k0ya3x0e(v=vs.90).aspx +FORCE_INLINE __m128i _mm_set1_epi16(short w) +{ + return vreinterpretq_m128i_s16(vdupq_n_s16(w)); +} + +// Sets the 16 signed 8-bit integer values. +// https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx +FORCE_INLINE __m128i _mm_set_epi8(signed char b15, + signed char b14, + signed char b13, + signed char b12, + signed char b11, + signed char b10, + signed char b9, + signed char b8, + signed char b7, + signed char b6, + signed char b5, + signed char b4, + signed char b3, + signed char b2, + signed char b1, + signed char b0) +{ + int8_t ALIGN_STRUCT(16) + data[16] = {(int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, + (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, + (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, + (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; + return (__m128i) vld1q_s8(data); +} + +// Sets the 8 signed 16-bit integer values. +// https://msdn.microsoft.com/en-au/library/3e0fek84(v=vs.90).aspx +FORCE_INLINE __m128i _mm_set_epi16(short i7, + short i6, + short i5, + short i4, + short i3, + short i2, + short i1, + short i0) +{ + int16_t ALIGN_STRUCT(16) data[8] = {i0, i1, i2, i3, i4, i5, i6, i7}; + return vreinterpretq_m128i_s16(vld1q_s16(data)); +} + +// Sets the 16 signed 8-bit integer values in reverse order. +// https://msdn.microsoft.com/en-us/library/2khb9c7k(v=vs.90).aspx +FORCE_INLINE __m128i _mm_setr_epi8(signed char b0, + signed char b1, + signed char b2, + signed char b3, + signed char b4, + signed char b5, + signed char b6, + signed char b7, + signed char b8, + signed char b9, + signed char b10, + signed char b11, + signed char b12, + signed char b13, + signed char b14, + signed char b15) +{ + int8_t ALIGN_STRUCT(16) + data[16] = {(int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, + (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, + (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, + (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; + return (__m128i) vld1q_s8(data); +} + +// Sets the 4 signed 32-bit integer values to i. +// +// r0 := i +// r1 := i +// r2 := i +// r3 := I +// +// https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx +FORCE_INLINE __m128i _mm_set1_epi32(int _i) +{ + return vreinterpretq_m128i_s32(vdupq_n_s32(_i)); +} + +// Sets the 2 signed 64-bit integer values to i. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/whtfzhzk(v=vs.100) +FORCE_INLINE __m128i _mm_set1_epi64(__m64 _i) +{ + return vreinterpretq_m128i_s64(vdupq_n_s64((int64_t) _i)); +} + +// Sets the 2 signed 64-bit integer values to i. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x +FORCE_INLINE __m128i _mm_set1_epi64x(int64_t _i) +{ + return vreinterpretq_m128i_s64(vdupq_n_s64(_i)); +} + +// Sets the 4 signed 32-bit integer values. +// https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx +FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) +{ + int32_t ALIGN_STRUCT(16) data[4] = {i0, i1, i2, i3}; + return vreinterpretq_m128i_s32(vld1q_s32(data)); +} + +// Returns the __m128i structure with its two 64-bit integer values +// initialized to the values of the two 64-bit integers passed in. +// https://msdn.microsoft.com/en-us/library/dk2sdw0h(v=vs.120).aspx +FORCE_INLINE __m128i _mm_set_epi64x(int64_t i1, int64_t i2) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vcreate_s64(i2), vcreate_s64(i1))); +} + +// Returns the __m128i structure with its two 64-bit integer values +// initialized to the values of the two 64-bit integers passed in. +// https://msdn.microsoft.com/en-us/library/dk2sdw0h(v=vs.120).aspx +FORCE_INLINE __m128i _mm_set_epi64(__m64 i1, __m64 i2) +{ + return _mm_set_epi64x((int64_t) i1, (int64_t) i2); +} + +// Set packed double-precision (64-bit) floating-point elements in dst with the +// supplied values. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd +FORCE_INLINE __m128d _mm_set_pd(double e1, double e0) +{ + double ALIGN_STRUCT(16) data[2] = {e0, e1}; +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vld1q_f64((float64_t *) data)); +#else + return vreinterpretq_m128d_f32(vld1q_f32((float32_t *) data)); +#endif +} + +// Set packed double-precision (64-bit) floating-point elements in dst with the +// supplied values in reverse order. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd +FORCE_INLINE __m128d _mm_setr_pd(double e1, double e0) +{ + return _mm_set_pd(e0, e1); +} + +// Copy double-precision (64-bit) floating-point element a to the lower element +// of dst, and zero the upper element. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd +FORCE_INLINE __m128d _mm_set_sd(double a) +{ + return _mm_set_pd(0, a); +} + +// Broadcast double-precision (64-bit) floating-point value a to all elements of +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1 +#define _mm_set_pd1 _mm_set1_pd + +// Stores four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx +FORCE_INLINE void _mm_store_ps(float *p, __m128 a) +{ + vst1q_f32(p, vreinterpretq_f32_m128(a)); +} + +// Store the lower single-precision (32-bit) floating-point element from a into +// 4 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// +// MEM[mem_addr+31:mem_addr] := a[31:0] +// MEM[mem_addr+63:mem_addr+32] := a[31:0] +// MEM[mem_addr+95:mem_addr+64] := a[31:0] +// MEM[mem_addr+127:mem_addr+96] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps1 +FORCE_INLINE void _mm_store_ps1(float *p, __m128 a) +{ + float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + vst1q_f32(p, vdupq_n_f32(a0)); +} + +// Store the lower single-precision (32-bit) floating-point element from a into +// 4 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// +// MEM[mem_addr+31:mem_addr] := a[31:0] +// MEM[mem_addr+63:mem_addr+32] := a[31:0] +// MEM[mem_addr+95:mem_addr+64] := a[31:0] +// MEM[mem_addr+127:mem_addr+96] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps +#define _mm_store1_ps _mm_store_ps1 + +// Store 4 single-precision (32-bit) floating-point elements from a into memory +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// MEM[mem_addr+31:mem_addr] := a[127:96] +// MEM[mem_addr+63:mem_addr+32] := a[95:64] +// MEM[mem_addr+95:mem_addr+64] := a[63:32] +// MEM[mem_addr+127:mem_addr+96] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps +FORCE_INLINE void _mm_storer_ps(float *p, __m128 a) +{ + float32x4_t tmp = vrev64q_f32(vreinterpretq_f32_m128(a)); + float32x4_t rev = vextq_f32(tmp, tmp, 2); + vst1q_f32(p, rev); +} + +// Stores four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx +FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) +{ + vst1q_f32(p, vreinterpretq_f32_m128(a)); +} + +// Stores four 32-bit integer values as (as a __m128i value) at the address p. +// https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx +FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a) +{ + vst1q_s32((int32_t *) p, vreinterpretq_s32_m128i(a)); +} + +// Stores four 32-bit integer values as (as a __m128i value) at the address p. +// https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx +FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a) +{ + vst1q_s32((int32_t *) p, vreinterpretq_s32_m128i(a)); +} + +// Stores the lower single - precision, floating - point value. +// https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx +FORCE_INLINE void _mm_store_ss(float *p, __m128 a) +{ + vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0); +} + +// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point +// elements) from a into memory. mem_addr must be aligned on a 16-byte boundary +// or a general-protection exception may be generated. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd +FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1q_f64((float64_t *) mem_addr, vreinterpretq_f64_m128d(a)); +#else + vst1q_f32((float32_t *) mem_addr, vreinterpretq_f32_m128d(a)); +#endif +} + +// Store the upper double-precision (64-bit) floating-point element from a into +// memory. +// +// MEM[mem_addr+63:mem_addr] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd +FORCE_INLINE void _mm_storeh_pd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1_f64((float64_t *) mem_addr, vget_high_f64(vreinterpretq_f64_m128d(a))); +#else + vst1_f32((float32_t *) mem_addr, vget_high_f32(vreinterpretq_f32_m128d(a))); +#endif +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// memory. +// +// MEM[mem_addr+63:mem_addr] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd +FORCE_INLINE void _mm_storel_pd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1_f64((float64_t *) mem_addr, vget_low_f64(vreinterpretq_f64_m128d(a))); +#else + vst1_f32((float32_t *) mem_addr, vget_low_f32(vreinterpretq_f32_m128d(a))); +#endif +} + +// Store 2 double-precision (64-bit) floating-point elements from a into memory +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// MEM[mem_addr+63:mem_addr] := a[127:64] +// MEM[mem_addr+127:mem_addr+64] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd +FORCE_INLINE void _mm_storer_pd(double *mem_addr, __m128d a) +{ + float32x4_t f = vreinterpretq_f32_m128d(a); + _mm_store_pd(mem_addr, vreinterpretq_m128d_f32(vextq_f32(f, f, 2))); +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// 2 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1 +FORCE_INLINE void _mm_store_pd1(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + float64x1_t a_low = vget_low_f64(vreinterpretq_f64_m128d(a)); + vst1q_f64((float64_t *) mem_addr, + vreinterpretq_f64_m128d(vcombine_f64(a_low, a_low))); +#else + float32x2_t a_low = vget_low_f32(vreinterpretq_f32_m128d(a)); + vst1q_f32((float32_t *) mem_addr, + vreinterpretq_f32_m128d(vcombine_f32(a_low, a_low))); +#endif +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// 2 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=9,526,5601&text=_mm_store1_pd +#define _mm_store1_pd _mm_store_pd1 + +// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point +// elements) from a into memory. mem_addr does not need to be aligned on any +// particular boundary. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd +FORCE_INLINE void _mm_storeu_pd(double *mem_addr, __m128d a) +{ + _mm_store_pd(mem_addr, a); +} + +// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. +// https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx +FORCE_INLINE void _mm_storel_epi64(__m128i *a, __m128i b) +{ + uint64x1_t hi = vget_high_u64(vreinterpretq_u64_m128i(*a)); + uint64x1_t lo = vget_low_u64(vreinterpretq_u64_m128i(b)); + *a = vreinterpretq_m128i_u64(vcombine_u64(lo, hi)); +} + +// Stores the lower two single-precision floating point values of a to the +// address p. +// +// *p0 := a0 +// *p1 := a1 +// +// https://msdn.microsoft.com/en-us/library/h54t98ks(v=vs.90).aspx +FORCE_INLINE void _mm_storel_pi(__m64 *p, __m128 a) +{ + *p = vreinterpret_m64_f32(vget_low_f32(a)); +} + +// Stores the upper two single-precision, floating-point values of a to the +// address p. +// +// *p0 := a2 +// *p1 := a3 +// +// https://msdn.microsoft.com/en-us/library/a7525fs8(v%3dvs.90).aspx +FORCE_INLINE void _mm_storeh_pi(__m64 *p, __m128 a) +{ + *p = vreinterpret_m64_f32(vget_high_f32(a)); +} + +// Loads a single single-precision, floating-point value, copying it into all +// four words +// https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx +FORCE_INLINE __m128 _mm_load1_ps(const float *p) +{ + return vreinterpretq_m128_f32(vld1q_dup_f32(p)); +} + +// Load a single-precision (32-bit) floating-point element from memory into all +// elements of dst. +// +// dst[31:0] := MEM[mem_addr+31:mem_addr] +// dst[63:32] := MEM[mem_addr+31:mem_addr] +// dst[95:64] := MEM[mem_addr+31:mem_addr] +// dst[127:96] := MEM[mem_addr+31:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps1 +#define _mm_load_ps1 _mm_load1_ps + +// Sets the lower two single-precision, floating-point values with 64 +// bits of data loaded from the address p; the upper two values are passed +// through from a. +// +// Return Value +// r0 := *p0 +// r1 := *p1 +// r2 := a2 +// r3 := a3 +// +// https://msdn.microsoft.com/en-us/library/s57cyak2(v=vs.100).aspx +FORCE_INLINE __m128 _mm_loadl_pi(__m128 a, __m64 const *p) +{ + return vreinterpretq_m128_f32( + vcombine_f32(vld1_f32((const float32_t *) p), vget_high_f32(a))); +} + +// Load 4 single-precision (32-bit) floating-point elements from memory into dst +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// dst[31:0] := MEM[mem_addr+127:mem_addr+96] +// dst[63:32] := MEM[mem_addr+95:mem_addr+64] +// dst[95:64] := MEM[mem_addr+63:mem_addr+32] +// dst[127:96] := MEM[mem_addr+31:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_ps +FORCE_INLINE __m128 _mm_loadr_ps(const float *p) +{ + float32x4_t v = vrev64q_f32(vld1q_f32(p)); + return vreinterpretq_m128_f32(vextq_f32(v, v, 2)); +} + +// Sets the upper two single-precision, floating-point values with 64 +// bits of data loaded from the address p; the lower two values are passed +// through from a. +// +// r0 := a0 +// r1 := a1 +// r2 := *p0 +// r3 := *p1 +// +// https://msdn.microsoft.com/en-us/library/w92wta0x(v%3dvs.100).aspx +FORCE_INLINE __m128 _mm_loadh_pi(__m128 a, __m64 const *p) +{ + return vreinterpretq_m128_f32( + vcombine_f32(vget_low_f32(a), vld1_f32((const float32_t *) p))); +} + +// Loads four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx +FORCE_INLINE __m128 _mm_load_ps(const float *p) +{ + return vreinterpretq_m128_f32(vld1q_f32(p)); +} + +// Loads four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_loadu_ps(const float *p) +{ + // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are + // equivalent for neon + return vreinterpretq_m128_f32(vld1q_f32(p)); +} + +// Load unaligned 16-bit integer from memory into the first element of dst. +// +// dst[15:0] := MEM[mem_addr+15:mem_addr] +// dst[MAX:16] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si16 +FORCE_INLINE __m128i _mm_loadu_si16(const void *p) +{ + return vreinterpretq_m128i_s16( + vsetq_lane_s16(*(const int16_t *) p, vdupq_n_s16(0), 0)); +} + +// Load unaligned 64-bit integer from memory into the first element of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[MAX:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64 +FORCE_INLINE __m128i _mm_loadu_si64(const void *p) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vld1_s64((const int64_t *) p), vdup_n_s64(0))); +} + +// Load a double-precision (64-bit) floating-point element from memory into the +// lower of dst, and zero the upper element. mem_addr does not need to be +// aligned on any particular boundary. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd +FORCE_INLINE __m128d _mm_load_sd(const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vsetq_lane_f64(*p, vdupq_n_f64(0), 0)); +#else + const float *fp = (const float *) p; + float ALIGN_STRUCT(16) data[4] = {fp[0], fp[1], 0, 0}; + return vreinterpretq_m128d_f32(vld1q_f32(data)); +#endif +} + +// Loads two double-precision from 16-byte aligned memory, floating-point +// values. +// +// dst[127:0] := MEM[mem_addr+127:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd +FORCE_INLINE __m128d _mm_load_pd(const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vld1q_f64(p)); +#else + const float *fp = (const float *) p; + float ALIGN_STRUCT(16) data[4] = {fp[0], fp[1], fp[2], fp[3]}; + return vreinterpretq_m128d_f32(vld1q_f32(data)); +#endif +} + +// Loads two double-precision from unaligned memory, floating-point values. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd +FORCE_INLINE __m128d _mm_loadu_pd(const double *p) +{ + return _mm_load_pd(p); +} + +// Loads an single - precision, floating - point value into the low word and +// clears the upper three words. +// https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_load_ss(const float *p) +{ + return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0)); +} + +// Load 64-bit integer from memory into the first element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64 +FORCE_INLINE __m128i _mm_loadl_epi64(__m128i const *p) +{ + /* Load the lower 64 bits of the value pointed to by p into the + * lower 64 bits of the result, zeroing the upper 64 bits of the result. + */ + return vreinterpretq_m128i_s32( + vcombine_s32(vld1_s32((int32_t const *) p), vcreate_s32(0))); +} + +// Load a double-precision (64-bit) floating-point element from memory into the +// lower element of dst, and copy the upper element from a to dst. mem_addr does +// not need to be aligned on any particular boundary. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd +FORCE_INLINE __m128d _mm_loadl_pd(__m128d a, const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcombine_f64(vld1_f64(p), vget_high_f64(vreinterpretq_f64_m128d(a)))); +#else + return vreinterpretq_m128d_f32( + vcombine_f32(vld1_f32((const float *) p), + vget_high_f32(vreinterpretq_f32_m128d(a)))); +#endif +} + +// Load 2 double-precision (64-bit) floating-point elements from memory into dst +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// dst[63:0] := MEM[mem_addr+127:mem_addr+64] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd +FORCE_INLINE __m128d _mm_loadr_pd(const double *p) +{ +#if defined(__aarch64__) + float64x2_t v = vld1q_f64(p); + return vreinterpretq_m128d_f64(vextq_f64(v, v, 1)); +#else + int64x2_t v = vld1q_s64((const int64_t *) p); + return vreinterpretq_m128d_s64(vextq_s64(v, v, 1)); +#endif +} + +// Sets the low word to the single-precision, floating-point value of b +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/35hdzazd(v=vs.100) +FORCE_INLINE __m128 _mm_move_ss(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), 0), + vreinterpretq_f32_m128(a), 0)); +} + +// Move the lower double-precision (64-bit) floating-point element from b to the +// lower element of dst, and copy the upper element from a to the upper element +// of dst. +// +// dst[63:0] := b[63:0] +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd +FORCE_INLINE __m128d _mm_move_sd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_f32( + vcombine_f32(vget_low_f32(vreinterpretq_f32_m128d(b)), + vget_high_f32(vreinterpretq_f32_m128d(a)))); +} + +// Copy the lower 64-bit integer in a to the lower element of dst, and zero the +// upper element. +// +// dst[63:0] := a[63:0] +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64 +FORCE_INLINE __m128i _mm_move_epi64(__m128i a) +{ + return vreinterpretq_m128i_s64( + vsetq_lane_s64(0, vreinterpretq_s64_m128i(a), 1)); +} + +// Return vector of type __m128 with undefined elements. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps +FORCE_INLINE __m128 _mm_undefined_ps(void) +{ +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#endif + __m128 a; + return a; +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + +/* Logic/Binary operations */ + +// Computes the bitwise AND-NOT of the four single-precision, floating-point +// values of a and b. +// +// r0 := ~a0 & b0 +// r1 := ~a1 & b1 +// r2 := ~a2 & b2 +// r3 := ~a3 & b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx +FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_s32( + vbicq_s32(vreinterpretq_s32_m128(b), + vreinterpretq_s32_m128(a))); // *NOTE* argument swap +} + +// Compute the bitwise NOT of packed double-precision (64-bit) floating-point +// elements in a and then AND with b, and store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd +FORCE_INLINE __m128d _mm_andnot_pd(__m128d a, __m128d b) +{ + // *NOTE* argument swap + return vreinterpretq_m128d_s64( + vbicq_s64(vreinterpretq_s64_m128d(b), vreinterpretq_s64_m128d(a))); +} + +// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the +// 128-bit value in a. +// +// r := (~a) & b +// +// https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx +FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vbicq_s32(vreinterpretq_s32_m128i(b), + vreinterpretq_s32_m128i(a))); // *NOTE* argument swap +} + +// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in +// b. +// +// r := a & b +// +// https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx +FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Computes the bitwise AND of the four single-precision, floating-point values +// of a and b. +// +// r0 := a0 & b0 +// r1 := a1 & b1 +// r2 := a2 & b2 +// r3 := a3 & b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx +FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_s32( + vandq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b))); +} + +// Compute the bitwise AND of packed double-precision (64-bit) floating-point +// elements in a and b, and store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// dst[i+63:i] := a[i+63:i] AND b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd +FORCE_INLINE __m128d _mm_and_pd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_s64( + vandq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); +} + +// Computes the bitwise OR of the four single-precision, floating-point values +// of a and b. +// https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx +FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_s32( + vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b))); +} + +// Computes bitwise EXOR (exclusive-or) of the four single-precision, +// floating-point values of a and b. +// https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx +FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_s32( + veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b))); +} + +// Compute the bitwise XOR of packed double-precision (64-bit) floating-point +// elements in a and b, and store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd +FORCE_INLINE __m128d _mm_xor_pd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_s64( + veorq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); +} + +// Compute the bitwise OR of packed double-precision (64-bit) floating-point +// elements in a and b, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_or_pd +FORCE_INLINE __m128d _mm_or_pd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_s64( + vorrq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); +} + +// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. +// +// r := a | b +// +// https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx +FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in +// b. https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx +FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Duplicate the low double-precision (64-bit) floating-point element from a, +// and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd +FORCE_INLINE __m128d _mm_movedup_pd(__m128d a) +{ +#if (__aarch64__) + return vreinterpretq_m128d_f64( + vdupq_laneq_f64(vreinterpretq_f64_m128d(a), 0)); +#else + return vreinterpretq_m128d_u64( + vdupq_n_u64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0))); +#endif +} + +// Duplicate odd-indexed single-precision (32-bit) floating-point elements +// from a, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps +FORCE_INLINE __m128 _mm_movehdup_ps(__m128 a) +{ +#if __has_builtin(__builtin_shufflevector) + return vreinterpretq_m128_f32(__builtin_shufflevector( + vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 1, 1, 3, 3)); +#else + float32_t a1 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 1); + float32_t a3 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 3); + float ALIGN_STRUCT(16) data[4] = {a1, a1, a3, a3}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +#endif +} + +// Duplicate even-indexed single-precision (32-bit) floating-point elements +// from a, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps +FORCE_INLINE __m128 _mm_moveldup_ps(__m128 a) +{ +#if __has_builtin(__builtin_shufflevector) + return vreinterpretq_m128_f32(__builtin_shufflevector( + vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 0, 0, 2, 2)); +#else + float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + float32_t a2 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 2); + float ALIGN_STRUCT(16) data[4] = {a0, a0, a2, a2}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +#endif +} + +// Moves the upper two values of B into the lower two values of A. +// +// r3 := a3 +// r2 := a2 +// r1 := b3 +// r0 := b2 +FORCE_INLINE __m128 _mm_movehl_ps(__m128 __A, __m128 __B) +{ + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(__A)); + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(__B)); + return vreinterpretq_m128_f32(vcombine_f32(b32, a32)); +} + +// Moves the lower two values of B into the upper two values of A. +// +// r3 := b1 +// r2 := b0 +// r1 := a1 +// r0 := a0 +FORCE_INLINE __m128 _mm_movelh_ps(__m128 __A, __m128 __B) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(__A)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(__B)); + return vreinterpretq_m128_f32(vcombine_f32(a10, b10)); +} + +// Compute the absolute value of packed signed 32-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 3 +// i := j*32 +// dst[i+31:i] := ABS(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32 +FORCE_INLINE __m128i _mm_abs_epi32(__m128i a) +{ + return vreinterpretq_m128i_s32(vabsq_s32(vreinterpretq_s32_m128i(a))); +} + +// Compute the absolute value of packed signed 16-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// dst[i+15:i] := ABS(a[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16 +FORCE_INLINE __m128i _mm_abs_epi16(__m128i a) +{ + return vreinterpretq_m128i_s16(vabsq_s16(vreinterpretq_s16_m128i(a))); +} + +// Compute the absolute value of packed signed 8-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 15 +// i := j*8 +// dst[i+7:i] := ABS(a[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8 +FORCE_INLINE __m128i _mm_abs_epi8(__m128i a) +{ + return vreinterpretq_m128i_s8(vabsq_s8(vreinterpretq_s8_m128i(a))); +} + +// Compute the absolute value of packed signed 32-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 1 +// i := j*32 +// dst[i+31:i] := ABS(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi32 +FORCE_INLINE __m64 _mm_abs_pi32(__m64 a) +{ + return vreinterpret_m64_s32(vabs_s32(vreinterpret_s32_m64(a))); +} + +// Compute the absolute value of packed signed 16-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := ABS(a[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi16 +FORCE_INLINE __m64 _mm_abs_pi16(__m64 a) +{ + return vreinterpret_m64_s16(vabs_s16(vreinterpret_s16_m64(a))); +} + +// Compute the absolute value of packed signed 8-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := ABS(a[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi8 +FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) +{ + return vreinterpret_m64_s8(vabs_s8(vreinterpret_s8_m64(a))); +} + +// Concatenate 16-byte blocks in a and b into a 32-byte temporary result, shift +// the result right by imm8 bytes, and store the low 16 bytes in dst. +// +// tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +// dst[127:0] := tmp[127:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8 +#define _mm_alignr_epi8(a, b, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm) >= 32)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + uint8x16_t tmp_low, tmp_high; \ + if (imm >= 16) { \ + const int idx = imm - 16; \ + tmp_low = vreinterpretq_u8_m128i(a); \ + tmp_high = vdupq_n_u8(0); \ + ret = \ + vreinterpretq_m128i_u8(vextq_u8(tmp_low, tmp_high, idx)); \ + } else { \ + const int idx = imm; \ + tmp_low = vreinterpretq_u8_m128i(b); \ + tmp_high = vreinterpretq_u8_m128i(a); \ + ret = \ + vreinterpretq_m128i_u8(vextq_u8(tmp_low, tmp_high, idx)); \ + } \ + } \ + ret; \ + }) + +// Concatenate 8-byte blocks in a and b into a 16-byte temporary result, shift +// the result right by imm8 bytes, and store the low 8 bytes in dst. +// +// tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8) +// dst[63:0] := tmp[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_pi8 +#define _mm_alignr_pi8(a, b, imm) \ + __extension__({ \ + __m64 ret; \ + if (unlikely((imm) >= 16)) { \ + ret = vreinterpret_m64_s8(vdup_n_s8(0)); \ + } else { \ + uint8x8_t tmp_low, tmp_high; \ + if (imm >= 8) { \ + const int idx = imm - 8; \ + tmp_low = vreinterpret_u8_m64(a); \ + tmp_high = vdup_n_u8(0); \ + ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \ + } else { \ + const int idx = imm; \ + tmp_low = vreinterpret_u8_m64(b); \ + tmp_high = vreinterpret_u8_m64(a); \ + ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \ + } \ + } \ + ret; \ + }) + +// Takes the upper 64 bits of a and places it in the low end of the result +// Takes the lower 64 bits of b and places it into the high end of the result. +FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) +{ + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a32, b10)); +} + +// takes the lower two 32-bit values from a and swaps them and places in high +// end of result takes the higher two 32 bit values from b and swaps them and +// places in low end of result. +FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32x2_t b23 = vrev64_f32(vget_high_f32(vreinterpretq_f32_m128(b))); + return vreinterpretq_m128_f32(vcombine_f32(a01, b23)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0321(__m128 a, __m128 b) +{ + float32x2_t a21 = vget_high_f32( + vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); + float32x2_t b03 = vget_low_f32( + vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); + return vreinterpretq_m128_f32(vcombine_f32(a21, b03)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2103(__m128 a, __m128 b) +{ + float32x2_t a03 = vget_low_f32( + vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); + float32x2_t b21 = vget_high_f32( + vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); + return vreinterpretq_m128_f32(vcombine_f32(a03, b21)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1010(__m128 a, __m128 b) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a10, b10)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1001(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a01, b10)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0101(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32x2_t b01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(b))); + return vreinterpretq_m128_f32(vcombine_f32(a01, b01)); +} + +// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the +// high +FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a10, b32)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) +{ + float32x2_t a11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 1); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + return vreinterpretq_m128_f32(vcombine_f32(a11, b00)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) +{ + float32x2_t a22 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + return vreinterpretq_m128_f32(vcombine_f32(a22, b00)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) +{ + float32x2_t a00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 0); + float32x2_t b22 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(b)), 0); + return vreinterpretq_m128_f32(vcombine_f32(a00, b22)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) +{ + float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + float32x2_t a22 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); + float32x2_t a02 = vset_lane_f32(a0, a22, 1); /* TODO: use vzip ?*/ + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a02, b32)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) +{ + float32x2_t a33 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 1); + float32x2_t b11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 1); + return vreinterpretq_m128_f32(vcombine_f32(a33, b11)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32_t b2 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 2); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + float32x2_t b20 = vset_lane_f32(b2, b00, 1); + return vreinterpretq_m128_f32(vcombine_f32(a10, b20)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2001(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + float32x2_t b20 = vset_lane_f32(b2, b00, 1); + return vreinterpretq_m128_f32(vcombine_f32(a01, b20)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) +{ + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + float32x2_t b20 = vset_lane_f32(b2, b00, 1); + return vreinterpretq_m128_f32(vcombine_f32(a32, b20)); +} + +// NEON does not support a general purpose permute intrinsic +// Selects four specific single-precision, floating-point values from a and b, +// based on the mask i. +// +// C equivalent: +// __m128 _mm_shuffle_ps_default(__m128 a, __m128 b, +// __constrange(0, 255) int imm) { +// __m128 ret; +// ret[0] = a[imm & 0x3]; ret[1] = a[(imm >> 2) & 0x3]; +// ret[2] = b[(imm >> 4) & 0x03]; ret[3] = b[(imm >> 6) & 0x03]; +// return ret; +// } +// +// https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx +#define _mm_shuffle_ps_default(a, b, imm) \ + __extension__({ \ + float32x4_t ret; \ + ret = vmovq_n_f32( \ + vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & (0x3))); \ + ret = vsetq_lane_f32( \ + vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), \ + ret, 1); \ + ret = vsetq_lane_f32( \ + vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), \ + ret, 2); \ + ret = vsetq_lane_f32( \ + vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), \ + ret, 3); \ + vreinterpretq_m128_f32(ret); \ + }) + +// FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) +// int imm) +#if __has_builtin(__builtin_shufflevector) +#define _mm_shuffle_ps(a, b, imm) \ + __extension__({ \ + float32x4_t _input1 = vreinterpretq_f32_m128(a); \ + float32x4_t _input2 = vreinterpretq_f32_m128(b); \ + float32x4_t _shuf = __builtin_shufflevector( \ + _input1, _input2, (imm) & (0x3), ((imm) >> 2) & 0x3, \ + (((imm) >> 4) & 0x3) + 4, (((imm) >> 6) & 0x3) + 4); \ + vreinterpretq_m128_f32(_shuf); \ + }) +#else // generic +#define _mm_shuffle_ps(a, b, imm) \ + __extension__({ \ + __m128 ret; \ + switch (imm) { \ + case _MM_SHUFFLE(1, 0, 3, 2): \ + ret = _mm_shuffle_ps_1032((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 3, 0, 1): \ + ret = _mm_shuffle_ps_2301((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 3, 2, 1): \ + ret = _mm_shuffle_ps_0321((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 1, 0, 3): \ + ret = _mm_shuffle_ps_2103((a), (b)); \ + break; \ + case _MM_SHUFFLE(1, 0, 1, 0): \ + ret = _mm_movelh_ps((a), (b)); \ + break; \ + case _MM_SHUFFLE(1, 0, 0, 1): \ + ret = _mm_shuffle_ps_1001((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 1, 0, 1): \ + ret = _mm_shuffle_ps_0101((a), (b)); \ + break; \ + case _MM_SHUFFLE(3, 2, 1, 0): \ + ret = _mm_shuffle_ps_3210((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 0, 1, 1): \ + ret = _mm_shuffle_ps_0011((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 0, 2, 2): \ + ret = _mm_shuffle_ps_0022((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 2, 0, 0): \ + ret = _mm_shuffle_ps_2200((a), (b)); \ + break; \ + case _MM_SHUFFLE(3, 2, 0, 2): \ + ret = _mm_shuffle_ps_3202((a), (b)); \ + break; \ + case _MM_SHUFFLE(3, 2, 3, 2): \ + ret = _mm_movehl_ps((b), (a)); \ + break; \ + case _MM_SHUFFLE(1, 1, 3, 3): \ + ret = _mm_shuffle_ps_1133((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 0, 1, 0): \ + ret = _mm_shuffle_ps_2010((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 0, 0, 1): \ + ret = _mm_shuffle_ps_2001((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 0, 3, 2): \ + ret = _mm_shuffle_ps_2032((a), (b)); \ + break; \ + default: \ + ret = _mm_shuffle_ps_default((a), (b), (imm)); \ + break; \ + } \ + ret; \ + }) +#endif + +// Takes the upper 64 bits of a and places it in the low end of the result +// Takes the lower 64 bits of a and places it into the high end of the result. +FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a) +{ + int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a)); + int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); + return vreinterpretq_m128i_s32(vcombine_s32(a32, a10)); +} + +// takes the lower two 32-bit values from a and swaps them and places in low end +// of result takes the higher two 32 bit values from a and swaps them and places +// in high end of result. +FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a) +{ + int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); + int32x2_t a23 = vrev64_s32(vget_high_s32(vreinterpretq_s32_m128i(a))); + return vreinterpretq_m128i_s32(vcombine_s32(a01, a23)); +} + +// rotates the least significant 32 bits into the most signficant 32 bits, and +// shifts the rest down +FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a) +{ + return vreinterpretq_m128i_s32( + vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 1)); +} + +// rotates the most significant 32 bits into the least signficant 32 bits, and +// shifts the rest up +FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a) +{ + return vreinterpretq_m128i_s32( + vextq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(a), 3)); +} + +// gets the lower 64 bits of a, and places it in the upper 64 bits +// gets the lower 64 bits of a and places it in the lower 64 bits +FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a) +{ + int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); + return vreinterpretq_m128i_s32(vcombine_s32(a10, a10)); +} + +// gets the lower 64 bits of a, swaps the 0 and 1 elements, and places it in the +// lower 64 bits gets the lower 64 bits of a, and places it in the upper 64 bits +FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a) +{ + int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); + int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a)); + return vreinterpretq_m128i_s32(vcombine_s32(a01, a10)); +} + +// gets the lower 64 bits of a, swaps the 0 and 1 elements and places it in the +// upper 64 bits gets the lower 64 bits of a, swaps the 0 and 1 elements, and +// places it in the lower 64 bits +FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a) +{ + int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); + return vreinterpretq_m128i_s32(vcombine_s32(a01, a01)); +} + +FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a) +{ + int32x2_t a11 = vdup_lane_s32(vget_low_s32(vreinterpretq_s32_m128i(a)), 1); + int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0); + return vreinterpretq_m128i_s32(vcombine_s32(a11, a22)); +} + +FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a) +{ + int32x2_t a22 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 0); + int32x2_t a01 = vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a))); + return vreinterpretq_m128i_s32(vcombine_s32(a22, a01)); +} + +FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a) +{ + int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a)); + int32x2_t a33 = vdup_lane_s32(vget_high_s32(vreinterpretq_s32_m128i(a)), 1); + return vreinterpretq_m128i_s32(vcombine_s32(a32, a33)); +} + +// Shuffle packed 8-bit integers in a according to shuffle control mask in the +// corresponding 8-bit element of b, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8 +FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b) +{ + int8x16_t tbl = vreinterpretq_s8_m128i(a); // input a + uint8x16_t idx = vreinterpretq_u8_m128i(b); // input b + uint8x16_t idx_masked = + vandq_u8(idx, vdupq_n_u8(0x8F)); // avoid using meaningless bits +#if defined(__aarch64__) + return vreinterpretq_m128i_s8(vqtbl1q_s8(tbl, idx_masked)); +#elif defined(__GNUC__) + int8x16_t ret; + // %e and %f represent the even and odd D registers + // respectively. + __asm__ __volatile__( + "vtbl.8 %e[ret], {%e[tbl], %f[tbl]}, %e[idx]\n" + "vtbl.8 %f[ret], {%e[tbl], %f[tbl]}, %f[idx]\n" + : [ret] "=&w"(ret) + : [tbl] "w"(tbl), [idx] "w"(idx_masked)); + return vreinterpretq_m128i_s8(ret); +#else + // use this line if testing on aarch64 + int8x8x2_t a_split = {vget_low_s8(tbl), vget_high_s8(tbl)}; + return vreinterpretq_m128i_s8( + vcombine_s8(vtbl2_s8(a_split, vget_low_u8(idx_masked)), + vtbl2_s8(a_split, vget_high_u8(idx_masked)))); +#endif +} + +// C equivalent: +// __m128i _mm_shuffle_epi32_default(__m128i a, +// __constrange(0, 255) int imm) { +// __m128i ret; +// ret[0] = a[imm & 0x3]; ret[1] = a[(imm >> 2) & 0x3]; +// ret[2] = a[(imm >> 4) & 0x03]; ret[3] = a[(imm >> 6) & 0x03]; +// return ret; +// } +#define _mm_shuffle_epi32_default(a, imm) \ + __extension__({ \ + int32x4_t ret; \ + ret = vmovq_n_s32( \ + vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & (0x3))); \ + ret = vsetq_lane_s32( \ + vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), \ + ret, 1); \ + ret = vsetq_lane_s32( \ + vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), \ + ret, 2); \ + ret = vsetq_lane_s32( \ + vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), \ + ret, 3); \ + vreinterpretq_m128i_s32(ret); \ + }) + +// FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a, __constrange(0,255) +// int imm) +#if defined(__aarch64__) +#define _mm_shuffle_epi32_splat(a, imm) \ + __extension__({ \ + vreinterpretq_m128i_s32( \ + vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \ + }) +#else +#define _mm_shuffle_epi32_splat(a, imm) \ + __extension__({ \ + vreinterpretq_m128i_s32( \ + vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \ + }) +#endif + +// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. +// https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx +// FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, +// __constrange(0,255) int imm) +#if __has_builtin(__builtin_shufflevector) +#define _mm_shuffle_epi32(a, imm) \ + __extension__({ \ + int32x4_t _input = vreinterpretq_s32_m128i(a); \ + int32x4_t _shuf = __builtin_shufflevector( \ + _input, _input, (imm) & (0x3), ((imm) >> 2) & 0x3, \ + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); \ + vreinterpretq_m128i_s32(_shuf); \ + }) +#else // generic +#define _mm_shuffle_epi32(a, imm) \ + __extension__({ \ + __m128i ret; \ + switch (imm) { \ + case _MM_SHUFFLE(1, 0, 3, 2): \ + ret = _mm_shuffle_epi_1032((a)); \ + break; \ + case _MM_SHUFFLE(2, 3, 0, 1): \ + ret = _mm_shuffle_epi_2301((a)); \ + break; \ + case _MM_SHUFFLE(0, 3, 2, 1): \ + ret = _mm_shuffle_epi_0321((a)); \ + break; \ + case _MM_SHUFFLE(2, 1, 0, 3): \ + ret = _mm_shuffle_epi_2103((a)); \ + break; \ + case _MM_SHUFFLE(1, 0, 1, 0): \ + ret = _mm_shuffle_epi_1010((a)); \ + break; \ + case _MM_SHUFFLE(1, 0, 0, 1): \ + ret = _mm_shuffle_epi_1001((a)); \ + break; \ + case _MM_SHUFFLE(0, 1, 0, 1): \ + ret = _mm_shuffle_epi_0101((a)); \ + break; \ + case _MM_SHUFFLE(2, 2, 1, 1): \ + ret = _mm_shuffle_epi_2211((a)); \ + break; \ + case _MM_SHUFFLE(0, 1, 2, 2): \ + ret = _mm_shuffle_epi_0122((a)); \ + break; \ + case _MM_SHUFFLE(3, 3, 3, 2): \ + ret = _mm_shuffle_epi_3332((a)); \ + break; \ + case _MM_SHUFFLE(0, 0, 0, 0): \ + ret = _mm_shuffle_epi32_splat((a), 0); \ + break; \ + case _MM_SHUFFLE(1, 1, 1, 1): \ + ret = _mm_shuffle_epi32_splat((a), 1); \ + break; \ + case _MM_SHUFFLE(2, 2, 2, 2): \ + ret = _mm_shuffle_epi32_splat((a), 2); \ + break; \ + case _MM_SHUFFLE(3, 3, 3, 3): \ + ret = _mm_shuffle_epi32_splat((a), 3); \ + break; \ + default: \ + ret = _mm_shuffle_epi32_default((a), (imm)); \ + break; \ + } \ + ret; \ + }) +#endif + +// Shuffles the lower 4 signed or unsigned 16-bit integers in a as specified +// by imm. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/y41dkk37(v=vs.100) +// FORCE_INLINE __m128i _mm_shufflelo_epi16_function(__m128i a, +// __constrange(0,255) int +// imm) +#define _mm_shufflelo_epi16_function(a, imm) \ + __extension__({ \ + int16x8_t ret = vreinterpretq_s16_m128i(a); \ + int16x4_t lowBits = vget_low_s16(ret); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm) & (0x3)), ret, 0); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 2) & 0x3), ret, \ + 1); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 4) & 0x3), ret, \ + 2); \ + ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 6) & 0x3), ret, \ + 3); \ + vreinterpretq_m128i_s16(ret); \ + }) + +// FORCE_INLINE __m128i _mm_shufflelo_epi16(__m128i a, +// __constrange(0,255) int imm) +#if __has_builtin(__builtin_shufflevector) +#define _mm_shufflelo_epi16(a, imm) \ + __extension__({ \ + int16x8_t _input = vreinterpretq_s16_m128i(a); \ + int16x8_t _shuf = __builtin_shufflevector( \ + _input, _input, ((imm) & (0x3)), (((imm) >> 2) & 0x3), \ + (((imm) >> 4) & 0x3), (((imm) >> 6) & 0x3), 4, 5, 6, 7); \ + vreinterpretq_m128i_s16(_shuf); \ + }) +#else // generic +#define _mm_shufflelo_epi16(a, imm) _mm_shufflelo_epi16_function((a), (imm)) +#endif + +// Shuffles the upper 4 signed or unsigned 16-bit integers in a as specified +// by imm. +// https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx +// FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a, +// __constrange(0,255) int +// imm) +#define _mm_shufflehi_epi16_function(a, imm) \ + __extension__({ \ + int16x8_t ret = vreinterpretq_s16_m128i(a); \ + int16x4_t highBits = vget_high_s16(ret); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & (0x3)), ret, 4); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, \ + 5); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, \ + 6); \ + ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, \ + 7); \ + vreinterpretq_m128i_s16(ret); \ + }) + +// FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a, +// __constrange(0,255) int imm) +#if __has_builtin(__builtin_shufflevector) +#define _mm_shufflehi_epi16(a, imm) \ + __extension__({ \ + int16x8_t _input = vreinterpretq_s16_m128i(a); \ + int16x8_t _shuf = __builtin_shufflevector( \ + _input, _input, 0, 1, 2, 3, ((imm) & (0x3)) + 4, \ + (((imm) >> 2) & 0x3) + 4, (((imm) >> 4) & 0x3) + 4, \ + (((imm) >> 6) & 0x3) + 4); \ + vreinterpretq_m128i_s16(_shuf); \ + }) +#else // generic +#define _mm_shufflehi_epi16(a, imm) _mm_shufflehi_epi16_function((a), (imm)) +#endif + +// Shuffle double-precision (64-bit) floating-point elements using the control +// in imm8, and store the results in dst. +// +// dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64] +// dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd +#if __has_builtin(__builtin_shufflevector) +#define _mm_shuffle_pd(a, b, imm8) \ + vreinterpretq_m128d_s64(__builtin_shufflevector( \ + vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b), imm8 & 0x1, \ + ((imm8 & 0x2) >> 1) + 2)) +#else +#define _mm_shuffle_pd(a, b, imm8) \ + _mm_castsi128_pd(_mm_set_epi64x( \ + vgetq_lane_s64(vreinterpretq_s64_m128d(b), (imm8 & 0x2) >> 1), \ + vgetq_lane_s64(vreinterpretq_s64_m128d(a), imm8 & 0x1))) +#endif + +// Blend packed 16-bit integers from a and b using control mask imm8, and store +// the results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// IF imm8[j] +// dst[i+15:i] := b[i+15:i] +// ELSE +// dst[i+15:i] := a[i+15:i] +// FI +// ENDFOR +// FORCE_INLINE __m128i _mm_blend_epi16(__m128i a, __m128i b, +// __constrange(0,255) int imm) +#define _mm_blend_epi16(a, b, imm) \ + __extension__({ \ + const uint16_t _mask[8] = {((imm) & (1 << 0)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 1)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 2)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 3)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 4)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 5)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 6)) ? 0xFFFF : 0x0000, \ + ((imm) & (1 << 7)) ? 0xFFFF : 0x0000}; \ + uint16x8_t _mask_vec = vld1q_u16(_mask); \ + uint16x8_t _a = vreinterpretq_u16_m128i(a); \ + uint16x8_t _b = vreinterpretq_u16_m128i(b); \ + vreinterpretq_m128i_u16(vbslq_u16(_mask_vec, _b, _a)); \ + }) + +// Blend packed 8-bit integers from a and b using mask, and store the results in +// dst. +// +// FOR j := 0 to 15 +// i := j*8 +// IF mask[i+7] +// dst[i+7:i] := b[i+7:i] +// ELSE +// dst[i+7:i] := a[i+7:i] +// FI +// ENDFOR +FORCE_INLINE __m128i _mm_blendv_epi8(__m128i _a, __m128i _b, __m128i _mask) +{ + // Use a signed shift right to create a mask with the sign bit + uint8x16_t mask = + vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_m128i(_mask), 7)); + uint8x16_t a = vreinterpretq_u8_m128i(_a); + uint8x16_t b = vreinterpretq_u8_m128i(_b); + return vreinterpretq_m128i_u8(vbslq_u8(mask, b, a)); +} + +/* Shifts */ + + +// Shift packed 16-bit integers in a right by imm while shifting in sign +// bits, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16 +FORCE_INLINE __m128i _mm_srai_epi16(__m128i a, int imm) +{ + const int count = (imm & ~15) ? 15 : imm; + return (__m128i) vshlq_s16((int16x8_t) a, vdupq_n_s16(-count)); +} + +// Shifts the 8 signed or unsigned 16-bit integers in a left by count bits while +// shifting in zeros. +// +// r0 := a0 << count +// r1 := a1 << count +// ... +// r7 := a7 << count +// +// https://msdn.microsoft.com/en-us/library/es73bcsy(v=vs.90).aspx +#define _mm_slli_epi16(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm)) <= 0) { \ + ret = a; \ + } \ + if (unlikely((imm) > 15)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + ret = vreinterpretq_m128i_s16( \ + vshlq_n_s16(vreinterpretq_s16_m128i(a), (imm))); \ + } \ + ret; \ + }) + +// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while +// shifting in zeros. : +// https://msdn.microsoft.com/en-us/library/z2k3bbtb%28v=vs.90%29.aspx +// FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, __constrange(0,255) int imm) +FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, int imm) +{ + if (unlikely(imm <= 0)) /* TODO: add constant range macro: [0, 255] */ + return a; + if (unlikely(imm > 31)) + return _mm_setzero_si128(); + return vreinterpretq_m128i_s32( + vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(imm))); +} + +// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and +// store the results in dst. +FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm) +{ + if (unlikely(imm <= 0)) /* TODO: add constant range macro: [0, 255] */ + return a; + if (unlikely(imm > 63)) + return _mm_setzero_si128(); + return vreinterpretq_m128i_s64( + vshlq_s64(vreinterpretq_s64_m128i(a), vdupq_n_s64(imm))); +} + +// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// IF imm8[7:0] > 15 +// dst[i+15:i] := 0 +// ELSE +// dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16 +#define _mm_srli_epi16(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely(imm) == 0) { \ + ret = a; \ + } \ + if (likely(0 < (imm) && (imm) < 16)) { \ + ret = vreinterpretq_m128i_u16( \ + vshlq_u16(vreinterpretq_u16_m128i(a), vdupq_n_s16(-imm))); \ + } else { \ + ret = _mm_setzero_si128(); \ + } \ + ret; \ + }) + +// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 3 +// i := j*32 +// IF imm8[7:0] > 31 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32 +// FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm) +#define _mm_srli_epi32(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm) == 0)) { \ + ret = a; \ + } \ + if (likely(0 < (imm) && (imm) < 32)) { \ + ret = vreinterpretq_m128i_u32( \ + vshlq_u32(vreinterpretq_u32_m128i(a), vdupq_n_s32(-imm))); \ + } else { \ + ret = _mm_setzero_si128(); \ + } \ + ret; \ + }) + +// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// IF imm8[7:0] > 63 +// dst[i+63:i] := 0 +// ELSE +// dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64 +#define _mm_srli_epi64(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm) == 0)) { \ + ret = a; \ + } \ + if (likely(0 < (imm) && (imm) < 64)) { \ + ret = vreinterpretq_m128i_u64( \ + vshlq_u64(vreinterpretq_u64_m128i(a), vdupq_n_s64(-imm))); \ + } else { \ + ret = _mm_setzero_si128(); \ + } \ + ret; \ + }) + +// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, +// and store the results in dst. +// +// FOR j := 0 to 3 +// i := j*32 +// IF imm8[7:0] > 31 +// dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0) +// ELSE +// dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32 +// FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm) +#define _mm_srai_epi32(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm) == 0)) { \ + ret = a; \ + } \ + if (likely(0 < (imm) && (imm) < 32)) { \ + ret = vreinterpretq_m128i_s32( \ + vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(-imm))); \ + } else { \ + ret = vreinterpretq_m128i_s32( \ + vshrq_n_s32(vreinterpretq_s32_m128i(a), 31)); \ + } \ + ret; \ + }) + +// Shifts the 128 - bit value in a right by imm bytes while shifting in +// zeros.imm must be an immediate. +// +// r := srl(a, imm*8) +// +// https://msdn.microsoft.com/en-us/library/305w28yz(v=vs.100).aspx +// FORCE_INLINE _mm_srli_si128(__m128i a, __constrange(0,255) int imm) +#define _mm_srli_si128(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm) <= 0)) { \ + ret = a; \ + } \ + if (unlikely((imm) > 15)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + ret = vreinterpretq_m128i_s8( \ + vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), (imm))); \ + } \ + ret; \ + }) + +// Shifts the 128-bit value in a left by imm bytes while shifting in zeros. imm +// must be an immediate. +// +// r := a << (imm * 8) +// +// https://msdn.microsoft.com/en-us/library/34d3k2kt(v=vs.100).aspx +// FORCE_INLINE __m128i _mm_slli_si128(__m128i a, __constrange(0,255) int imm) +#define _mm_slli_si128(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (unlikely((imm) <= 0)) { \ + ret = a; \ + } \ + if (unlikely((imm) > 15)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + ret = vreinterpretq_m128i_s8(vextq_s8( \ + vdupq_n_s8(0), vreinterpretq_s8_m128i(a), 16 - (imm))); \ + } \ + ret; \ + }) + +// Shifts the 8 signed or unsigned 16-bit integers in a left by count bits while +// shifting in zeros. +// +// r0 := a0 << count +// r1 := a1 << count +// ... +// r7 := a7 << count +// +// https://msdn.microsoft.com/en-us/library/c79w388h(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_sll_epi16(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (unlikely(c > 15)) + return _mm_setzero_si128(); + + int16x8_t vc = vdupq_n_s16((int16_t) c); + return vreinterpretq_m128i_s16(vshlq_s16(vreinterpretq_s16_m128i(a), vc)); +} + +// Shifts the 4 signed or unsigned 32-bit integers in a left by count bits while +// shifting in zeros. +// +// r0 := a0 << count +// r1 := a1 << count +// r2 := a2 << count +// r3 := a3 << count +// +// https://msdn.microsoft.com/en-us/library/6fe5a6s9(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_sll_epi32(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (unlikely(c > 31)) + return _mm_setzero_si128(); + + int32x4_t vc = vdupq_n_s32((int32_t) c); + return vreinterpretq_m128i_s32(vshlq_s32(vreinterpretq_s32_m128i(a), vc)); +} + +// Shifts the 2 signed or unsigned 64-bit integers in a left by count bits while +// shifting in zeros. +// +// r0 := a0 << count +// r1 := a1 << count +// +// https://msdn.microsoft.com/en-us/library/6ta9dffd(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_sll_epi64(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (unlikely(c > 63)) + return _mm_setzero_si128(); + + int64x2_t vc = vdupq_n_s64((int64_t) c); + return vreinterpretq_m128i_s64(vshlq_s64(vreinterpretq_s64_m128i(a), vc)); +} + +// Shifts the 8 signed or unsigned 16-bit integers in a right by count bits +// while shifting in zeros. +// +// r0 := srl(a0, count) +// r1 := srl(a1, count) +// ... +// r7 := srl(a7, count) +// +// https://msdn.microsoft.com/en-us/library/wd5ax830(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_srl_epi16(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (unlikely(c > 15)) + return _mm_setzero_si128(); + + int16x8_t vc = vdupq_n_s16(-(int16_t) c); + return vreinterpretq_m128i_u16(vshlq_u16(vreinterpretq_u16_m128i(a), vc)); +} + +// Shifts the 4 signed or unsigned 32-bit integers in a right by count bits +// while shifting in zeros. +// +// r0 := srl(a0, count) +// r1 := srl(a1, count) +// r2 := srl(a2, count) +// r3 := srl(a3, count) +// +// https://msdn.microsoft.com/en-us/library/a9cbttf4(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_srl_epi32(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (unlikely(c > 31)) + return _mm_setzero_si128(); + + int32x4_t vc = vdupq_n_s32(-(int32_t) c); + return vreinterpretq_m128i_u32(vshlq_u32(vreinterpretq_u32_m128i(a), vc)); +} + +// Shifts the 2 signed or unsigned 64-bit integers in a right by count bits +// while shifting in zeros. +// +// r0 := srl(a0, count) +// r1 := srl(a1, count) +// +// https://msdn.microsoft.com/en-us/library/yf6cf9k8(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (unlikely(c > 63)) + return _mm_setzero_si128(); + + int64x2_t vc = vdupq_n_s64(-(int64_t) c); + return vreinterpretq_m128i_u64(vshlq_u64(vreinterpretq_u64_m128i(a), vc)); +} + +// NEON does not provide a version of this function. +// Creates a 16-bit mask from the most significant bits of the 16 signed or +// unsigned 8-bit integers in a and zero extends the upper bits. +// https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx +FORCE_INLINE int _mm_movemask_epi8(__m128i a) +{ + // Use increasingly wide shifts+adds to collect the sign bits + // together. + // Since the widening shifts would be rather confusing to follow in little + // endian, everything will be illustrated in big endian order instead. This + // has a different result - the bits would actually be reversed on a big + // endian machine. + + // Starting input (only half the elements are shown): + // 89 ff 1d c0 00 10 99 33 + uint8x16_t input = vreinterpretq_u8_m128i(a); + + // Shift out everything but the sign bits with an unsigned shift right. + // + // Bytes of the vector:: + // 89 ff 1d c0 00 10 99 33 + // \ \ \ \ \ \ \ \ high_bits = (uint16x4_t)(input >> 7) + // | | | | | | | | + // 01 01 00 01 00 00 01 00 + // + // Bits of first important lane(s): + // 10001001 (89) + // \______ + // | + // 00000001 (01) + uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7)); + + // Merge the even lanes together with a 16-bit unsigned shift right + add. + // 'xx' represents garbage data which will be ignored in the final result. + // In the important bytes, the add functions like a binary OR. + // + // 01 01 00 01 00 00 01 00 + // \_ | \_ | \_ | \_ | paired16 = (uint32x4_t)(input + (input >> 7)) + // \| \| \| \| + // xx 03 xx 01 xx 00 xx 02 + // + // 00000001 00000001 (01 01) + // \_______ | + // \| + // xxxxxxxx xxxxxx11 (xx 03) + uint32x4_t paired16 = + vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7)); + + // Repeat with a wider 32-bit shift + add. + // xx 03 xx 01 xx 00 xx 02 + // \____ | \____ | paired32 = (uint64x1_t)(paired16 + (paired16 >> + // 14)) + // \| \| + // xx xx xx 0d xx xx xx 02 + // + // 00000011 00000001 (03 01) + // \\_____ || + // '----.\|| + // xxxxxxxx xxxx1101 (xx 0d) + uint64x2_t paired32 = + vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14)); + + // Last, an even wider 64-bit shift + add to get our result in the low 8 bit + // lanes. xx xx xx 0d xx xx xx 02 + // \_________ | paired64 = (uint8x8_t)(paired32 + (paired32 >> + // 28)) + // \| + // xx xx xx xx xx xx xx d2 + // + // 00001101 00000010 (0d 02) + // \ \___ | | + // '---. \| | + // xxxxxxxx 11010010 (xx d2) + uint8x16_t paired64 = + vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28)); + + // Extract the low 8 bits from each 64-bit lane with 2 8-bit extracts. + // xx xx xx xx xx xx xx d2 + // || return paired64[0] + // d2 + // Note: Little endian would return the correct value 4b (01001011) instead. + return vgetq_lane_u8(paired64, 0) | ((int) vgetq_lane_u8(paired64, 8) << 8); +} + +// Copy the lower 64-bit integer in a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi64_pi64 +FORCE_INLINE __m64 _mm_movepi64_pi64(__m128i a) +{ + return vreinterpret_m64_s64(vget_low_s64(vreinterpretq_s64_m128i(a))); +} + +// Copy the 64-bit integer a to the lower element of dst, and zero the upper +// element. +// +// dst[63:0] := a[63:0] +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movpi64_epi64 +FORCE_INLINE __m128i _mm_movpi64_epi64(__m64 a) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vreinterpret_s64_m64(a), vdup_n_s64(0))); +} + +// NEON does not provide this method +// Creates a 4-bit mask from the most significant bits of the four +// single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx +FORCE_INLINE int _mm_movemask_ps(__m128 a) +{ + uint32x4_t input = vreinterpretq_u32_m128(a); +#if defined(__aarch64__) + static const int32x4_t shift = {0, 1, 2, 3}; + uint32x4_t tmp = vshrq_n_u32(input, 31); + return vaddvq_u32(vshlq_u32(tmp, shift)); +#else + // Uses the exact same method as _mm_movemask_epi8, see that for details. + // Shift out everything but the sign bits with a 32-bit unsigned shift + // right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(input, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = + vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. + return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); +#endif +} + +// Compute the bitwise NOT of a and then AND with a 128-bit vector containing +// all 1's, and return 1 if the result is zero, otherwise return 0. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones +FORCE_INLINE int _mm_test_all_ones(__m128i a) +{ + return (uint64_t)(vgetq_lane_s64(a, 0) & vgetq_lane_s64(a, 1)) == + ~(uint64_t) 0; +} + +// Compute the bitwise AND of 128 bits (representing integer data) in a and +// mask, and return 1 if the result is zero, otherwise return 0. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros +FORCE_INLINE int _mm_test_all_zeros(__m128i a, __m128i mask) +{ + int64x2_t a_and_mask = + vandq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(mask)); + return (vgetq_lane_s64(a_and_mask, 0) | vgetq_lane_s64(a_and_mask, 1)) ? 0 + : 1; +} + +/* Math operations */ + +// Subtracts the four single-precision, floating-point values of a and b. +// +// r0 := a0 - b0 +// r1 := a1 - b1 +// r2 := a2 - b2 +// r3 := a3 - b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vsubq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Subtract the lower single-precision (32-bit) floating-point element in b from +// the lower single-precision (32-bit) floating-point element in a, store the +// result in the lower element of dst, and copy the upper 3 packed elements from +// a to the upper elements of dst. +// +// dst[31:0] := a[31:0] - b[31:0] +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ss +FORCE_INLINE __m128 _mm_sub_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_sub_ps(a, b)); +} + +// Subtract 2 packed 64-bit integers in b from 2 packed 64-bit integers in a, +// and store the results in dst. +// r0 := a0 - b0 +// r1 := a1 - b1 +FORCE_INLINE __m128i _mm_sub_epi64(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s64( + vsubq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); +} + +// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or +// unsigned 32-bit integers of a. +// +// r0 := a0 - b0 +// r1 := a1 - b1 +// r2 := a2 - b2 +// r3 := a3 - b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx +FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vsubq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and +// store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16 +FORCE_INLINE __m128i _mm_sub_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and +// store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8 +FORCE_INLINE __m128i _mm_sub_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vsubq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Subtract 64-bit integer b from 64-bit integer a, and store the result in dst. +// +// dst[63:0] := a[63:0] - b[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_si64 +FORCE_INLINE __m64 _mm_sub_si64(__m64 a, __m64 b) +{ + return vreinterpret_m64_s64( + vsub_s64(vreinterpret_s64_m64(a), vreinterpret_s64_m64(b))); +} + +// Subtracts the 8 unsigned 16-bit integers of bfrom the 8 unsigned 16-bit +// integers of a and saturates.. +// https://technet.microsoft.com/en-us/subscriptions/index/f44y0s19(v=vs.90).aspx +FORCE_INLINE __m128i _mm_subs_epu16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vqsubq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b))); +} + +// Subtracts the 16 unsigned 8-bit integers of b from the 16 unsigned 8-bit +// integers of a and saturates. +// +// r0 := UnsignedSaturate(a0 - b0) +// r1 := UnsignedSaturate(a1 - b1) +// ... +// r15 := UnsignedSaturate(a15 - b15) +// +// https://technet.microsoft.com/en-us/subscriptions/yadkxc18(v=vs.90) +FORCE_INLINE __m128i _mm_subs_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vqsubq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Subtracts the 16 signed 8-bit integers of b from the 16 signed 8-bit integers +// of a and saturates. +// +// r0 := SignedSaturate(a0 - b0) +// r1 := SignedSaturate(a1 - b1) +// ... +// r15 := SignedSaturate(a15 - b15) +// +// https://technet.microsoft.com/en-us/subscriptions/by7kzks1(v=vs.90) +FORCE_INLINE __m128i _mm_subs_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vqsubq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Subtracts the 8 signed 16-bit integers of b from the 8 signed 16-bit integers +// of a and saturates. +// +// r0 := SignedSaturate(a0 - b0) +// r1 := SignedSaturate(a1 - b1) +// ... +// r7 := SignedSaturate(a7 - b7) +// +// https://technet.microsoft.com/en-us/subscriptions/3247z5b8(v=vs.90) +FORCE_INLINE __m128i _mm_subs_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vqsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Subtract packed double-precision (64-bit) floating-point elements in b from +// packed double-precision (64-bit) floating-point elements in a, and store the +// results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// dst[i+63:i] := a[i+63:i] - b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_pd +FORCE_INLINE __m128d _mm_sub_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vsubq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] - db[0]; + c[1] = da[1] - db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Subtract the lower double-precision (64-bit) floating-point element in b from +// the lower double-precision (64-bit) floating-point element in a, store the +// result in the lower element of dst, and copy the upper element from a to the +// upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd +FORCE_INLINE __m128d _mm_sub_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_sub_pd(a, b)); +} + +// Add packed unsigned 16-bit integers in a and b using saturation, and store +// the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16 +FORCE_INLINE __m128i _mm_adds_epu16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vqaddq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b))); +} + +// Negate packed 8-bit integers in a when the corresponding signed +// 8-bit integer in b is negative, and store the results in dst. +// Element in dst are zeroed out when the corresponding element +// in b is zero. +// +// for i in 0..15 +// if b[i] < 0 +// r[i] := -a[i] +// else if b[i] == 0 +// r[i] := 0 +// else +// r[i] := a[i] +// fi +// done +FORCE_INLINE __m128i _mm_sign_epi8(__m128i _a, __m128i _b) +{ + int8x16_t a = vreinterpretq_s8_m128i(_a); + int8x16_t b = vreinterpretq_s8_m128i(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFF : 0 + uint8x16_t ltMask = vreinterpretq_u8_s8(vshrq_n_s8(b, 7)); + + // (b == 0) ? 0xFF : 0 +#if defined(__aarch64__) + int8x16_t zeroMask = vreinterpretq_s8_u8(vceqzq_s8(b)); +#else + int8x16_t zeroMask = vreinterpretq_s8_u8(vceqq_s8(b, vdupq_n_s8(0))); +#endif + + // bitwise select either a or nagative 'a' (vnegq_s8(a) return nagative 'a') + // based on ltMask + int8x16_t masked = vbslq_s8(ltMask, vnegq_s8(a), a); + // res = masked & (~zeroMask) + int8x16_t res = vbicq_s8(masked, zeroMask); + + return vreinterpretq_m128i_s8(res); +} + +// Negate packed 16-bit integers in a when the corresponding signed +// 16-bit integer in b is negative, and store the results in dst. +// Element in dst are zeroed out when the corresponding element +// in b is zero. +// +// for i in 0..7 +// if b[i] < 0 +// r[i] := -a[i] +// else if b[i] == 0 +// r[i] := 0 +// else +// r[i] := a[i] +// fi +// done +FORCE_INLINE __m128i _mm_sign_epi16(__m128i _a, __m128i _b) +{ + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFF : 0 + uint16x8_t ltMask = vreinterpretq_u16_s16(vshrq_n_s16(b, 15)); + // (b == 0) ? 0xFFFF : 0 +#if defined(__aarch64__) + int16x8_t zeroMask = vreinterpretq_s16_u16(vceqzq_s16(b)); +#else + int16x8_t zeroMask = vreinterpretq_s16_u16(vceqq_s16(b, vdupq_n_s16(0))); +#endif + + // bitwise select either a or negative 'a' (vnegq_s16(a) equals to negative + // 'a') based on ltMask + int16x8_t masked = vbslq_s16(ltMask, vnegq_s16(a), a); + // res = masked & (~zeroMask) + int16x8_t res = vbicq_s16(masked, zeroMask); + return vreinterpretq_m128i_s16(res); +} + +// Negate packed 32-bit integers in a when the corresponding signed +// 32-bit integer in b is negative, and store the results in dst. +// Element in dst are zeroed out when the corresponding element +// in b is zero. +// +// for i in 0..3 +// if b[i] < 0 +// r[i] := -a[i] +// else if b[i] == 0 +// r[i] := 0 +// else +// r[i] := a[i] +// fi +// done +FORCE_INLINE __m128i _mm_sign_epi32(__m128i _a, __m128i _b) +{ + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFFFFFF : 0 + uint32x4_t ltMask = vreinterpretq_u32_s32(vshrq_n_s32(b, 31)); + + // (b == 0) ? 0xFFFFFFFF : 0 +#if defined(__aarch64__) + int32x4_t zeroMask = vreinterpretq_s32_u32(vceqzq_s32(b)); +#else + int32x4_t zeroMask = vreinterpretq_s32_u32(vceqq_s32(b, vdupq_n_s32(0))); +#endif + + // bitwise select either a or negative 'a' (vnegq_s32(a) equals to negative + // 'a') based on ltMask + int32x4_t masked = vbslq_s32(ltMask, vnegq_s32(a), a); + // res = masked & (~zeroMask) + int32x4_t res = vbicq_s32(masked, zeroMask); + return vreinterpretq_m128i_s32(res); +} + +// Negate packed 16-bit integers in a when the corresponding signed 16-bit +// integer in b is negative, and store the results in dst. Element in dst are +// zeroed out when the corresponding element in b is zero. +// +// FOR j := 0 to 3 +// i := j*16 +// IF b[i+15:i] < 0 +// dst[i+15:i] := -(a[i+15:i]) +// ELSE IF b[i+15:i] == 0 +// dst[i+15:i] := 0 +// ELSE +// dst[i+15:i] := a[i+15:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi16 +FORCE_INLINE __m64 _mm_sign_pi16(__m64 _a, __m64 _b) +{ + int16x4_t a = vreinterpret_s16_m64(_a); + int16x4_t b = vreinterpret_s16_m64(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFF : 0 + uint16x4_t ltMask = vreinterpret_u16_s16(vshr_n_s16(b, 15)); + + // (b == 0) ? 0xFFFF : 0 +#if defined(__aarch64__) + int16x4_t zeroMask = vreinterpret_s16_u16(vceqz_s16(b)); +#else + int16x4_t zeroMask = vreinterpret_s16_u16(vceq_s16(b, vdup_n_s16(0))); +#endif + + // bitwise select either a or nagative 'a' (vneg_s16(a) return nagative 'a') + // based on ltMask + int16x4_t masked = vbsl_s16(ltMask, vneg_s16(a), a); + // res = masked & (~zeroMask) + int16x4_t res = vbic_s16(masked, zeroMask); + + return vreinterpret_m64_s16(res); +} + +// Negate packed 32-bit integers in a when the corresponding signed 32-bit +// integer in b is negative, and store the results in dst. Element in dst are +// zeroed out when the corresponding element in b is zero. +// +// FOR j := 0 to 1 +// i := j*32 +// IF b[i+31:i] < 0 +// dst[i+31:i] := -(a[i+31:i]) +// ELSE IF b[i+31:i] == 0 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := a[i+31:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi32 +FORCE_INLINE __m64 _mm_sign_pi32(__m64 _a, __m64 _b) +{ + int32x2_t a = vreinterpret_s32_m64(_a); + int32x2_t b = vreinterpret_s32_m64(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFFFFFF : 0 + uint32x2_t ltMask = vreinterpret_u32_s32(vshr_n_s32(b, 31)); + + // (b == 0) ? 0xFFFFFFFF : 0 +#if defined(__aarch64__) + int32x2_t zeroMask = vreinterpret_s32_u32(vceqz_s32(b)); +#else + int32x2_t zeroMask = vreinterpret_s32_u32(vceq_s32(b, vdup_n_s32(0))); +#endif + + // bitwise select either a or nagative 'a' (vneg_s32(a) return nagative 'a') + // based on ltMask + int32x2_t masked = vbsl_s32(ltMask, vneg_s32(a), a); + // res = masked & (~zeroMask) + int32x2_t res = vbic_s32(masked, zeroMask); + + return vreinterpret_m64_s32(res); +} + +// Negate packed 8-bit integers in a when the corresponding signed 8-bit integer +// in b is negative, and store the results in dst. Element in dst are zeroed out +// when the corresponding element in b is zero. +// +// FOR j := 0 to 7 +// i := j*8 +// IF b[i+7:i] < 0 +// dst[i+7:i] := -(a[i+7:i]) +// ELSE IF b[i+7:i] == 0 +// dst[i+7:i] := 0 +// ELSE +// dst[i+7:i] := a[i+7:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi8 +FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) +{ + int8x8_t a = vreinterpret_s8_m64(_a); + int8x8_t b = vreinterpret_s8_m64(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFF : 0 + uint8x8_t ltMask = vreinterpret_u8_s8(vshr_n_s8(b, 7)); + + // (b == 0) ? 0xFF : 0 +#if defined(__aarch64__) + int8x8_t zeroMask = vreinterpret_s8_u8(vceqz_s8(b)); +#else + int8x8_t zeroMask = vreinterpret_s8_u8(vceq_s8(b, vdup_n_s8(0))); +#endif + + // bitwise select either a or nagative 'a' (vneg_s8(a) return nagative 'a') + // based on ltMask + int8x8_t masked = vbsl_s8(ltMask, vneg_s8(a), a); + // res = masked & (~zeroMask) + int8x8_t res = vbic_s8(masked, zeroMask); + + return vreinterpret_m64_s8(res); +} + +// Average packed unsigned 16-bit integers in a and b, and store the results in +// dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu16 +FORCE_INLINE __m64 _mm_avg_pu16(__m64 a, __m64 b) +{ + return vreinterpret_m64_u16( + vrhadd_u16(vreinterpret_u16_m64(a), vreinterpret_u16_m64(b))); +} + +// Average packed unsigned 8-bit integers in a and b, and store the results in +// dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu8 +FORCE_INLINE __m64 _mm_avg_pu8(__m64 a, __m64 b) +{ + return vreinterpret_m64_u8( + vrhadd_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); +} + +// Average packed unsigned 8-bit integers in a and b, and store the results in +// dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgb +#define _m_pavgb(a, b) _mm_avg_pu8(a, b) + +// Average packed unsigned 16-bit integers in a and b, and store the results in +// dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1 +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgw +#define _m_pavgw(a, b) _mm_avg_pu16(a, b) + +// Extract a 16-bit integer from a, selected with imm8, and store the result in +// the lower element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pextrw +#define _m_pextrw(a, imm) _mm_extract_pi16(a, imm) + +// Copy a to dst, and insert the 16-bit integer i into dst at the location +// specified by imm8. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=m_pinsrw +#define _m_pinsrw(a, i, imm) _mm_insert_pi16(a, i, imm) + +// Compare packed signed 16-bit integers in a and b, and store packed maximum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxsw +#define _m_pmaxsw(a, b) _mm_max_pi16(a, b) + +// Compare packed unsigned 8-bit integers in a and b, and store packed maximum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxub +#define _m_pmaxub(a, b) _mm_max_pu8(a, b) + +// Compare packed signed 16-bit integers in a and b, and store packed minimum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminsw +#define _m_pminsw(a, b) _mm_min_pi16(a, b) + +// Compare packed unsigned 8-bit integers in a and b, and store packed minimum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminub +#define _m_pminub(a, b) _mm_min_pu8(a, b) + +// Computes the average of the 16 unsigned 8-bit integers in a and the 16 +// unsigned 8-bit integers in b and rounds. +// +// r0 := (a0 + b0) / 2 +// r1 := (a1 + b1) / 2 +// ... +// r15 := (a15 + b15) / 2 +// +// https://msdn.microsoft.com/en-us/library/vstudio/8zwh554a(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_avg_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vrhaddq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Computes the average of the 8 unsigned 16-bit integers in a and the 8 +// unsigned 16-bit integers in b and rounds. +// +// r0 := (a0 + b0) / 2 +// r1 := (a1 + b1) / 2 +// ... +// r7 := (a7 + b7) / 2 +// +// https://msdn.microsoft.com/en-us/library/vstudio/y13ca3c8(v=vs.90).aspx +FORCE_INLINE __m128i _mm_avg_epu16(__m128i a, __m128i b) +{ + return (__m128i) vrhaddq_u16(vreinterpretq_u16_m128i(a), + vreinterpretq_u16_m128i(b)); +} + +// Adds the four single-precision, floating-point values of a and b. +// +// r0 := a0 + b0 +// r1 := a1 + b1 +// r2 := a2 + b2 +// r3 := a3 + b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx +FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Add packed double-precision (64-bit) floating-point elements in a and b, and +// store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd +FORCE_INLINE __m128d _mm_add_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vaddq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] + db[0]; + c[1] = da[1] + db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Add the lower double-precision (64-bit) floating-point element in a and b, +// store the result in the lower element of dst, and copy the upper element from +// a to the upper element of dst. +// +// dst[63:0] := a[63:0] + b[63:0] +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd +FORCE_INLINE __m128d _mm_add_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_add_pd(a, b)); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] + db[0]; + c[1] = da[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Add 64-bit integers a and b, and store the result in dst. +// +// dst[63:0] := a[63:0] + b[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_si64 +FORCE_INLINE __m64 _mm_add_si64(__m64 a, __m64 b) +{ + return vreinterpret_m64_s64( + vadd_s64(vreinterpret_s64_m64(a), vreinterpret_s64_m64(b))); +} + +// adds the scalar single-precision floating point values of a and b. +// https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx +FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b) +{ + float32_t b0 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of <a>. + return vreinterpretq_m128_f32(vaddq_f32(a, value)); +} + +// Adds the 4 signed or unsigned 64-bit integers in a to the 4 signed or +// unsigned 32-bit integers in b. +// https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx +FORCE_INLINE __m128i _mm_add_epi64(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s64( + vaddq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); +} + +// Adds the 4 signed or unsigned 32-bit integers in a to the 4 signed or +// unsigned 32-bit integers in b. +// +// r0 := a0 + b0 +// r1 := a1 + b1 +// r2 := a2 + b2 +// r3 := a3 + b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx +FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vaddq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or +// unsigned 16-bit integers in b. +// https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx +FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Adds the 16 signed or unsigned 8-bit integers in a to the 16 signed or +// unsigned 8-bit integers in b. +// https://technet.microsoft.com/en-us/subscriptions/yc7tcyzs(v=vs.90) +FORCE_INLINE __m128i _mm_add_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vaddq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Adds the 8 signed 16-bit integers in a to the 8 signed 16-bit integers in b +// and saturates. +// +// r0 := SignedSaturate(a0 + b0) +// r1 := SignedSaturate(a1 + b1) +// ... +// r7 := SignedSaturate(a7 + b7) +// +// https://msdn.microsoft.com/en-us/library/1a306ef8(v=vs.100).aspx +FORCE_INLINE __m128i _mm_adds_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vqaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Add packed signed 8-bit integers in a and b using saturation, and store the +// results in dst. +// +// FOR j := 0 to 15 +// i := j*8 +// dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] ) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8 +FORCE_INLINE __m128i _mm_adds_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vqaddq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Adds the 16 unsigned 8-bit integers in a to the 16 unsigned 8-bit integers in +// b and saturates.. +// https://msdn.microsoft.com/en-us/library/9hahyddy(v=vs.100).aspx +FORCE_INLINE __m128i _mm_adds_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vqaddq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or +// unsigned 16-bit integers from b. +// +// r0 := (a0 * b0)[15:0] +// r1 := (a1 * b1)[15:0] +// ... +// r7 := (a7 * b7)[15:0] +// +// https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vmulq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or +// unsigned 32-bit integers from b. +// https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vmulq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Multiply the packed unsigned 16-bit integers in a and b, producing +// intermediate 32-bit integers, and store the high 16 bits of the intermediate +// integers in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// tmp[31:0] := a[i+15:i] * b[i+15:i] +// dst[i+15:i] := tmp[31:16] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw +#define _m_pmulhuw(a, b) _mm_mulhi_pu16(a, b) + +// Multiplies the four single-precision, floating-point values of a and b. +// +// r0 := a0 * b0 +// r1 := a1 * b1 +// r2 := a2 * b2 +// r3 := a3 * b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx +FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vmulq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Multiply packed double-precision (64-bit) floating-point elements in a and b, +// and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd +FORCE_INLINE __m128d _mm_mul_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vmulq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] * db[0]; + c[1] = da[1] * db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Multiply the lower double-precision (64-bit) floating-point element in a and +// b, store the result in the lower element of dst, and copy the upper element +// from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_sd +FORCE_INLINE __m128d _mm_mul_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_mul_pd(a, b)); +} + +// Multiply the lower single-precision (32-bit) floating-point element in a and +// b, store the result in the lower element of dst, and copy the upper 3 packed +// elements from a to the upper elements of dst. +// +// dst[31:0] := a[31:0] * b[31:0] +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ss +FORCE_INLINE __m128 _mm_mul_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_mul_ps(a, b)); +} + +// Multiply the low unsigned 32-bit integers from each packed 64-bit element in +// a and b, and store the unsigned 64-bit results in dst. +// +// r0 := (a0 & 0xFFFFFFFF) * (b0 & 0xFFFFFFFF) +// r1 := (a2 & 0xFFFFFFFF) * (b2 & 0xFFFFFFFF) +FORCE_INLINE __m128i _mm_mul_epu32(__m128i a, __m128i b) +{ + // vmull_u32 upcasts instead of masking, so we downcast. + uint32x2_t a_lo = vmovn_u64(vreinterpretq_u64_m128i(a)); + uint32x2_t b_lo = vmovn_u64(vreinterpretq_u64_m128i(b)); + return vreinterpretq_m128i_u64(vmull_u32(a_lo, b_lo)); +} + +// Multiply the low unsigned 32-bit integers from a and b, and store the +// unsigned 64-bit result in dst. +// +// dst[63:0] := a[31:0] * b[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_su32 +FORCE_INLINE __m64 _mm_mul_su32(__m64 a, __m64 b) +{ + return vreinterpret_m64_u64(vget_low_u64( + vmull_u32(vreinterpret_u32_m64(a), vreinterpret_u32_m64(b)))); +} + +// Multiply the low signed 32-bit integers from each packed 64-bit element in +// a and b, and store the signed 64-bit results in dst. +// +// r0 := (int64_t)(int32_t)a0 * (int64_t)(int32_t)b0 +// r1 := (int64_t)(int32_t)a2 * (int64_t)(int32_t)b2 +FORCE_INLINE __m128i _mm_mul_epi32(__m128i a, __m128i b) +{ + // vmull_s32 upcasts instead of masking, so we downcast. + int32x2_t a_lo = vmovn_s64(vreinterpretq_s64_m128i(a)); + int32x2_t b_lo = vmovn_s64(vreinterpretq_s64_m128i(b)); + return vreinterpretq_m128i_s64(vmull_s32(a_lo, b_lo)); +} + +// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit +// integers from b. +// +// r0 := (a0 * b0) + (a1 * b1) +// r1 := (a2 * b2) + (a3 * b3) +// r2 := (a4 * b4) + (a5 * b5) +// r3 := (a6 * b6) + (a7 * b7) +// https://msdn.microsoft.com/en-us/library/yht36sa6(v=vs.90).aspx +FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b) +{ + int32x4_t low = vmull_s16(vget_low_s16(vreinterpretq_s16_m128i(a)), + vget_low_s16(vreinterpretq_s16_m128i(b))); + int32x4_t high = vmull_s16(vget_high_s16(vreinterpretq_s16_m128i(a)), + vget_high_s16(vreinterpretq_s16_m128i(b))); + + int32x2_t low_sum = vpadd_s32(vget_low_s32(low), vget_high_s32(low)); + int32x2_t high_sum = vpadd_s32(vget_low_s32(high), vget_high_s32(high)); + + return vreinterpretq_m128i_s32(vcombine_s32(low_sum, high_sum)); +} + +// Multiply packed signed 16-bit integers in a and b, producing intermediate +// signed 32-bit integers. Shift right by 15 bits while rounding up, and store +// the packed 16-bit integers in dst. +// +// r0 := Round(((int32_t)a0 * (int32_t)b0) >> 15) +// r1 := Round(((int32_t)a1 * (int32_t)b1) >> 15) +// r2 := Round(((int32_t)a2 * (int32_t)b2) >> 15) +// ... +// r7 := Round(((int32_t)a7 * (int32_t)b7) >> 15) +FORCE_INLINE __m128i _mm_mulhrs_epi16(__m128i a, __m128i b) +{ + // Has issues due to saturation + // return vreinterpretq_m128i_s16(vqrdmulhq_s16(a, b)); + + // Multiply + int32x4_t mul_lo = vmull_s16(vget_low_s16(vreinterpretq_s16_m128i(a)), + vget_low_s16(vreinterpretq_s16_m128i(b))); + int32x4_t mul_hi = vmull_s16(vget_high_s16(vreinterpretq_s16_m128i(a)), + vget_high_s16(vreinterpretq_s16_m128i(b))); + + // Rounding narrowing shift right + // narrow = (int16_t)((mul + 16384) >> 15); + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + // Join together + return vreinterpretq_m128i_s16(vcombine_s16(narrow_lo, narrow_hi)); +} + +// Vertically multiply each unsigned 8-bit integer from a with the corresponding +// signed 8-bit integer from b, producing intermediate signed 16-bit integers. +// Horizontally add adjacent pairs of intermediate signed 16-bit integers, +// and pack the saturated results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// dst[i+15:i] := Saturate_To_Int16( a[i+15:i+8]*b[i+15:i+8] + +// a[i+7:i]*b[i+7:i] ) +// ENDFOR +FORCE_INLINE __m128i _mm_maddubs_epi16(__m128i _a, __m128i _b) +{ +#if defined(__aarch64__) + uint8x16_t a = vreinterpretq_u8_m128i(_a); + int8x16_t b = vreinterpretq_s8_m128i(_b); + int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a))), + vmovl_s8(vget_low_s8(b))); + int16x8_t th = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a))), + vmovl_s8(vget_high_s8(b))); + return vreinterpretq_m128i_s16( + vqaddq_s16(vuzp1q_s16(tl, th), vuzp2q_s16(tl, th))); +#else + // This would be much simpler if x86 would choose to zero extend OR sign + // extend, not both. This could probably be optimized better. + uint16x8_t a = vreinterpretq_u16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + + // Zero extend a + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a, vdupq_n_u16(0xff00))); + + // Sign extend by shifting left then shifting right. + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b, 8); + + // multiply + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + // saturated add + return vreinterpretq_m128i_s16(vqaddq_s16(prod1, prod2)); +#endif +} + +// Computes the fused multiple add product of 32-bit floating point numbers. +// +// Return Value +// Multiplies A and B, and adds C to the temporary result before returning it. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd +FORCE_INLINE __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32(vfmaq_f32(vreinterpretq_f32_m128(c), + vreinterpretq_f32_m128(b), + vreinterpretq_f32_m128(a))); +#else + return _mm_add_ps(_mm_mul_ps(a, b), c); +#endif +} + +// Alternatively add and subtract packed single-precision (32-bit) +// floating-point elements in a to/from packed elements in b, and store the +// results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=addsub_ps +FORCE_INLINE __m128 _mm_addsub_ps(__m128 a, __m128 b) +{ + __m128 mask = {-1.0f, 1.0f, -1.0f, 1.0f}; + return _mm_fmadd_ps(b, mask, a); +} + +// Horizontally add adjacent pairs of double-precision (64-bit) floating-point +// elements in a and b, and pack the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd +FORCE_INLINE __m128d _mm_hadd_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vpaddq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[] = {da[0] + da[1], db[0] + db[1]}; + return vreinterpretq_m128d_u64(vld1q_u64((uint64_t *) c)); +#endif +} + +// Compute the absolute differences of packed unsigned 8-bit integers in a and +// b, then horizontally sum each consecutive 8 differences to produce two +// unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low +// 16 bits of 64-bit elements in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8 +FORCE_INLINE __m128i _mm_sad_epu8(__m128i a, __m128i b) +{ + uint16x8_t t = vpaddlq_u8(vabdq_u8((uint8x16_t) a, (uint8x16_t) b)); + uint16_t r0 = t[0] + t[1] + t[2] + t[3]; + uint16_t r4 = t[4] + t[5] + t[6] + t[7]; + uint16x8_t r = vsetq_lane_u16(r0, vdupq_n_u16(0), 0); + return (__m128i) vsetq_lane_u16(r4, r, 4); +} + +// Compute the absolute differences of packed unsigned 8-bit integers in a and +// b, then horizontally sum each consecutive 8 differences to produce four +// unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low +// 16 bits of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_pu8 +FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b) +{ + uint16x4_t t = + vpaddl_u8(vabd_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); + uint16_t r0 = t[0] + t[1] + t[2] + t[3]; + return vreinterpret_m64_u16(vset_lane_u16(r0, vdup_n_u16(0), 0)); +} + +// Compute the absolute differences of packed unsigned 8-bit integers in a and +// b, then horizontally sum each consecutive 8 differences to produce four +// unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low +// 16 bits of dst. +// +// FOR j := 0 to 7 +// i := j*8 +// tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i]) +// ENDFOR +// dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + +// tmp[47:40] + tmp[55:48] + tmp[63:56] dst[63:16] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_psadbw +#define _m_psadbw(a, b) _mm_sad_pu8(a, b) + +// Divides the four single-precision, floating-point values of a and b. +// +// r0 := a0 / b0 +// r1 := a1 / b1 +// r2 := a2 / b2 +// r3 := a3 / b3 +// +// https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx +FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) && !SSE2NEON_PRECISE_DIV + return vreinterpretq_m128_f32( + vdivq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(b)); + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(b))); +#if SSE2NEON_PRECISE_DIV + // Additional Netwon-Raphson iteration for accuracy + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(b))); +#endif + return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), recip)); +#endif +} + +// Divides the scalar single-precision floating point value of a by b. +// https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx +FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) +{ + float32_t value = + vgetq_lane_f32(vreinterpretq_f32_m128(_mm_div_ps(a, b)), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); +} + +// Divide packed double-precision (64-bit) floating-point elements in a by +// packed elements in b, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 64*j +// dst[i+63:i] := a[i+63:i] / b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd +FORCE_INLINE __m128d _mm_div_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vdivq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] / db[0]; + c[1] = da[1] / db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Divide the lower double-precision (64-bit) floating-point element in a by the +// lower double-precision (64-bit) floating-point element in b, store the result +// in the lower element of dst, and copy the upper element from a to the upper +// element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd +FORCE_INLINE __m128d _mm_div_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + float64x2_t tmp = + vdivq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)); + return vreinterpretq_m128d_f64( + vsetq_lane_f64(vgetq_lane_f64(vreinterpretq_f64_m128d(a), 1), tmp, 1)); +#else + return _mm_move_sd(a, _mm_div_pd(a, b)); +#endif +} + +// Compute the approximate reciprocal of packed single-precision (32-bit) +// floating-point elements in a, and store the results in dst. The maximum +// relative error for this approximation is less than 1.5*2^-12. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps +FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) +{ + float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); +#if SSE2NEON_PRECISE_DIV + // Additional Netwon-Raphson iteration for accuracy + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); +#endif + return vreinterpretq_m128_f32(recip); +} + +// Compute the approximate reciprocal of the lower single-precision (32-bit) +// floating-point element in a, store the result in the lower element of dst, +// and copy the upper 3 packed elements from a to the upper elements of dst. The +// maximum relative error for this approximation is less than 1.5*2^-12. +// +// dst[31:0] := (1.0 / a[31:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss +FORCE_INLINE __m128 _mm_rcp_ss(__m128 a) +{ + return _mm_move_ss(a, _mm_rcp_ps(a)); +} + +// Computes the approximations of square roots of the four single-precision, +// floating-point values of a. First computes reciprocal square roots and then +// reciprocals of the four values. +// +// r0 := sqrt(a0) +// r1 := sqrt(a1) +// r2 := sqrt(a2) +// r3 := sqrt(a3) +// +// https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) +{ +#if SSE2NEON_PRECISE_SQRT + float32x4_t recip = vrsqrteq_f32(vreinterpretq_f32_m128(in)); + + // Test for vrsqrteq_f32(0) -> positive infinity case. + // Change to zero, so that s * 1/sqrt(s) result is zero too. + const uint32x4_t pos_inf = vdupq_n_u32(0x7F800000); + const uint32x4_t div_by_zero = + vceqq_u32(pos_inf, vreinterpretq_u32_f32(recip)); + recip = vreinterpretq_f32_u32( + vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(recip))); + + // Additional Netwon-Raphson iteration for accuracy + recip = vmulq_f32( + vrsqrtsq_f32(vmulq_f32(recip, recip), vreinterpretq_f32_m128(in)), + recip); + recip = vmulq_f32( + vrsqrtsq_f32(vmulq_f32(recip, recip), vreinterpretq_f32_m128(in)), + recip); + + // sqrt(s) = s * 1/sqrt(s) + return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(in), recip)); +#elif defined(__aarch64__) + return vreinterpretq_m128_f32(vsqrtq_f32(vreinterpretq_f32_m128(in))); +#else + float32x4_t recipsq = vrsqrteq_f32(vreinterpretq_f32_m128(in)); + float32x4_t sq = vrecpeq_f32(recipsq); + return vreinterpretq_m128_f32(sq); +#endif +} + +// Computes the approximation of the square root of the scalar single-precision +// floating point value of in. +// https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) +{ + float32_t value = + vgetq_lane_f32(vreinterpretq_f32_m128(_mm_sqrt_ps(in)), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(in), 0)); +} + +// Computes the approximations of the reciprocal square roots of the four +// single-precision floating point values of in. +// https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx +FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) +{ + float32x4_t out = vrsqrteq_f32(vreinterpretq_f32_m128(in)); +#if SSE2NEON_PRECISE_RSQRT + // Additional Netwon-Raphson iteration for accuracy + out = vmulq_f32( + out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out)); + out = vmulq_f32( + out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out)); +#endif + return vreinterpretq_m128_f32(out); +} + +// Compute the approximate reciprocal square root of the lower single-precision +// (32-bit) floating-point element in a, store the result in the lower element +// of dst, and copy the upper 3 packed elements from a to the upper elements of +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss +FORCE_INLINE __m128 _mm_rsqrt_ss(__m128 in) +{ + return vsetq_lane_f32(vgetq_lane_f32(_mm_rsqrt_ps(in), 0), in, 0); +} + +// Compare packed signed 16-bit integers in a and b, and store packed maximum +// values in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pi16 +FORCE_INLINE __m64 _mm_max_pi16(__m64 a, __m64 b) +{ + return vreinterpret_m64_s16( + vmax_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); +} + +// Compare packed signed 16-bit integers in a and b, and store packed maximum +// values in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pi16 +#define _m_pmaxsw(a, b) _mm_max_pi16(a, b) + +// Computes the maximums of the four single-precision, floating-point values of +// a and b. +// https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx +FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) +{ +#if SSE2NEON_PRECISE_MINMAX + float32x4_t _a = vreinterpretq_f32_m128(a); + float32x4_t _b = vreinterpretq_f32_m128(b); + return vbslq_f32(vcltq_f32(_b, _a), _a, _b); +#else + return vreinterpretq_m128_f32( + vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#endif +} + +// Compare packed unsigned 8-bit integers in a and b, and store packed maximum +// values in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pu8 +FORCE_INLINE __m64 _mm_max_pu8(__m64 a, __m64 b) +{ + return vreinterpret_m64_u8( + vmax_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); +} + +// Compare packed unsigned 8-bit integers in a and b, and store packed maximum +// values in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pu8 +#define _m_pmaxub(a, b) _mm_max_pu8(a, b) + +// Compare packed signed 16-bit integers in a and b, and store packed minimum +// values in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pi16 +FORCE_INLINE __m64 _mm_min_pi16(__m64 a, __m64 b) +{ + return vreinterpret_m64_s16( + vmin_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); +} + +// Compare packed signed 16-bit integers in a and b, and store packed minimum +// values in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pi16 +#define _m_pminsw(a, b) _mm_min_pi16(a, b) + +// Computes the minima of the four single-precision, floating-point values of a +// and b. +// https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) +{ +#if SSE2NEON_PRECISE_MINMAX + float32x4_t _a = vreinterpretq_f32_m128(a); + float32x4_t _b = vreinterpretq_f32_m128(b); + return vbslq_f32(vcltq_f32(_a, _b), _a, _b); +#else + return vreinterpretq_m128_f32( + vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#endif +} + +// Compare packed unsigned 8-bit integers in a and b, and store packed minimum +// values in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pu8 +FORCE_INLINE __m64 _mm_min_pu8(__m64 a, __m64 b) +{ + return vreinterpret_m64_u8( + vmin_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); +} + +// Compare packed unsigned 8-bit integers in a and b, and store packed minimum +// values in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pu8 +#define _m_pminub(a, b) _mm_min_pu8(a, b) + +// Computes the maximum of the two lower scalar single-precision floating point +// values of a and b. +// https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) +{ + float32_t value = vgetq_lane_f32(_mm_max_ps(a, b), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); +} + +// Computes the minimum of the two lower scalar single-precision floating point +// values of a and b. +// https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx +FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) +{ + float32_t value = vgetq_lane_f32(_mm_min_ps(a, b), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); +} + +// Computes the pairwise maxima of the 16 unsigned 8-bit integers from a and the +// 16 unsigned 8-bit integers from b. +// https://msdn.microsoft.com/en-us/library/st6634za(v=vs.100).aspx +FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vmaxq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Computes the pairwise minima of the 16 unsigned 8-bit integers from a and the +// 16 unsigned 8-bit integers from b. +// https://msdn.microsoft.com/ko-kr/library/17k8cf58(v=vs.100).aspxx +FORCE_INLINE __m128i _mm_min_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vminq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 +// signed 16-bit integers from b. +// https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx +FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vminq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Compare packed signed 8-bit integers in a and b, and store packed maximum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8 +FORCE_INLINE __m128i _mm_max_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vmaxq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compare packed unsigned 16-bit integers in a and b, and store packed maximum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16 +FORCE_INLINE __m128i _mm_max_epu16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vmaxq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b))); +} + +// Compare packed signed 8-bit integers in a and b, and store packed minimum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8 +FORCE_INLINE __m128i _mm_min_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vminq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compare packed unsigned 16-bit integers in a and b, and store packed minimum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16 +FORCE_INLINE __m128i _mm_min_epu16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vminq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b))); +} + +// Computes the pairwise maxima of the 8 signed 16-bit integers from a and the 8 +// signed 16-bit integers from b. +// https://msdn.microsoft.com/en-us/LIBRary/3x060h7c(v=vs.100).aspx +FORCE_INLINE __m128i _mm_max_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vmaxq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// epi versions of min/max +// Computes the pariwise maximums of the four signed 32-bit integer values of a +// and b. +// +// A 128-bit parameter that can be defined with the following equations: +// r0 := (a0 > b0) ? a0 : b0 +// r1 := (a1 > b1) ? a1 : b1 +// r2 := (a2 > b2) ? a2 : b2 +// r3 := (a3 > b3) ? a3 : b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx +FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vmaxq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Computes the pariwise minima of the four signed 32-bit integer values of a +// and b. +// +// A 128-bit parameter that can be defined with the following equations: +// r0 := (a0 < b0) ? a0 : b0 +// r1 := (a1 < b1) ? a1 : b1 +// r2 := (a2 < b2) ? a2 : b2 +// r3 := (a3 < b3) ? a3 : b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx +FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vminq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Compare packed unsigned 32-bit integers in a and b, and store packed maximum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32 +FORCE_INLINE __m128i _mm_max_epu32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u32( + vmaxq_u32(vreinterpretq_u32_m128i(a), vreinterpretq_u32_m128i(b))); +} + +// Compare packed unsigned 32-bit integers in a and b, and store packed minimum +// values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32 +FORCE_INLINE __m128i _mm_min_epu32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u32( + vminq_u32(vreinterpretq_u32_m128i(a), vreinterpretq_u32_m128i(b))); +} + +// Multiply the packed unsigned 16-bit integers in a and b, producing +// intermediate 32-bit integers, and store the high 16 bits of the intermediate +// integers in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_pu16 +FORCE_INLINE __m64 _mm_mulhi_pu16(__m64 a, __m64 b) +{ + return vreinterpret_m64_u16(vshrn_n_u32( + vmull_u16(vreinterpret_u16_m64(a), vreinterpret_u16_m64(b)), 16)); +} + +// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit +// integers from b. +// +// r0 := (a0 * b0)[31:16] +// r1 := (a1 * b1)[31:16] +// ... +// r7 := (a7 * b7)[31:16] +// +// https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) +{ + /* FIXME: issue with large values because of result saturation */ + // int16x8_t ret = vqdmulhq_s16(vreinterpretq_s16_m128i(a), + // vreinterpretq_s16_m128i(b)); /* =2*a*b */ return + // vreinterpretq_m128i_s16(vshrq_n_s16(ret, 1)); + int16x4_t a3210 = vget_low_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b3210 = vget_low_s16(vreinterpretq_s16_m128i(b)); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + int16x4_t a7654 = vget_high_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b7654 = vget_high_s16(vreinterpretq_s16_m128i(b)); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t r = + vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + return vreinterpretq_m128i_u16(r.val[1]); +} + +// Multiply the packed unsigned 16-bit integers in a and b, producing +// intermediate 32-bit integers, and store the high 16 bits of the intermediate +// integers in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16 +FORCE_INLINE __m128i _mm_mulhi_epu16(__m128i a, __m128i b) +{ + uint16x4_t a3210 = vget_low_u16(vreinterpretq_u16_m128i(a)); + uint16x4_t b3210 = vget_low_u16(vreinterpretq_u16_m128i(b)); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); +#if defined(__aarch64__) + uint32x4_t ab7654 = + vmull_high_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b)); + uint16x8_t r = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), + vreinterpretq_u16_u32(ab7654)); + return vreinterpretq_m128i_u16(r); +#else + uint16x4_t a7654 = vget_high_u16(vreinterpretq_u16_m128i(a)); + uint16x4_t b7654 = vget_high_u16(vreinterpretq_u16_m128i(b)); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); + uint16x8x2_t r = + vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + return vreinterpretq_m128i_u16(r.val[1]); +#endif +} + +// Computes pairwise add of each argument as single-precision, floating-point +// values a and b. +// https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx +FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vpaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32( + vcombine_f32(vpadd_f32(a10, a32), vpadd_f32(b10, b32))); +#endif +} + +// Computes pairwise add of each argument as a 16-bit signed or unsigned integer +// values a and b. +FORCE_INLINE __m128i _mm_hadd_epi16(__m128i _a, __m128i _b) +{ + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); +#if defined(__aarch64__) + return vreinterpretq_m128i_s16(vpaddq_s16(a, b)); +#else + return vreinterpretq_m128i_s16( + vcombine_s16(vpadd_s16(vget_low_s16(a), vget_high_s16(a)), + vpadd_s16(vget_low_s16(b), vget_high_s16(b)))); +#endif +} + +// Horizontally substract adjacent pairs of single-precision (32-bit) +// floating-point elements in a and b, and pack the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps +FORCE_INLINE __m128 _mm_hsub_ps(__m128 _a, __m128 _b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32(vsubq_f32( + vuzp1q_f32(vreinterpretq_f32_m128(_a), vreinterpretq_f32_m128(_b)), + vuzp2q_f32(vreinterpretq_f32_m128(_a), vreinterpretq_f32_m128(_b)))); +#else + float32x4x2_t c = + vuzpq_f32(vreinterpretq_f32_m128(_a), vreinterpretq_f32_m128(_b)); + return vreinterpretq_m128_f32(vsubq_f32(c.val[0], c.val[1])); +#endif +} + +// Horizontally add adjacent pairs of 16-bit integers in a and b, and pack the +// signed 16-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi16 +FORCE_INLINE __m64 _mm_hadd_pi16(__m64 a, __m64 b) +{ + return vreinterpret_m64_s16( + vpadd_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); +} + +// Horizontally add adjacent pairs of 32-bit integers in a and b, and pack the +// signed 32-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi32 +FORCE_INLINE __m64 _mm_hadd_pi32(__m64 a, __m64 b) +{ + return vreinterpret_m64_s32( + vpadd_s32(vreinterpret_s32_m64(a), vreinterpret_s32_m64(b))); +} + +// Computes pairwise difference of each argument as a 16-bit signed or unsigned +// integer values a and b. +FORCE_INLINE __m128i _mm_hsub_epi16(__m128i _a, __m128i _b) +{ + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + // Interleave using vshrn/vmovn + // [a0|a2|a4|a6|b0|b2|b4|b6] + // [a1|a3|a5|a7|b1|b3|b5|b7] + int16x8_t ab0246 = vcombine_s16(vmovn_s32(a), vmovn_s32(b)); + int16x8_t ab1357 = vcombine_s16(vshrn_n_s32(a, 16), vshrn_n_s32(b, 16)); + // Subtract + return vreinterpretq_m128i_s16(vsubq_s16(ab0246, ab1357)); +} + +// Computes saturated pairwise sub of each argument as a 16-bit signed +// integer values a and b. +FORCE_INLINE __m128i _mm_hadds_epi16(__m128i _a, __m128i _b) +{ +#if defined(__aarch64__) + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + return vreinterpretq_s64_s16( + vqaddq_s16(vuzp1q_s16(a, b), vuzp2q_s16(a, b))); +#else + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + // Interleave using vshrn/vmovn + // [a0|a2|a4|a6|b0|b2|b4|b6] + // [a1|a3|a5|a7|b1|b3|b5|b7] + int16x8_t ab0246 = vcombine_s16(vmovn_s32(a), vmovn_s32(b)); + int16x8_t ab1357 = vcombine_s16(vshrn_n_s32(a, 16), vshrn_n_s32(b, 16)); + // Saturated add + return vreinterpretq_m128i_s16(vqaddq_s16(ab0246, ab1357)); +#endif +} + +// Computes saturated pairwise difference of each argument as a 16-bit signed +// integer values a and b. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16 +FORCE_INLINE __m128i _mm_hsubs_epi16(__m128i _a, __m128i _b) +{ +#if defined(__aarch64__) + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + return vreinterpretq_s64_s16( + vqsubq_s16(vuzp1q_s16(a, b), vuzp2q_s16(a, b))); +#else + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + // Interleave using vshrn/vmovn + // [a0|a2|a4|a6|b0|b2|b4|b6] + // [a1|a3|a5|a7|b1|b3|b5|b7] + int16x8_t ab0246 = vcombine_s16(vmovn_s32(a), vmovn_s32(b)); + int16x8_t ab1357 = vcombine_s16(vshrn_n_s32(a, 16), vshrn_n_s32(b, 16)); + // Saturated subtract + return vreinterpretq_m128i_s16(vqsubq_s16(ab0246, ab1357)); +#endif +} + +// Computes pairwise add of each argument as a 32-bit signed or unsigned integer +// values a and b. +FORCE_INLINE __m128i _mm_hadd_epi32(__m128i _a, __m128i _b) +{ + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + return vreinterpretq_m128i_s32( + vcombine_s32(vpadd_s32(vget_low_s32(a), vget_high_s32(a)), + vpadd_s32(vget_low_s32(b), vget_high_s32(b)))); +} + +// Computes pairwise difference of each argument as a 32-bit signed or unsigned +// integer values a and b. +FORCE_INLINE __m128i _mm_hsub_epi32(__m128i _a, __m128i _b) +{ + int64x2_t a = vreinterpretq_s64_m128i(_a); + int64x2_t b = vreinterpretq_s64_m128i(_b); + // Interleave using vshrn/vmovn + // [a0|a2|b0|b2] + // [a1|a2|b1|b3] + int32x4_t ab02 = vcombine_s32(vmovn_s64(a), vmovn_s64(b)); + int32x4_t ab13 = vcombine_s32(vshrn_n_s64(a, 32), vshrn_n_s64(b, 32)); + // Subtract + return vreinterpretq_m128i_s32(vsubq_s32(ab02, ab13)); +} + +// Kahan summation for accurate summation of floating-point numbers. +// http://blog.zachbjornson.com/2019/08/11/fast-float-summation.html +FORCE_INLINE void _sse2neon_kadd_f32(float *sum, float *c, float y) +{ + y -= *c; + float t = *sum + y; + *c = (t - *sum) - y; + *sum = t; +} + +// Conditionally multiply the packed single-precision (32-bit) floating-point +// elements in a and b using the high 4 bits in imm8, sum the four products, +// and conditionally store the sum in dst using the low 4 bits of imm. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps +FORCE_INLINE __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm) +{ +#if defined(__aarch64__) + /* shortcuts */ + if (imm == 0xFF) { + return _mm_set1_ps(vaddvq_f32(_mm_mul_ps(a, b))); + } + if (imm == 0x7F) { + float32x4_t m = _mm_mul_ps(a, b); + m[3] = 0; + return _mm_set1_ps(vaddvq_f32(m)); + } +#endif + + float s = 0, c = 0; + float32x4_t f32a = vreinterpretq_f32_m128(a); + float32x4_t f32b = vreinterpretq_f32_m128(b); + + /* To improve the accuracy of floating-point summation, Kahan algorithm + * is used for each operation. + */ + if (imm & (1 << 4)) + _sse2neon_kadd_f32(&s, &c, f32a[0] * f32b[0]); + if (imm & (1 << 5)) + _sse2neon_kadd_f32(&s, &c, f32a[1] * f32b[1]); + if (imm & (1 << 6)) + _sse2neon_kadd_f32(&s, &c, f32a[2] * f32b[2]); + if (imm & (1 << 7)) + _sse2neon_kadd_f32(&s, &c, f32a[3] * f32b[3]); + s += c; + + float32x4_t res = { + (imm & 0x1) ? s : 0, + (imm & 0x2) ? s : 0, + (imm & 0x4) ? s : 0, + (imm & 0x8) ? s : 0, + }; + return vreinterpretq_m128_f32(res); +} + +/* Compare operations */ + +// Compares for less than +// https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_u32( + vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Compares for less than +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/fy94wye7(v=vs.100) +FORCE_INLINE __m128 _mm_cmplt_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmplt_ps(a, b)); +} + +// Compares for greater than. +// +// r0 := (a0 > b0) ? 0xffffffff : 0x0 +// r1 := (a1 > b1) ? 0xffffffff : 0x0 +// r2 := (a2 > b2) ? 0xffffffff : 0x0 +// r3 := (a3 > b3) ? 0xffffffff : 0x0 +// +// https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_u32( + vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Compares for greater than. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/1xyyyy9e(v=vs.100) +FORCE_INLINE __m128 _mm_cmpgt_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmpgt_ps(a, b)); +} + +// Compares for greater than or equal. +// https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_u32( + vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Compares for greater than or equal. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/kesh3ddc(v=vs.100) +FORCE_INLINE __m128 _mm_cmpge_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmpge_ps(a, b)); +} + +// Compares for less than or equal. +// +// r0 := (a0 <= b0) ? 0xffffffff : 0x0 +// r1 := (a1 <= b1) ? 0xffffffff : 0x0 +// r2 := (a2 <= b2) ? 0xffffffff : 0x0 +// r3 := (a3 <= b3) ? 0xffffffff : 0x0 +// +// https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_u32( + vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Compares for less than or equal. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/a7x0hbhw(v=vs.100) +FORCE_INLINE __m128 _mm_cmple_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmple_ps(a, b)); +} + +// Compares for equality. +// https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_u32( + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Compares for equality. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/k423z28e(v=vs.100) +FORCE_INLINE __m128 _mm_cmpeq_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmpeq_ps(a, b)); +} + +// Compares for inequality. +// https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_u32(vmvnq_u32( + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)))); +} + +// Compares for inequality. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/ekya8fh4(v=vs.100) +FORCE_INLINE __m128 _mm_cmpneq_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmpneq_ps(a, b)); +} + +// Compares for not greater than or equal. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/wsexys62(v=vs.100) +FORCE_INLINE __m128 _mm_cmpnge_ps(__m128 a, __m128 b) +{ + return _mm_cmplt_ps(a, b); +} + +// Compares for not greater than or equal. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/fk2y80s8(v=vs.100) +FORCE_INLINE __m128 _mm_cmpnge_ss(__m128 a, __m128 b) +{ + return _mm_cmplt_ss(a, b); +} + +// Compares for not greater than. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/d0xh7w0s(v=vs.100) +FORCE_INLINE __m128 _mm_cmpngt_ps(__m128 a, __m128 b) +{ + return _mm_cmple_ps(a, b); +} + +// Compares for not greater than. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/z7x9ydwh(v=vs.100) +FORCE_INLINE __m128 _mm_cmpngt_ss(__m128 a, __m128 b) +{ + return _mm_cmple_ss(a, b); +} + +// Compares for not less than or equal. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/6a330kxw(v=vs.100) +FORCE_INLINE __m128 _mm_cmpnle_ps(__m128 a, __m128 b) +{ + return _mm_cmpgt_ps(a, b); +} + +// Compares for not less than or equal. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/z7x9ydwh(v=vs.100) +FORCE_INLINE __m128 _mm_cmpnle_ss(__m128 a, __m128 b) +{ + return _mm_cmpgt_ss(a, b); +} + +// Compares for not less than. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/4686bbdw(v=vs.100) +FORCE_INLINE __m128 _mm_cmpnlt_ps(__m128 a, __m128 b) +{ + return _mm_cmpge_ps(a, b); +} + +// Compares for not less than. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/56b9z2wf(v=vs.100) +FORCE_INLINE __m128 _mm_cmpnlt_ss(__m128 a, __m128 b) +{ + return _mm_cmpge_ss(a, b); +} + +// Compares the 16 signed or unsigned 8-bit integers in a and the 16 signed or +// unsigned 8-bit integers in b for equality. +// https://msdn.microsoft.com/en-us/library/windows/desktop/bz5xk21a(v=vs.90).aspx +FORCE_INLINE __m128i _mm_cmpeq_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vceqq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for equality, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd +FORCE_INLINE __m128d _mm_cmpeq_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64( + vceqq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + // (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) + uint32x4_t cmp = + vceqq_u32(vreinterpretq_u32_m128d(a), vreinterpretq_u32_m128d(b)); + uint32x4_t swapped = vrev64q_u32(cmp); + return vreinterpretq_m128d_u32(vandq_u32(cmp, swapped)); +#endif +} + +// Compares the 8 signed or unsigned 16-bit integers in a and the 8 signed or +// unsigned 16-bit integers in b for equality. +// https://msdn.microsoft.com/en-us/library/2ay060te(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmpeq_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vceqq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Compare packed 32-bit integers in a and b for equality, and store the results +// in dst +FORCE_INLINE __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u32( + vceqq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Compare packed 64-bit integers in a and b for equality, and store the results +// in dst +FORCE_INLINE __m128i _mm_cmpeq_epi64(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_u64( + vceqq_u64(vreinterpretq_u64_m128i(a), vreinterpretq_u64_m128i(b))); +#else + // ARMv7 lacks vceqq_u64 + // (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) + uint32x4_t cmp = + vceqq_u32(vreinterpretq_u32_m128i(a), vreinterpretq_u32_m128i(b)); + uint32x4_t swapped = vrev64q_u32(cmp); + return vreinterpretq_m128i_u32(vandq_u32(cmp, swapped)); +#endif +} + +// Compares the 16 signed 8-bit integers in a and the 16 signed 8-bit integers +// in b for lesser than. +// https://msdn.microsoft.com/en-us/library/windows/desktop/9s46csht(v=vs.90).aspx +FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vcltq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compares the 16 signed 8-bit integers in a and the 16 signed 8-bit integers +// in b for greater than. +// +// r0 := (a0 > b0) ? 0xff : 0x0 +// r1 := (a1 > b1) ? 0xff : 0x0 +// ... +// r15 := (a15 > b15) ? 0xff : 0x0 +// +// https://msdn.microsoft.com/zh-tw/library/wf45zt2b(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vcgtq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compares the 8 signed 16-bit integers in a and the 8 signed 16-bit integers +// in b for less than. +// +// r0 := (a0 < b0) ? 0xffff : 0x0 +// r1 := (a1 < b1) ? 0xffff : 0x0 +// ... +// r7 := (a7 < b7) ? 0xffff : 0x0 +// +// https://technet.microsoft.com/en-us/library/t863edb2(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmplt_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vcltq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Compares the 8 signed 16-bit integers in a and the 8 signed 16-bit integers +// in b for greater than. +// +// r0 := (a0 > b0) ? 0xffff : 0x0 +// r1 := (a1 > b1) ? 0xffff : 0x0 +// ... +// r7 := (a7 > b7) ? 0xffff : 0x0 +// +// https://technet.microsoft.com/en-us/library/xd43yfsa(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmpgt_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vcgtq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + + +// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers +// in b for less than. +// https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u32( + vcltq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers +// in b for greater than. +// https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u32( + vcgtq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Compares the 2 signed 64-bit integers in a and the 2 signed 64-bit integers +// in b for greater than. +FORCE_INLINE __m128i _mm_cmpgt_epi64(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_u64( + vcgtq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); +#else + // ARMv7 lacks vcgtq_s64. + // This is based off of Clang's SSE2 polyfill: + // (a > b) -> ((a_hi > b_hi) || (a_lo > b_lo && a_hi == b_hi)) + + // Mask the sign bit out since we need a signed AND an unsigned comparison + // and it is ugly to try and split them. + int32x4_t mask = vreinterpretq_s32_s64(vdupq_n_s64(0x80000000ull)); + int32x4_t a_mask = veorq_s32(vreinterpretq_s32_m128i(a), mask); + int32x4_t b_mask = veorq_s32(vreinterpretq_s32_m128i(b), mask); + // Check if a > b + int64x2_t greater = vreinterpretq_s64_u32(vcgtq_s32(a_mask, b_mask)); + // Copy upper mask to lower mask + // a_hi > b_hi + int64x2_t gt_hi = vshrq_n_s64(greater, 63); + // Copy lower mask to upper mask + // a_lo > b_lo + int64x2_t gt_lo = vsliq_n_s64(greater, greater, 32); + // Compare for equality + int64x2_t equal = vreinterpretq_s64_u32(vceqq_s32(a_mask, b_mask)); + // Copy upper mask to lower mask + // a_hi == b_hi + int64x2_t eq_hi = vshrq_n_s64(equal, 63); + // a_hi > b_hi || (a_lo > b_lo && a_hi == b_hi) + int64x2_t ret = vorrq_s64(gt_hi, vandq_s64(gt_lo, eq_hi)); + return vreinterpretq_m128i_s64(ret); +#endif +} + +// Compares the four 32-bit floats in a and b to check if any values are NaN. +// Ordered compare between each value returns true for "orderable" and false for +// "not orderable" (NaN). +// https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx see +// also: +// http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean +// http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics +FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b) +{ + // Note: NEON does not have ordered compare builtin + // Need to compare a eq a and b eq b to check for NaN + // Do AND of results to get final + uint32x4_t ceqaa = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t ceqbb = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_u32(vandq_u32(ceqaa, ceqbb)); +} + +// Compares for ordered. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/343t62da(v=vs.100) +FORCE_INLINE __m128 _mm_cmpord_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmpord_ps(a, b)); +} + +// Compares for unordered. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/khy6fk1t(v=vs.100) +FORCE_INLINE __m128 _mm_cmpunord_ps(__m128 a, __m128 b) +{ + uint32x4_t f32a = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t f32b = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_u32(vmvnq_u32(vandq_u32(f32a, f32b))); +} + +// Compares for unordered. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/2as2387b(v=vs.100) +FORCE_INLINE __m128 _mm_cmpunord_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_cmpunord_ps(a, b)); +} + +// Compares the lower single-precision floating point scalar values of a and b +// using a less than operation. : +// https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx Important +// note!! The documentation on MSDN is incorrect! If either of the values is a +// NAN the docs say you will get a one, but in fact, it will return a zero!! +FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b) +{ + uint32x4_t a_not_nan = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t b_not_nan = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_lt_b = + vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); + return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_lt_b), 0) != 0) ? 1 : 0; +} + +// Compares the lower single-precision floating point scalar values of a and b +// using a greater than operation. : +// https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx +FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b) +{ + // return vgetq_lane_u32(vcgtq_f32(vreinterpretq_f32_m128(a), + // vreinterpretq_f32_m128(b)), 0); + uint32x4_t a_not_nan = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t b_not_nan = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = + vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); + return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0) ? 1 : 0; +} + +// Compares the lower single-precision floating point scalar values of a and b +// using a less than or equal operation. : +// https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx +FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b) +{ + // return vgetq_lane_u32(vcleq_f32(vreinterpretq_f32_m128(a), + // vreinterpretq_f32_m128(b)), 0); + uint32x4_t a_not_nan = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t b_not_nan = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_le_b = + vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); + return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_le_b), 0) != 0) ? 1 : 0; +} + +// Compares the lower single-precision floating point scalar values of a and b +// using a greater than or equal operation. : +// https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx +FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b) +{ + // return vgetq_lane_u32(vcgeq_f32(vreinterpretq_f32_m128(a), + // vreinterpretq_f32_m128(b)), 0); + uint32x4_t a_not_nan = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t b_not_nan = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = + vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); + return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0) ? 1 : 0; +} + +// Compares the lower single-precision floating point scalar values of a and b +// using an equality operation. : +// https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx +FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b) +{ + // return vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), + // vreinterpretq_f32_m128(b)), 0); + uint32x4_t a_not_nan = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t b_not_nan = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_eq_b = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)); + return (vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_eq_b), 0) != 0) ? 1 : 0; +} + +// Compares the lower single-precision floating point scalar values of a and b +// using an inequality operation. : +// https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx +FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b) +{ + // return !vgetq_lane_u32(vceqq_f32(vreinterpretq_f32_m128(a), + // vreinterpretq_f32_m128(b)), 0); + uint32x4_t a_not_nan = + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a)); + uint32x4_t b_not_nan = + vceqq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b)); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_neq_b = vmvnq_u32( + vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); + return (vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_neq_b), 0) != 0) ? 1 : 0; +} + +// according to the documentation, these intrinsics behave the same as the +// non-'u' versions. We'll just alias them here. +#define _mm_ucomieq_ss _mm_comieq_ss +#define _mm_ucomige_ss _mm_comige_ss +#define _mm_ucomigt_ss _mm_comigt_ss +#define _mm_ucomile_ss _mm_comile_ss +#define _mm_ucomilt_ss _mm_comilt_ss +#define _mm_ucomineq_ss _mm_comineq_ss + +/* Conversions */ + +// Convert packed signed 32-bit integers in b to packed single-precision +// (32-bit) floating-point elements, store the results in the lower 2 elements +// of dst, and copy the upper 2 packed elements from a to the upper elements of +// dst. +// +// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +// dst[63:32] := Convert_Int32_To_FP32(b[63:32]) +// dst[95:64] := a[95:64] +// dst[127:96] := a[127:96] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_pi2ps +FORCE_INLINE __m128 _mm_cvt_pi2ps(__m128 a, __m64 b) +{ + return vreinterpretq_m128_f32( + vcombine_f32(vcvt_f32_s32(vreinterpret_s32_m64(b)), + vget_high_f32(vreinterpretq_f32_m128(a)))); +} + +// Convert the signed 32-bit integer b to a single-precision (32-bit) +// floating-point element, store the result in the lower element of dst, and +// copy the upper 3 packed elements from a to the upper elements of dst. +// +// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_si2ss +FORCE_INLINE __m128 _mm_cvt_si2ss(__m128 a, int b) +{ + return vreinterpretq_m128_f32( + vsetq_lane_f32((float) b, vreinterpretq_f32_m128(a), 0)); +} + +// Convert the signed 32-bit integer b to a single-precision (32-bit) +// floating-point element, store the result in the lower element of dst, and +// copy the upper 3 packed elements from a to the upper elements of dst. +// +// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_ss +#define _mm_cvtsi32_ss(a, b) _mm_cvt_si2ss(a, b) + +// Convert the signed 64-bit integer b to a single-precision (32-bit) +// floating-point element, store the result in the lower element of dst, and +// copy the upper 3 packed elements from a to the upper elements of dst. +// +// dst[31:0] := Convert_Int64_To_FP32(b[63:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss +FORCE_INLINE __m128 _mm_cvtsi64_ss(__m128 a, int64_t b) +{ + return vreinterpretq_m128_f32( + vsetq_lane_f32((float) b, vreinterpretq_f32_m128(a), 0)); +} + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 32-bit integer, and store the result in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ss2si +FORCE_INLINE int _mm_cvt_ss2si(__m128 a) +{ +#if defined(__aarch64__) + return vgetq_lane_s32(vcvtnq_s32_f32(vreinterpretq_f32_m128(a)), 0); +#else + float32_t data = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + float32_t diff = data - floor(data); + if (diff > 0.5) + return (int32_t) ceil(data); + if (unlikely(diff == 0.5)) { + int32_t f = (int32_t) floor(data); + int32_t c = (int32_t) ceil(data); + return c & 1 ? f : c; + } + return (int32_t) floor(data); +#endif +} + +// Convert packed 16-bit integers in a to packed single-precision (32-bit) +// floating-point elements, and store the results in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// m := j*32 +// dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi16_ps +FORCE_INLINE __m128 _mm_cvtpi16_ps(__m64 a) +{ + return vreinterpretq_m128_f32( + vcvtq_f32_s32(vmovl_s16(vreinterpret_s16_m64(a)))); +} + +// Convert packed 32-bit integers in b to packed single-precision (32-bit) +// floating-point elements, store the results in the lower 2 elements of dst, +// and copy the upper 2 packed elements from a to the upper elements of dst. +// +// dst[31:0] := Convert_Int32_To_FP32(b[31:0]) +// dst[63:32] := Convert_Int32_To_FP32(b[63:32]) +// dst[95:64] := a[95:64] +// dst[127:96] := a[127:96] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_ps +FORCE_INLINE __m128 _mm_cvtpi32_ps(__m128 a, __m64 b) +{ + return vreinterpretq_m128_f32( + vcombine_f32(vcvt_f32_s32(vreinterpret_s32_m64(b)), + vget_high_f32(vreinterpretq_f32_m128(a)))); +} + +// Convert packed signed 32-bit integers in a to packed single-precision +// (32-bit) floating-point elements, store the results in the lower 2 elements +// of dst, then covert the packed signed 32-bit integers in b to +// single-precision (32-bit) floating-point element, and store the results in +// the upper 2 elements of dst. +// +// dst[31:0] := Convert_Int32_To_FP32(a[31:0]) +// dst[63:32] := Convert_Int32_To_FP32(a[63:32]) +// dst[95:64] := Convert_Int32_To_FP32(b[31:0]) +// dst[127:96] := Convert_Int32_To_FP32(b[63:32]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32x2_ps +FORCE_INLINE __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b) +{ + return vreinterpretq_m128_f32(vcvtq_f32_s32( + vcombine_s32(vreinterpret_s32_m64(a), vreinterpret_s32_m64(b)))); +} + +// Convert the lower packed 8-bit integers in a to packed single-precision +// (32-bit) floating-point elements, and store the results in dst. +// +// FOR j := 0 to 3 +// i := j*8 +// m := j*32 +// dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi8_ps +FORCE_INLINE __m128 _mm_cvtpi8_ps(__m64 a) +{ + return vreinterpretq_m128_f32(vcvtq_f32_s32( + vmovl_s16(vget_low_s16(vmovl_s8(vreinterpret_s8_m64(a)))))); +} + +// Convert packed unsigned 16-bit integers in a to packed single-precision +// (32-bit) floating-point elements, and store the results in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// m := j*32 +// dst[m+31:m] := Convert_UInt16_To_FP32(a[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu16_ps +FORCE_INLINE __m128 _mm_cvtpu16_ps(__m64 a) +{ + return vreinterpretq_m128_f32( + vcvtq_f32_u32(vmovl_u16(vreinterpret_u16_m64(a)))); +} + +// Convert the lower packed unsigned 8-bit integers in a to packed +// single-precision (32-bit) floating-point elements, and store the results in +// dst. +// +// FOR j := 0 to 3 +// i := j*8 +// m := j*32 +// dst[m+31:m] := Convert_UInt8_To_FP32(a[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu8_ps +FORCE_INLINE __m128 _mm_cvtpu8_ps(__m64 a) +{ + return vreinterpretq_m128_f32(vcvtq_f32_u32( + vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_m64(a)))))); +} + +// Converts the four single-precision, floating-point values of a to signed +// 32-bit integer values using truncate. +// https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) +{ + return vreinterpretq_m128i_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a))); +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 64-bit integer with truncation, and store the result in dst. +// +// dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64 +FORCE_INLINE int64_t _mm_cvttsd_si64(__m128d a) +{ +#if defined(__aarch64__) + return vgetq_lane_s64(vcvtq_s64_f64(vreinterpretq_f64_m128d(a)), 0); +#else + double ret = *((double *) &a); + return (int64_t) ret; +#endif +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 64-bit integer with truncation, and store the result in dst. +// +// dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x +#define _mm_cvttsd_si64x(a) _mm_cvttsd_si64(a) + +// Converts the four signed 32-bit integer values of a to single-precision, +// floating-point values +// https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) +{ + return vreinterpretq_m128_f32(vcvtq_f32_s32(vreinterpretq_s32_m128i(a))); +} + +// Converts the four unsigned 8-bit integers in the lower 16 bits to four +// unsigned 32-bit integers. +FORCE_INLINE __m128i _mm_cvtepu8_epi16(__m128i a) +{ + uint8x16_t u8x16 = vreinterpretq_u8_m128i(a); /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + return vreinterpretq_m128i_u16(u16x8); +} + +// Converts the four unsigned 8-bit integers in the lower 32 bits to four +// unsigned 32-bit integers. +// https://msdn.microsoft.com/en-us/library/bb531467%28v=vs.100%29.aspx +FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a) +{ + uint8x16_t u8x16 = vreinterpretq_u8_m128i(a); /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + return vreinterpretq_m128i_u32(u32x4); +} + +// Converts the two unsigned 8-bit integers in the lower 16 bits to two +// unsigned 64-bit integers. +FORCE_INLINE __m128i _mm_cvtepu8_epi64(__m128i a) +{ + uint8x16_t u8x16 = vreinterpretq_u8_m128i(a); /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + return vreinterpretq_m128i_u64(u64x2); +} + +// Converts the four unsigned 8-bit integers in the lower 16 bits to four +// unsigned 32-bit integers. +FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a) +{ + int8x16_t s8x16 = vreinterpretq_s8_m128i(a); /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + return vreinterpretq_m128i_s16(s16x8); +} + +// Converts the four unsigned 8-bit integers in the lower 32 bits to four +// unsigned 32-bit integers. +FORCE_INLINE __m128i _mm_cvtepi8_epi32(__m128i a) +{ + int8x16_t s8x16 = vreinterpretq_s8_m128i(a); /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + return vreinterpretq_m128i_s32(s32x4); +} + +// Converts the two signed 8-bit integers in the lower 32 bits to four +// signed 64-bit integers. +FORCE_INLINE __m128i _mm_cvtepi8_epi64(__m128i a) +{ + int8x16_t s8x16 = vreinterpretq_s8_m128i(a); /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + return vreinterpretq_m128i_s64(s64x2); +} + +// Converts the four signed 16-bit integers in the lower 64 bits to four signed +// 32-bit integers. +FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a) +{ + return vreinterpretq_m128i_s32( + vmovl_s16(vget_low_s16(vreinterpretq_s16_m128i(a)))); +} + +// Converts the two signed 16-bit integers in the lower 32 bits two signed +// 32-bit integers. +FORCE_INLINE __m128i _mm_cvtepi16_epi64(__m128i a) +{ + int16x8_t s16x8 = vreinterpretq_s16_m128i(a); /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + return vreinterpretq_m128i_s64(s64x2); +} + +// Converts the four unsigned 16-bit integers in the lower 64 bits to four +// unsigned 32-bit integers. +FORCE_INLINE __m128i _mm_cvtepu16_epi32(__m128i a) +{ + return vreinterpretq_m128i_u32( + vmovl_u16(vget_low_u16(vreinterpretq_u16_m128i(a)))); +} + +// Converts the two unsigned 16-bit integers in the lower 32 bits to two +// unsigned 64-bit integers. +FORCE_INLINE __m128i _mm_cvtepu16_epi64(__m128i a) +{ + uint16x8_t u16x8 = vreinterpretq_u16_m128i(a); /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + return vreinterpretq_m128i_u64(u64x2); +} + +// Converts the two unsigned 32-bit integers in the lower 64 bits to two +// unsigned 64-bit integers. +FORCE_INLINE __m128i _mm_cvtepu32_epi64(__m128i a) +{ + return vreinterpretq_m128i_u64( + vmovl_u32(vget_low_u32(vreinterpretq_u32_m128i(a)))); +} + +// Converts the two signed 32-bit integers in the lower 64 bits to two signed +// 64-bit integers. +FORCE_INLINE __m128i _mm_cvtepi32_epi64(__m128i a) +{ + return vreinterpretq_m128i_s64( + vmovl_s32(vget_low_s32(vreinterpretq_s32_m128i(a)))); +} + +// Converts the four single-precision, floating-point values of a to signed +// 32-bit integer values. +// +// r0 := (int) a0 +// r1 := (int) a1 +// r2 := (int) a2 +// r3 := (int) a3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx +// *NOTE*. The default rounding mode on SSE is 'round to even', which ARMv7-A +// does not support! It is supported on ARMv8-A however. +FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s32(vcvtnq_s32_f32(a)); +#else + uint32x4_t signmask = vdupq_n_u32(0x80000000); + float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), + vdupq_n_f32(0.5f)); /* +/- 0.5 */ + int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32( + vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/ + int32x4_t r_trunc = + vcvtq_s32_f32(vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */ + int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32( + vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */ + int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), + vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */ + float32x4_t delta = vsubq_f32( + vreinterpretq_f32_m128(a), + vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */ + uint32x4_t is_delta_half = vceqq_f32(delta, half); /* delta == +/- 0.5 */ + return vreinterpretq_m128i_s32(vbslq_s32(is_delta_half, r_even, r_normal)); +#endif +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed 16-bit integers, and store the results in dst. Note: this intrinsic +// will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and +// 0x7FFFFFFF. +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi16 +FORCE_INLINE __m64 _mm_cvtps_pi16(__m128 a) +{ + return vreinterpret_m64_s16( + vmovn_s32(vreinterpretq_s32_m128i(_mm_cvtps_epi32(a)))); +} + +// Copy the lower 32-bit integer in a to dst. +// +// dst[31:0] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32 +FORCE_INLINE int _mm_cvtsi128_si32(__m128i a) +{ + return vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0); +} + +// Copy the lower 64-bit integer in a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64 +FORCE_INLINE int64_t _mm_cvtsi128_si64(__m128i a) +{ + return vgetq_lane_s64(vreinterpretq_s64_m128i(a), 0); +} + +// Copy the lower 64-bit integer in a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x +#define _mm_cvtsi128_si64x(a) _mm_cvtsi128_si64(a) + +// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, +// zero extending the upper bits. +// +// r0 := a +// r1 := 0x0 +// r2 := 0x0 +// r3 := 0x0 +// +// https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) +{ + return vreinterpretq_m128i_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0)); +} + +// Moves 64-bit integer a to the least significant 64 bits of an __m128 object, +// zero extending the upper bits. +// +// r0 := a +// r1 := 0x0 +FORCE_INLINE __m128i _mm_cvtsi64_si128(int64_t a) +{ + return vreinterpretq_m128i_s64(vsetq_lane_s64(a, vdupq_n_s64(0), 0)); +} + +// Cast vector of type __m128 to type __m128d. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd +FORCE_INLINE __m128d _mm_castps_pd(__m128 a) +{ + return vreinterpretq_m128d_s32(vreinterpretq_s32_m128(a)); +} + +// Applies a type cast to reinterpret four 32-bit floating point values passed +// in as a 128-bit parameter as packed 32-bit integers. +// https://msdn.microsoft.com/en-us/library/bb514099.aspx +FORCE_INLINE __m128i _mm_castps_si128(__m128 a) +{ + return vreinterpretq_m128i_s32(vreinterpretq_s32_m128(a)); +} + +// Cast vector of type __m128i to type __m128d. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd +FORCE_INLINE __m128d _mm_castsi128_pd(__m128i a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vreinterpretq_f64_m128i(a)); +#else + return vreinterpretq_m128d_f32(vreinterpretq_f32_m128i(a)); +#endif +} + +// Applies a type cast to reinterpret four 32-bit integers passed in as a +// 128-bit parameter as packed 32-bit floating point values. +// https://msdn.microsoft.com/en-us/library/bb514029.aspx +FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) +{ + return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a)); +} + +// Loads 128-bit value. : +// https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx +FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) +{ + return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p)); +} + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd +FORCE_INLINE __m128d _mm_load1_pd(const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vld1q_dup_f64(p)); +#else + return vreinterpretq_m128d_s64(vdupq_n_s64(*(const int64_t *) p)); +#endif +} + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1 +#define _mm_load_pd1 _mm_load1_pd + +// Load a double-precision (64-bit) floating-point element from memory into the +// upper element of dst, and copy the lower element from a to dst. mem_addr does +// not need to be aligned on any particular boundary. +// +// dst[63:0] := a[63:0] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd +FORCE_INLINE __m128d _mm_loadh_pd(__m128d a, const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcombine_f64(vget_low_f64(vreinterpretq_f64_m128d(a)), vld1_f64(p))); +#else + return vreinterpretq_m128d_f32(vcombine_f32( + vget_low_f32(vreinterpretq_f32_m128d(a)), vld1_f32((const float *) p))); +#endif +} + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1 +#define _mm_load_pd1 _mm_load1_pd + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd +#define _mm_loaddup_pd _mm_load1_pd + +// Loads 128-bit value. : +// https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx +FORCE_INLINE __m128i _mm_loadu_si128(const __m128i *p) +{ + return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p)); +} + +// Load unaligned 32-bit integer from memory into the first element of dst. +// +// dst[31:0] := MEM[mem_addr+31:mem_addr] +// dst[MAX:32] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si32 +FORCE_INLINE __m128i _mm_loadu_si32(const void *p) +{ + return vreinterpretq_m128i_s32( + vsetq_lane_s32(*(const int32_t *) p, vdupq_n_s32(0), 0)); +} + +// Convert packed double-precision (64-bit) floating-point elements in a to +// packed single-precision (32-bit) floating-point elements, and store the +// results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// k := 64*j +// dst[i+31:i] := Convert_FP64_To_FP32(a[k+64:k]) +// ENDFOR +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps +FORCE_INLINE __m128 _mm_cvtpd_ps(__m128d a) +{ +#if defined(__aarch64__) + float32x2_t tmp = vcvt_f32_f64(vreinterpretq_f64_m128d(a)); + return vreinterpretq_m128_f32(vcombine_f32(tmp, vdup_n_f32(0))); +#else + float a0 = (float) ((double *) &a)[0]; + float a1 = (float) ((double *) &a)[1]; + return _mm_set_ps(0, 0, a1, a0); +#endif +} + +// Copy the lower double-precision (64-bit) floating-point element of a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64 +FORCE_INLINE double _mm_cvtsd_f64(__m128d a) +{ +#if defined(__aarch64__) + return (double) vgetq_lane_f64(vreinterpretq_f64_m128d(a), 0); +#else + return ((double *) &a)[0]; +#endif +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed double-precision (64-bit) floating-point elements, and store the +// results in dst. +// +// FOR j := 0 to 1 +// i := 64*j +// k := 32*j +// dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd +FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcvt_f64_f32(vget_low_f32(vreinterpretq_f32_m128(a)))); +#else + double a0 = (double) vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + double a1 = (double) vgetq_lane_f32(vreinterpretq_f32_m128(a), 1); + return _mm_set_pd(a1, a0); +#endif +} + +// Cast vector of type __m128d to type __m128i. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128 +FORCE_INLINE __m128i _mm_castpd_si128(__m128d a) +{ + return vreinterpretq_m128i_s64(vreinterpretq_s64_m128d(a)); +} + +// Cast vector of type __m128d to type __m128. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps +FORCE_INLINE __m128 _mm_castpd_ps(__m128d a) +{ + return vreinterpretq_m128_s64(vreinterpretq_s64_m128d(a)); +} + +// Blend packed single-precision (32-bit) floating-point elements from a and b +// using mask, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps +FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask) +{ + // Use a signed shift right to create a mask with the sign bit + uint32x4_t mask = + vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_m128(_mask), 31)); + float32x4_t a = vreinterpretq_f32_m128(_a); + float32x4_t b = vreinterpretq_f32_m128(_b); + return vreinterpretq_m128_f32(vbslq_f32(mask, b, a)); +} + +// Blend packed single-precision (32-bit) floating-point elements from a and b +// using mask, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps +FORCE_INLINE __m128 _mm_blend_ps(__m128 _a, __m128 _b, const char imm8) +{ + const uint32_t ALIGN_STRUCT(16) + data[4] = {((imm8) & (1 << 0)) ? UINT32_MAX : 0, + ((imm8) & (1 << 1)) ? UINT32_MAX : 0, + ((imm8) & (1 << 2)) ? UINT32_MAX : 0, + ((imm8) & (1 << 3)) ? UINT32_MAX : 0}; + uint32x4_t mask = vld1q_u32(data); + float32x4_t a = vreinterpretq_f32_m128(_a); + float32x4_t b = vreinterpretq_f32_m128(_b); + return vreinterpretq_m128_f32(vbslq_f32(mask, b, a)); +} + +// Blend packed double-precision (64-bit) floating-point elements from a and b +// using mask, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd +FORCE_INLINE __m128d _mm_blendv_pd(__m128d _a, __m128d _b, __m128d _mask) +{ + uint64x2_t mask = + vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_m128d(_mask), 63)); +#if defined(__aarch64__) + float64x2_t a = vreinterpretq_f64_m128d(_a); + float64x2_t b = vreinterpretq_f64_m128d(_b); + return vreinterpretq_m128d_f64(vbslq_f64(mask, b, a)); +#else + uint64x2_t a = vreinterpretq_u64_m128d(_a); + uint64x2_t b = vreinterpretq_u64_m128d(_b); + return vreinterpretq_m128d_u64(vbslq_u64(mask, b, a)); +#endif +} + +typedef struct { + uint16_t res0; + uint8_t res1 : 6; + uint8_t bit22 : 1; + uint8_t bit23 : 1; + uint8_t res2; +#if defined(__aarch64__) + uint32_t res3; +#endif +} fpcr_bitfield; + +// Macro: Set the rounding mode bits of the MXCSR control and status register to +// the value in unsigned 32-bit integer a. The rounding mode may contain any of +// the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, +// _MM_ROUND_TOWARD_ZERO +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_ROUNDING_MODE +FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) +{ + union { + fpcr_bitfield field; +#if defined(__aarch64__) + uint64_t value; +#else + uint32_t value; +#endif + } r; + +#if defined(__aarch64__) + asm volatile("mrs %0, FPCR" : "=r"(r.value)); /* read */ +#else + asm volatile("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ +#endif + + switch (rounding) { + case _MM_ROUND_TOWARD_ZERO: + r.field.bit22 = 1; + r.field.bit23 = 1; + break; + case _MM_ROUND_DOWN: + r.field.bit22 = 0; + r.field.bit23 = 1; + break; + case _MM_ROUND_UP: + r.field.bit22 = 1; + r.field.bit23 = 0; + break; + default: //_MM_ROUND_NEAREST + r.field.bit22 = 0; + r.field.bit23 = 0; + } + +#if defined(__aarch64__) + asm volatile("msr FPCR, %0" ::"r"(r)); /* write */ +#else + asm volatile("vmsr FPSCR, %0" ::"r"(r)); /* write */ +#endif +} + +FORCE_INLINE void _mm_setcsr(unsigned int a) +{ + _MM_SET_ROUNDING_MODE(a); +} + +// Round the packed single-precision (32-bit) floating-point elements in a using +// the rounding parameter, and store the results as packed single-precision +// floating-point elements in dst. +// software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps +FORCE_INLINE __m128 _mm_round_ps(__m128 a, int rounding) +{ +#if defined(__aarch64__) + switch (rounding) { + case (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC): + return vreinterpretq_m128_f32(vrndnq_f32(vreinterpretq_f32_m128(a))); + case (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC): + return vreinterpretq_m128_f32(vrndmq_f32(vreinterpretq_f32_m128(a))); + case (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC): + return vreinterpretq_m128_f32(vrndpq_f32(vreinterpretq_f32_m128(a))); + case (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC): + return vreinterpretq_m128_f32(vrndq_f32(vreinterpretq_f32_m128(a))); + default: //_MM_FROUND_CUR_DIRECTION + return vreinterpretq_m128_f32(vrndiq_f32(vreinterpretq_f32_m128(a))); + } +#else + float *v_float = (float *) &a; + __m128 zero, neg_inf, pos_inf; + + switch (rounding) { + case (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC): + return _mm_cvtepi32_ps(_mm_cvtps_epi32(a)); + case (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC): + return (__m128){floorf(v_float[0]), floorf(v_float[1]), + floorf(v_float[2]), floorf(v_float[3])}; + case (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC): + return (__m128){ceilf(v_float[0]), ceilf(v_float[1]), ceilf(v_float[2]), + ceilf(v_float[3])}; + case (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC): + zero = _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f); + neg_inf = _mm_set_ps(floorf(v_float[0]), floorf(v_float[1]), + floorf(v_float[2]), floorf(v_float[3])); + pos_inf = _mm_set_ps(ceilf(v_float[0]), ceilf(v_float[1]), + ceilf(v_float[2]), ceilf(v_float[3])); + return _mm_blendv_ps(pos_inf, neg_inf, _mm_cmple_ps(a, zero)); + default: //_MM_FROUND_CUR_DIRECTION + return (__m128){roundf(v_float[0]), roundf(v_float[1]), + roundf(v_float[2]), roundf(v_float[3])}; + } +#endif +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed 32-bit integers, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ps2pi +FORCE_INLINE __m64 _mm_cvt_ps2pi(__m128 a) +{ +#if defined(__aarch64__) + return vreinterpret_m64_s32( + vget_low_s32(vcvtnq_s32_f32(vreinterpretq_f32_m128(a)))); +#else + return vreinterpret_m64_s32( + vcvt_s32_f32(vget_low_f32(vreinterpretq_f32_m128( + _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))))); +#endif +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed 32-bit integers, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi32 +#define _mm_cvtps_pi32(a) _mm_cvt_ps2pi(a) + +// Round the packed single-precision (32-bit) floating-point elements in a up to +// an integer value, and store the results as packed single-precision +// floating-point elements in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps +FORCE_INLINE __m128 _mm_ceil_ps(__m128 a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +} + +// Round the lower single-precision (32-bit) floating-point element in b up to +// an integer value, store the result as a single-precision floating-point +// element in the lower element of dst, and copy the upper 3 packed elements +// from a to the upper elements of dst. +// +// dst[31:0] := CEIL(b[31:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss +FORCE_INLINE __m128 _mm_ceil_ss(__m128 a, __m128 b) +{ + return _mm_move_ss( + a, _mm_round_ps(b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)); +} + +// Round the packed single-precision (32-bit) floating-point elements in a down +// to an integer value, and store the results as packed single-precision +// floating-point elements in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps +FORCE_INLINE __m128 _mm_floor_ps(__m128 a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +} + +// Round the lower single-precision (32-bit) floating-point element in b down to +// an integer value, store the result as a single-precision floating-point +// element in the lower element of dst, and copy the upper 3 packed elements +// from a to the upper elements of dst. +// +// dst[31:0] := FLOOR(b[31:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss +FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b) +{ + return _mm_move_ss( + a, _mm_round_ps(b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)); +} + +// Load 128-bits of integer data from unaligned memory into dst. This intrinsic +// may perform better than _mm_loadu_si128 when the data crosses a cache line +// boundary. +// +// dst[127:0] := MEM[mem_addr+127:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128 +#define _mm_lddqu_si128 _mm_loadu_si128 + +/* Miscellaneous Operations */ + +// Shifts the 8 signed 16-bit integers in a right by count bits while shifting +// in the sign bit. +// +// r0 := a0 >> count +// r1 := a1 >> count +// ... +// r7 := a7 >> count +// +// https://msdn.microsoft.com/en-us/library/3c9997dk(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_sra_epi16(__m128i a, __m128i count) +{ + int64_t c = (int64_t) vget_low_s64((int64x2_t) count); + if (unlikely(c > 15)) + return _mm_cmplt_epi16(a, _mm_setzero_si128()); + return vreinterpretq_m128i_s16(vshlq_s16((int16x8_t) a, vdupq_n_s16(-c))); +} + +// Shifts the 4 signed 32-bit integers in a right by count bits while shifting +// in the sign bit. +// +// r0 := a0 >> count +// r1 := a1 >> count +// r2 := a2 >> count +// r3 := a3 >> count +// +// https://msdn.microsoft.com/en-us/library/ce40009e(v%3dvs.100).aspx +FORCE_INLINE __m128i _mm_sra_epi32(__m128i a, __m128i count) +{ + int64_t c = (int64_t) vget_low_s64((int64x2_t) count); + if (unlikely(c > 31)) + return _mm_cmplt_epi32(a, _mm_setzero_si128()); + return vreinterpretq_m128i_s32(vshlq_s32((int32x4_t) a, vdupq_n_s32(-c))); +} + +// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and +// saturates. +// https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)), + vqmovn_s16(vreinterpretq_s16_m128i(b)))); +} + +// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned +// integers and saturates. +// +// r0 := UnsignedSaturate(a0) +// r1 := UnsignedSaturate(a1) +// ... +// r7 := UnsignedSaturate(a7) +// r8 := UnsignedSaturate(b0) +// r9 := UnsignedSaturate(b1) +// ... +// r15 := UnsignedSaturate(b7) +// +// https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx +FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) +{ + return vreinterpretq_m128i_u8( + vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)), + vqmovun_s16(vreinterpretq_s16_m128i(b)))); +} + +// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers +// and saturates. +// +// r0 := SignedSaturate(a0) +// r1 := SignedSaturate(a1) +// r2 := SignedSaturate(a2) +// r3 := SignedSaturate(a3) +// r4 := SignedSaturate(b0) +// r5 := SignedSaturate(b1) +// r6 := SignedSaturate(b2) +// r7 := SignedSaturate(b3) +// +// https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)), + vqmovn_s32(vreinterpretq_s32_m128i(b)))); +} + +// Packs the 8 unsigned 32-bit integers from a and b into unsigned 16-bit +// integers and saturates. +// +// r0 := UnsignedSaturate(a0) +// r1 := UnsignedSaturate(a1) +// r2 := UnsignedSaturate(a2) +// r3 := UnsignedSaturate(a3) +// r4 := UnsignedSaturate(b0) +// r5 := UnsignedSaturate(b1) +// r6 := UnsignedSaturate(b2) +// r7 := UnsignedSaturate(b3) +FORCE_INLINE __m128i _mm_packus_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vcombine_u16(vqmovun_s32(vreinterpretq_s32_m128i(a)), + vqmovun_s32(vreinterpretq_s32_m128i(b)))); +} + +// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower +// 8 signed or unsigned 8-bit integers in b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// ... +// r14 := a7 +// r15 := b7 +// +// https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s8( + vzip1q_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +#else + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(a))); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(b))); + int8x8x2_t result = vzip_s8(a1, b1); + return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); +#endif +} + +// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the +// lower 4 signed or unsigned 16-bit integers in b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// r4 := a2 +// r5 := b2 +// r6 := a3 +// r7 := b3 +// +// https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s16( + vzip1q_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +#else + int16x4_t a1 = vget_low_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b1 = vget_low_s16(vreinterpretq_s16_m128i(b)); + int16x4x2_t result = vzip_s16(a1, b1); + return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); +#endif +} + +// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the +// lower 2 signed or unsigned 32 - bit integers in b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// +// https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s32( + vzip1q_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +#else + int32x2_t a1 = vget_low_s32(vreinterpretq_s32_m128i(a)); + int32x2_t b1 = vget_low_s32(vreinterpretq_s32_m128i(b)); + int32x2x2_t result = vzip_s32(a1, b1); + return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); +#endif +} + +FORCE_INLINE __m128i _mm_unpacklo_epi64(__m128i a, __m128i b) +{ + int64x1_t a_l = vget_low_s64(vreinterpretq_s64_m128i(a)); + int64x1_t b_l = vget_low_s64(vreinterpretq_s64_m128i(b)); + return vreinterpretq_m128i_s64(vcombine_s64(a_l, b_l)); +} + +// Selects and interleaves the lower two single-precision, floating-point values +// from a and b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// +// https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vzip1q_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x2_t a1 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t b1 = vget_low_f32(vreinterpretq_f32_m128(b)); + float32x2x2_t result = vzip_f32(a1, b1); + return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); +#endif +} + +// Unpack and interleave double-precision (64-bit) floating-point elements from +// the low half of a and b, and store the results in dst. +// +// DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { +// dst[63:0] := src1[63:0] +// dst[127:64] := src2[63:0] +// RETURN dst[127:0] +// } +// dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd +FORCE_INLINE __m128d _mm_unpacklo_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vzip1q_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + return vreinterpretq_m128d_s64( + vcombine_s64(vget_low_s64(vreinterpretq_s64_m128d(a)), + vget_low_s64(vreinterpretq_s64_m128d(b)))); +#endif +} + +// Unpack and interleave double-precision (64-bit) floating-point elements from +// the high half of a and b, and store the results in dst. +// +// DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { +// dst[63:0] := src1[127:64] +// dst[127:64] := src2[127:64] +// RETURN dst[127:0] +// } +// dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd +FORCE_INLINE __m128d _mm_unpackhi_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vzip2q_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + return vreinterpretq_m128d_s64( + vcombine_s64(vget_high_s64(vreinterpretq_s64_m128d(a)), + vget_high_s64(vreinterpretq_s64_m128d(b)))); +#endif +} + +// Selects and interleaves the upper two single-precision, floating-point values +// from a and b. +// +// r0 := a2 +// r1 := b2 +// r2 := a3 +// r3 := b3 +// +// https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vzip2q_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x2_t a1 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32x2_t b1 = vget_high_f32(vreinterpretq_f32_m128(b)); + float32x2x2_t result = vzip_f32(a1, b1); + return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); +#endif +} + +// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper +// 8 signed or unsigned 8-bit integers in b. +// +// r0 := a8 +// r1 := b8 +// r2 := a9 +// r3 := b9 +// ... +// r14 := a15 +// r15 := b15 +// +// https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s8( + vzip2q_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +#else + int8x8_t a1 = + vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(a))); + int8x8_t b1 = + vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(b))); + int8x8x2_t result = vzip_s8(a1, b1); + return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); +#endif +} + +// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the +// upper 4 signed or unsigned 16-bit integers in b. +// +// r0 := a4 +// r1 := b4 +// r2 := a5 +// r3 := b5 +// r4 := a6 +// r5 := b6 +// r6 := a7 +// r7 := b7 +// +// https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s16( + vzip2q_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +#else + int16x4_t a1 = vget_high_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b1 = vget_high_s16(vreinterpretq_s16_m128i(b)); + int16x4x2_t result = vzip_s16(a1, b1); + return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); +#endif +} + +// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the +// upper 2 signed or unsigned 32-bit integers in b. +// https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s32( + vzip2q_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +#else + int32x2_t a1 = vget_high_s32(vreinterpretq_s32_m128i(a)); + int32x2_t b1 = vget_high_s32(vreinterpretq_s32_m128i(b)); + int32x2x2_t result = vzip_s32(a1, b1); + return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); +#endif +} + +// Interleaves the upper signed or unsigned 64-bit integer in a with the +// upper signed or unsigned 64-bit integer in b. +// +// r0 := a1 +// r1 := b1 +FORCE_INLINE __m128i _mm_unpackhi_epi64(__m128i a, __m128i b) +{ + int64x1_t a_h = vget_high_s64(vreinterpretq_s64_m128i(a)); + int64x1_t b_h = vget_high_s64(vreinterpretq_s64_m128i(b)); + return vreinterpretq_m128i_s64(vcombine_s64(a_h, b_h)); +} + +// Horizontally compute the minimum amongst the packed unsigned 16-bit integers +// in a, store the minimum and index in dst, and zero the remaining bits in dst. +// +// index[2:0] := 0 +// min[15:0] := a[15:0] +// FOR j := 0 to 7 +// i := j*16 +// IF a[i+15:i] < min[15:0] +// index[2:0] := j +// min[15:0] := a[i+15:i] +// FI +// ENDFOR +// dst[15:0] := min[15:0] +// dst[18:16] := index[2:0] +// dst[127:19] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16 +FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) +{ + __m128i dst; + uint16_t min, idx = 0; + // Find the minimum value +#if defined(__aarch64__) + min = vminvq_u16(vreinterpretq_u16_m128i(a)); +#else + __m64 tmp; + tmp = vreinterpret_m64_u16( + vmin_u16(vget_low_u16(vreinterpretq_u16_m128i(a)), + vget_high_u16(vreinterpretq_u16_m128i(a)))); + tmp = vreinterpret_m64_u16( + vpmin_u16(vreinterpret_u16_m64(tmp), vreinterpret_u16_m64(tmp))); + tmp = vreinterpret_m64_u16( + vpmin_u16(vreinterpret_u16_m64(tmp), vreinterpret_u16_m64(tmp))); + min = vget_lane_u16(vreinterpret_u16_m64(tmp), 0); +#endif + // Get the index of the minimum value + int i; + for (i = 0; i < 8; i++) { + if (min == vgetq_lane_u16(vreinterpretq_u16_m128i(a), 0)) { + idx = (uint16_t) i; + break; + } + a = _mm_srli_si128(a, 2); + } + // Generate result + dst = _mm_setzero_si128(); + dst = vreinterpretq_m128i_u16( + vsetq_lane_u16(min, vreinterpretq_u16_m128i(dst), 0)); + dst = vreinterpretq_m128i_u16( + vsetq_lane_u16(idx, vreinterpretq_u16_m128i(dst), 1)); + return dst; +} + +// Compute the bitwise AND of 128 bits (representing integer data) in a and b, +// and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the +// bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, +// otherwise set CF to 0. Return the CF value. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128 +FORCE_INLINE int _mm_testc_si128(__m128i a, __m128i b) +{ + int64x2_t s64 = + vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_m128i(a))), + vreinterpretq_s64_m128i(b)); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); +} + +// Compute the bitwise AND of 128 bits (representing integer data) in a and b, +// and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the +// bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, +// otherwise set CF to 0. Return the ZF value. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128 +FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b) +{ + int64x2_t s64 = + vandq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b)); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); +} + +// Extracts the selected signed or unsigned 8-bit integer from a and zero +// extends. +// FORCE_INLINE int _mm_extract_epi8(__m128i a, __constrange(0,16) int imm) +#define _mm_extract_epi8(a, imm) vgetq_lane_u8(vreinterpretq_u8_m128i(a), (imm)) + +// Inserts the least significant 8 bits of b into the selected 8-bit integer +// of a. +// FORCE_INLINE __m128i _mm_insert_epi8(__m128i a, int b, +// __constrange(0,16) int imm) +#define _mm_insert_epi8(a, b, imm) \ + __extension__({ \ + vreinterpretq_m128i_s8( \ + vsetq_lane_s8((b), vreinterpretq_s8_m128i(a), (imm))); \ + }) + +// Extracts the selected signed or unsigned 16-bit integer from a and zero +// extends. +// https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx +// FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm) +#define _mm_extract_epi16(a, imm) \ + vgetq_lane_u16(vreinterpretq_u16_m128i(a), (imm)) + +// Inserts the least significant 16 bits of b into the selected 16-bit integer +// of a. +// https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx +// FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, int b, +// __constrange(0,8) int imm) +#define _mm_insert_epi16(a, b, imm) \ + __extension__({ \ + vreinterpretq_m128i_s16( \ + vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \ + }) + +// Copy a to dst, and insert the 16-bit integer i into dst at the location +// specified by imm8. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_pi16 +#define _mm_insert_pi16(a, b, imm) \ + __extension__({ \ + vreinterpret_m64_s16( \ + vset_lane_s16((b), vreinterpret_s16_m64(a), (imm))); \ + }) + +// Extracts the selected signed or unsigned 32-bit integer from a and zero +// extends. +// FORCE_INLINE int _mm_extract_epi32(__m128i a, __constrange(0,4) int imm) +#define _mm_extract_epi32(a, imm) \ + vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)) + +// Extracts the selected single-precision (32-bit) floating-point from a. +// FORCE_INLINE int _mm_extract_ps(__m128 a, __constrange(0,4) int imm) +#define _mm_extract_ps(a, imm) vgetq_lane_s32(vreinterpretq_s32_m128(a), (imm)) + +// Inserts the least significant 32 bits of b into the selected 32-bit integer +// of a. +// FORCE_INLINE __m128i _mm_insert_epi32(__m128i a, int b, +// __constrange(0,4) int imm) +#define _mm_insert_epi32(a, b, imm) \ + __extension__({ \ + vreinterpretq_m128i_s32( \ + vsetq_lane_s32((b), vreinterpretq_s32_m128i(a), (imm))); \ + }) + +// Extracts the selected signed or unsigned 64-bit integer from a and zero +// extends. +// FORCE_INLINE __int64 _mm_extract_epi64(__m128i a, __constrange(0,2) int imm) +#define _mm_extract_epi64(a, imm) \ + vgetq_lane_s64(vreinterpretq_s64_m128i(a), (imm)) + +// Inserts the least significant 64 bits of b into the selected 64-bit integer +// of a. +// FORCE_INLINE __m128i _mm_insert_epi64(__m128i a, __int64 b, +// __constrange(0,2) int imm) +#define _mm_insert_epi64(a, b, imm) \ + __extension__({ \ + vreinterpretq_m128i_s64( \ + vsetq_lane_s64((b), vreinterpretq_s64_m128i(a), (imm))); \ + }) + +// Count the number of bits set to 1 in unsigned 32-bit integer a, and +// return that count in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32 +FORCE_INLINE int _mm_popcnt_u32(unsigned int a) +{ +#if defined(__aarch64__) +#if __has_builtin(__builtin_popcount) + return __builtin_popcount(a); +#else + return (int) vaddlv_u8(vcnt_u8(vcreate_u8((uint64_t) a))); +#endif +#else + uint32_t count = 0; + uint8x8_t input_val, count8x8_val; + uint16x4_t count16x4_val; + uint32x2_t count32x2_val; + + input_val = vld1_u8((uint8_t *) &a); + count8x8_val = vcnt_u8(input_val); + count16x4_val = vpaddl_u8(count8x8_val); + count32x2_val = vpaddl_u16(count16x4_val); + + vst1_u32(&count, count32x2_val); + return count; +#endif +} + +// Count the number of bits set to 1 in unsigned 64-bit integer a, and +// return that count in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64 +FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a) +{ +#if defined(__aarch64__) +#if __has_builtin(__builtin_popcountll) + return __builtin_popcountll(a); +#else + return (int64_t) vaddlv_u8(vcnt_u8(vcreate_u8(a))); +#endif +#else + uint64_t count = 0; + uint8x8_t input_val, count8x8_val; + uint16x4_t count16x4_val; + uint32x2_t count32x2_val; + uint64x1_t count64x1_val; + + input_val = vld1_u8((uint8_t *) &a); + count8x8_val = vcnt_u8(input_val); + count16x4_val = vpaddl_u8(count8x8_val); + count32x2_val = vpaddl_u16(count16x4_val); + count64x1_val = vpaddl_u32(count32x2_val); + vst1_u64(&count, count64x1_val); + return count; +#endif +} + +// Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision +// (32-bit) floating-point elements in row0, row1, row2, and row3, and store the +// transposed matrix in these vectors (row0 now contains column 0, etc.). +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=MM_TRANSPOSE4_PS +#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \ + vget_low_f32(ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \ + vget_low_f32(ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \ + vget_high_f32(ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \ + vget_high_f32(ROW23.val[1])); \ + } while (0) + +/* Crypto Extensions */ + +#if defined(__ARM_FEATURE_CRYPTO) +// Wraps vmull_p64 +FORCE_INLINE uint64x2_t _sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b) +{ + poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0); + poly64_t b = vget_lane_p64(vreinterpret_p64_u64(_b), 0); + return vreinterpretq_u64_p128(vmull_p64(a, b)); +} +#else // ARMv7 polyfill +// ARMv7/some A64 lacks vmull_p64, but it has vmull_p8. +// +// vmull_p8 calculates 8 8-bit->16-bit polynomial multiplies, but we need a +// 64-bit->128-bit polynomial multiply. +// +// It needs some work and is somewhat slow, but it is still faster than all +// known scalar methods. +// +// Algorithm adapted to C from +// https://www.workofard.com/2017/07/ghash-for-low-end-cores/, which is adapted +// from "Fast Software Polynomial Multiplication on ARM Processors Using the +// NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and Ricardo Dahab +// (https://hal.inria.fr/hal-01506572) +static uint64x2_t _sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b) +{ + poly8x8_t a = vreinterpret_p8_u64(_a); + poly8x8_t b = vreinterpret_p8_u64(_b); + + // Masks + uint8x16_t k48_32 = vcombine_u8(vcreate_u8(0x0000ffffffffffff), + vcreate_u8(0x00000000ffffffff)); + uint8x16_t k16_00 = vcombine_u8(vcreate_u8(0x000000000000ffff), + vcreate_u8(0x0000000000000000)); + + // Do the multiplies, rotating with vext to get all combinations + uint8x16_t d = vreinterpretq_u8_p16(vmull_p8(a, b)); // D = A0 * B0 + uint8x16_t e = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 1))); // E = A0 * B1 + uint8x16_t f = + vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 1), b)); // F = A1 * B0 + uint8x16_t g = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 2))); // G = A0 * B2 + uint8x16_t h = + vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 2), b)); // H = A2 * B0 + uint8x16_t i = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 3))); // I = A0 * B3 + uint8x16_t j = + vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 3), b)); // J = A3 * B0 + uint8x16_t k = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 4))); // L = A0 * B4 + + // Add cross products + uint8x16_t l = veorq_u8(e, f); // L = E + F + uint8x16_t m = veorq_u8(g, h); // M = G + H + uint8x16_t n = veorq_u8(i, j); // N = I + J + + // Interleave. Using vzip1 and vzip2 prevents Clang from emitting TBL + // instructions. +#if defined(__aarch64__) + uint8x16_t lm_p0 = vreinterpretq_u8_u64( + vzip1q_u64(vreinterpretq_u64_u8(l), vreinterpretq_u64_u8(m))); + uint8x16_t lm_p1 = vreinterpretq_u8_u64( + vzip2q_u64(vreinterpretq_u64_u8(l), vreinterpretq_u64_u8(m))); + uint8x16_t nk_p0 = vreinterpretq_u8_u64( + vzip1q_u64(vreinterpretq_u64_u8(n), vreinterpretq_u64_u8(k))); + uint8x16_t nk_p1 = vreinterpretq_u8_u64( + vzip2q_u64(vreinterpretq_u64_u8(n), vreinterpretq_u64_u8(k))); +#else + uint8x16_t lm_p0 = vcombine_u8(vget_low_u8(l), vget_low_u8(m)); + uint8x16_t lm_p1 = vcombine_u8(vget_high_u8(l), vget_high_u8(m)); + uint8x16_t nk_p0 = vcombine_u8(vget_low_u8(n), vget_low_u8(k)); + uint8x16_t nk_p1 = vcombine_u8(vget_high_u8(n), vget_high_u8(k)); +#endif + // t0 = (L) (P0 + P1) << 8 + // t1 = (M) (P2 + P3) << 16 + uint8x16_t t0t1_tmp = veorq_u8(lm_p0, lm_p1); + uint8x16_t t0t1_h = vandq_u8(lm_p1, k48_32); + uint8x16_t t0t1_l = veorq_u8(t0t1_tmp, t0t1_h); + + // t2 = (N) (P4 + P5) << 24 + // t3 = (K) (P6 + P7) << 32 + uint8x16_t t2t3_tmp = veorq_u8(nk_p0, nk_p1); + uint8x16_t t2t3_h = vandq_u8(nk_p1, k16_00); + uint8x16_t t2t3_l = veorq_u8(t2t3_tmp, t2t3_h); + + // De-interleave +#if defined(__aarch64__) + uint8x16_t t0 = vreinterpretq_u8_u64( + vuzp1q_u64(vreinterpretq_u64_u8(t0t1_l), vreinterpretq_u64_u8(t0t1_h))); + uint8x16_t t1 = vreinterpretq_u8_u64( + vuzp2q_u64(vreinterpretq_u64_u8(t0t1_l), vreinterpretq_u64_u8(t0t1_h))); + uint8x16_t t2 = vreinterpretq_u8_u64( + vuzp1q_u64(vreinterpretq_u64_u8(t2t3_l), vreinterpretq_u64_u8(t2t3_h))); + uint8x16_t t3 = vreinterpretq_u8_u64( + vuzp2q_u64(vreinterpretq_u64_u8(t2t3_l), vreinterpretq_u64_u8(t2t3_h))); +#else + uint8x16_t t1 = vcombine_u8(vget_high_u8(t0t1_l), vget_high_u8(t0t1_h)); + uint8x16_t t0 = vcombine_u8(vget_low_u8(t0t1_l), vget_low_u8(t0t1_h)); + uint8x16_t t3 = vcombine_u8(vget_high_u8(t2t3_l), vget_high_u8(t2t3_h)); + uint8x16_t t2 = vcombine_u8(vget_low_u8(t2t3_l), vget_low_u8(t2t3_h)); +#endif + // Shift the cross products + uint8x16_t t0_shift = vextq_u8(t0, t0, 15); // t0 << 8 + uint8x16_t t1_shift = vextq_u8(t1, t1, 14); // t1 << 16 + uint8x16_t t2_shift = vextq_u8(t2, t2, 13); // t2 << 24 + uint8x16_t t3_shift = vextq_u8(t3, t3, 12); // t3 << 32 + + // Accumulate the products + uint8x16_t cross1 = veorq_u8(t0_shift, t1_shift); + uint8x16_t cross2 = veorq_u8(t2_shift, t3_shift); + uint8x16_t mix = veorq_u8(d, cross1); + uint8x16_t r = veorq_u8(mix, cross2); + return vreinterpretq_u64_u8(r); +} +#endif // ARMv7 polyfill + +// Perform a carry-less multiplication of two 64-bit integers, selected from a +// and b according to imm8, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128 +FORCE_INLINE __m128i _mm_clmulepi64_si128(__m128i _a, __m128i _b, const int imm) +{ + uint64x2_t a = vreinterpretq_u64_m128i(_a); + uint64x2_t b = vreinterpretq_u64_m128i(_b); + switch (imm & 0x11) { + case 0x00: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_low_u64(a), vget_low_u64(b))); + case 0x01: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_high_u64(a), vget_low_u64(b))); + case 0x10: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_low_u64(a), vget_high_u64(b))); + case 0x11: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_high_u64(a), vget_high_u64(b))); + default: + abort(); + } +} + +#if !defined(__ARM_FEATURE_CRYPTO) +/* clang-format off */ +#define SSE2NEON_AES_DATA(w) \ + { \ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), \ + w(0xc5), w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), \ + w(0xab), w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), \ + w(0x59), w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), \ + w(0x9c), w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), \ + w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), \ + w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), \ + w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), \ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), \ + w(0x75), w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), \ + w(0x5a), w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), \ + w(0xe3), w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), \ + w(0x20), w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), \ + w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), \ + w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), \ + w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), \ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), \ + w(0xf5), w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), \ + w(0xf3), w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), \ + w(0x97), w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), \ + w(0x64), w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), \ + w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), \ + w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), \ + w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), \ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), \ + w(0x79), w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), \ + w(0x4e), w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), \ + w(0x7a), w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), \ + w(0x1c), w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), \ + w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), \ + w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), \ + w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), \ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), \ + w(0x94), w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), \ + w(0x28), w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), \ + w(0xe6), w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), \ + w(0xb0), w(0x54), w(0xbb), w(0x16) \ + } +/* clang-format on */ + +/* X Macro trick. See https://en.wikipedia.org/wiki/X_Macro */ +#define SSE2NEON_AES_H0(x) (x) +static const uint8_t SSE2NEON_sbox[256] = SSE2NEON_AES_DATA(SSE2NEON_AES_H0); +#undef SSE2NEON_AES_H0 + +// In the absence of crypto extensions, implement aesenc using regular neon +// intrinsics instead. See: +// https://www.workofard.com/2017/01/accelerated-aes-for-the-arm64-linux-kernel/ +// https://www.workofard.com/2017/07/ghash-for-low-end-cores/ and +// https://github.com/ColinIanKing/linux-next-mirror/blob/b5f466091e130caaf0735976648f72bd5e09aa84/crypto/aegis128-neon-inner.c#L52 +// for more information Reproduced with permission of the author. +FORCE_INLINE __m128i _mm_aesenc_si128(__m128i EncBlock, __m128i RoundKey) +{ +#if defined(__aarch64__) + static const uint8_t shift_rows[] = {0x0, 0x5, 0xa, 0xf, 0x4, 0x9, + 0xe, 0x3, 0x8, 0xd, 0x2, 0x7, + 0xc, 0x1, 0x6, 0xb}; + static const uint8_t ror32by8[] = {0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, + 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc}; + + uint8x16_t v; + uint8x16_t w = vreinterpretq_u8_m128i(EncBlock); + + // shift rows + w = vqtbl1q_u8(w, vld1q_u8(shift_rows)); + + // sub bytes + v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(SSE2NEON_sbox), w); + v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x40), w - 0x40); + v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x80), w - 0x80); + v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0xc0), w - 0xc0); + + // mix columns + w = (v << 1) ^ (uint8x16_t)(((int8x16_t) v >> 7) & 0x1b); + w ^= (uint8x16_t) vrev32q_u16((uint16x8_t) v); + w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8)); + + // add round key + return vreinterpretq_m128i_u8(w) ^ RoundKey; + +#else /* ARMv7-A NEON implementation */ +#define SSE2NEON_AES_B2W(b0, b1, b2, b3) \ + (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | \ + (b0)) +#define SSE2NEON_AES_F2(x) ((x << 1) ^ (((x >> 7) & 1) * 0x011b /* WPOLY */)) +#define SSE2NEON_AES_F3(x) (SSE2NEON_AES_F2(x) ^ x) +#define SSE2NEON_AES_U0(p) \ + SSE2NEON_AES_B2W(SSE2NEON_AES_F2(p), p, p, SSE2NEON_AES_F3(p)) +#define SSE2NEON_AES_U1(p) \ + SSE2NEON_AES_B2W(SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p, p) +#define SSE2NEON_AES_U2(p) \ + SSE2NEON_AES_B2W(p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p) +#define SSE2NEON_AES_U3(p) \ + SSE2NEON_AES_B2W(p, p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p)) + static const uint32_t ALIGN_STRUCT(16) aes_table[4][256] = { + SSE2NEON_AES_DATA(SSE2NEON_AES_U0), + SSE2NEON_AES_DATA(SSE2NEON_AES_U1), + SSE2NEON_AES_DATA(SSE2NEON_AES_U2), + SSE2NEON_AES_DATA(SSE2NEON_AES_U3), + }; +#undef SSE2NEON_AES_B2W +#undef SSE2NEON_AES_F2 +#undef SSE2NEON_AES_F3 +#undef SSE2NEON_AES_U0 +#undef SSE2NEON_AES_U1 +#undef SSE2NEON_AES_U2 +#undef SSE2NEON_AES_U3 + + uint32_t x0 = _mm_cvtsi128_si32(EncBlock); + uint32_t x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0x55)); + uint32_t x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0xAA)); + uint32_t x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0xFF)); + + __m128i out = _mm_set_epi32( + (aes_table[0][x3 & 0xff] ^ aes_table[1][(x0 >> 8) & 0xff] ^ + aes_table[2][(x1 >> 16) & 0xff] ^ aes_table[3][x2 >> 24]), + (aes_table[0][x2 & 0xff] ^ aes_table[1][(x3 >> 8) & 0xff] ^ + aes_table[2][(x0 >> 16) & 0xff] ^ aes_table[3][x1 >> 24]), + (aes_table[0][x1 & 0xff] ^ aes_table[1][(x2 >> 8) & 0xff] ^ + aes_table[2][(x3 >> 16) & 0xff] ^ aes_table[3][x0 >> 24]), + (aes_table[0][x0 & 0xff] ^ aes_table[1][(x1 >> 8) & 0xff] ^ + aes_table[2][(x2 >> 16) & 0xff] ^ aes_table[3][x3 >> 24])); + + return _mm_xor_si128(out, RoundKey); +#endif +} + +// Perform the last round of an AES encryption flow on data (state) in a using +// the round key in RoundKey, and store the result in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128 +FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) +{ + /* FIXME: optimized for NEON */ + uint8_t v[4][4] = { + [0] = {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 0)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 5)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 10)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 15)]}, + [1] = {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 4)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 9)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 14)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 3)]}, + [2] = {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 8)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 13)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 2)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 7)]}, + [3] = {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 12)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 1)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 6)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 11)]}, + }; + for (int i = 0; i < 16; i++) + vreinterpretq_nth_u8_m128i(a, i) = + v[i / 4][i % 4] ^ vreinterpretq_nth_u8_m128i(RoundKey, i); + return a; +} + +// Emits the Advanced Encryption Standard (AES) instruction aeskeygenassist. +// This instruction generates a round key for AES encryption. See +// https://kazakov.life/2017/11/01/cryptocurrency-mining-on-ios-devices/ +// for details. +// +// https://msdn.microsoft.com/en-us/library/cc714138(v=vs.120).aspx +FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i key, const int rcon) +{ + uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)); + uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)); + for (int i = 0; i < 4; ++i) { + ((uint8_t *) &X1)[i] = SSE2NEON_sbox[((uint8_t *) &X1)[i]]; + ((uint8_t *) &X3)[i] = SSE2NEON_sbox[((uint8_t *) &X3)[i]]; + } + return _mm_set_epi32(((X3 >> 8) | (X3 << 24)) ^ rcon, X3, + ((X1 >> 8) | (X1 << 24)) ^ rcon, X1); +} +#undef SSE2NEON_AES_DATA + +#else /* __ARM_FEATURE_CRYPTO */ +// Implements equivalent of 'aesenc' by combining AESE (with an empty key) and +// AESMC and then manually applying the real key as an xor operation. This +// unfortunately means an additional xor op; the compiler should be able to +// optimize this away for repeated calls however. See +// https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a +// for more details. +FORCE_INLINE __m128i _mm_aesenc_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0))) ^ + vreinterpretq_u8_m128i(b)); +} + +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128 +FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) +{ + return _mm_xor_si128(vreinterpretq_m128i_u8(vaeseq_u8( + vreinterpretq_u8_m128i(a), vdupq_n_u8(0))), + RoundKey); +} + +FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) +{ + // AESE does ShiftRows and SubBytes on A + uint8x16_t u8 = vaeseq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0)); + + uint8x16_t dest = { + // Undo ShiftRows step from AESE and extract X1 and X3 + u8[0x4], u8[0x1], u8[0xE], u8[0xB], // SubBytes(X1) + u8[0x1], u8[0xE], u8[0xB], u8[0x4], // ROT(SubBytes(X1)) + u8[0xC], u8[0x9], u8[0x6], u8[0x3], // SubBytes(X3) + u8[0x9], u8[0x6], u8[0x3], u8[0xC], // ROT(SubBytes(X3)) + }; + uint32x4_t r = {0, (unsigned) rcon, 0, (unsigned) rcon}; + return vreinterpretq_m128i_u8(dest) ^ vreinterpretq_m128i_u32(r); +} +#endif + +/* Streaming Extensions */ + +// Guarantees that every preceding store is globally visible before any +// subsequent store. +// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx +FORCE_INLINE void _mm_sfence(void) +{ + __sync_synchronize(); +} + +// Store 128-bits (composed of 4 packed single-precision (32-bit) floating- +// point elements) from a into memory using a non-temporal memory hint. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_ps +FORCE_INLINE void _mm_stream_ps(float *p, __m128 a) +{ +#if __has_builtin(__builtin_nontemporal_store) + __builtin_nontemporal_store(a, (float32x4_t *) p); +#else + vst1q_f32(p, vreinterpretq_f32_m128(a)); +#endif +} + +// Stores the data in a to the address p without polluting the caches. If the +// cache line containing address p is already in the cache, the cache will be +// updated. +// https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx +FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a) +{ +#if __has_builtin(__builtin_nontemporal_store) + __builtin_nontemporal_store(a, p); +#else + vst1q_s64((int64_t *) p, vreinterpretq_s64_m128i(a)); +#endif +} + +// Load 128-bits of integer data from memory into dst using a non-temporal +// memory hint. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// dst[127:0] := MEM[mem_addr+127:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_load_si128 +FORCE_INLINE __m128i _mm_stream_load_si128(__m128i *p) +{ +#if __has_builtin(__builtin_nontemporal_store) + return __builtin_nontemporal_load(p); +#else + return vreinterpretq_m128i_s64(vld1q_s64((int64_t *) p)); +#endif +} + +// Cache line containing p is flushed and invalidated from all caches in the +// coherency domain. : +// https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx +FORCE_INLINE void _mm_clflush(void const *p) +{ + (void) p; + // no corollary for Neon? +} + +// Allocate aligned blocks of memory. +// https://software.intel.com/en-us/ +// cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks +FORCE_INLINE void *_mm_malloc(size_t size, size_t align) +{ + void *ptr; + if (align == 1) + return malloc(size); + if (align == 2 || (sizeof(void *) == 8 && align == 4)) + align = sizeof(void *); + if (!posix_memalign(&ptr, align, size)) + return ptr; + return NULL; +} + +// Free aligned memory that was allocated with _mm_malloc. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free +FORCE_INLINE void _mm_free(void *addr) +{ + free(addr); +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 8-bit integer v. +// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100) +FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#else + crc ^= v; + for (int bit = 0; bit < 8; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } +#endif + return crc; +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 16-bit integer v. +// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100) +FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#else + crc = _mm_crc32_u8(crc, v & 0xff); + crc = _mm_crc32_u8(crc, (v >> 8) & 0xff); +#endif + return crc; +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 32-bit integer v. +// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100) +FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#else + crc = _mm_crc32_u16(crc, v & 0xffff); + crc = _mm_crc32_u16(crc, (v >> 16) & 0xffff); +#endif + return crc; +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 64-bit integer v. +// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100) +FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#else + crc = _mm_crc32_u32((uint32_t)(crc), v & 0xffffffff); + crc = _mm_crc32_u32((uint32_t)(crc), (v >> 32) & 0xffffffff); +#endif + return crc; +} + +#if defined(__GNUC__) || defined(__clang__) +#pragma pop_macro("ALIGN_STRUCT") +#pragma pop_macro("FORCE_INLINE") +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC pop_options +#endif + +#endif diff --git a/thirdparty/embree-aarch64/common/simd/avx.h b/thirdparty/embree/common/simd/avx.h index c840e41805..d3100306ee 100644 --- a/thirdparty/embree-aarch64/common/simd/avx.h +++ b/thirdparty/embree/common/simd/avx.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/simd/avx512.h b/thirdparty/embree/common/simd/avx512.h index 25414ab5b1..d43bbacea1 100644 --- a/thirdparty/embree-aarch64/common/simd/avx512.h +++ b/thirdparty/embree/common/simd/avx512.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/simd/simd.h b/thirdparty/embree/common/simd/simd.h index 647851110b..195506b530 100644 --- a/thirdparty/embree-aarch64/common/simd/simd.h +++ b/thirdparty/embree/common/simd/simd.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -6,7 +6,7 @@ #include "../math/math.h" /* include SSE wrapper classes */ -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined(__SSE__) # include "sse.h" #endif diff --git a/thirdparty/embree-aarch64/common/simd/sse.cpp b/thirdparty/embree/common/simd/sse.cpp index 1732cfa421..535d6943d8 100644 --- a/thirdparty/embree-aarch64/common/simd/sse.cpp +++ b/thirdparty/embree/common/simd/sse.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "sse.h" diff --git a/thirdparty/embree-aarch64/common/simd/sse.h b/thirdparty/embree/common/simd/sse.h index 6bc818b55b..1465fb4fb0 100644 --- a/thirdparty/embree-aarch64/common/simd/sse.h +++ b/thirdparty/embree/common/simd/sse.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,7 +11,7 @@ namespace embree { -#if (defined(__aarch64__) && defined(BUILD_IOS)) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline __m128 blendv_ps(__m128 f, __m128 t, __m128 mask) { return _mm_blendv_ps(f,t,mask); } diff --git a/thirdparty/embree-aarch64/common/simd/varying.h b/thirdparty/embree/common/simd/varying.h index 9a46817da9..9b98d326be 100644 --- a/thirdparty/embree-aarch64/common/simd/varying.h +++ b/thirdparty/embree/common/simd/varying.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -9,7 +9,7 @@ namespace embree { /* Varying numeric types */ template<int N> - struct vfloat + struct vfloat_impl { union { float f[N]; int i[N]; }; __forceinline const float& operator [](size_t index) const { assert(index < N); return f[index]; } @@ -17,7 +17,7 @@ namespace embree }; template<int N> - struct vdouble + struct vdouble_impl { union { double f[N]; long long i[N]; }; __forceinline const double& operator [](size_t index) const { assert(index < N); return f[index]; } @@ -25,7 +25,7 @@ namespace embree }; template<int N> - struct vint + struct vint_impl { int i[N]; __forceinline const int& operator [](size_t index) const { assert(index < N); return i[index]; } @@ -33,7 +33,7 @@ namespace embree }; template<int N> - struct vuint + struct vuint_impl { unsigned int i[N]; __forceinline const unsigned int& operator [](size_t index) const { assert(index < N); return i[index]; } @@ -41,7 +41,7 @@ namespace embree }; template<int N> - struct vllong + struct vllong_impl { long long i[N]; __forceinline const long long& operator [](size_t index) const { assert(index < N); return i[index]; } @@ -49,20 +49,13 @@ namespace embree }; /* Varying bool types */ - template<int N> struct vboolf { int i[N]; }; // for float/int - template<int N> struct vboold { long long i[N]; }; // for double/long long - - /* Aliases to default types */ - template<int N> using vreal = vfloat<N>; - template<int N> using vbool = vboolf<N>; - + template<int N> struct vboolf_impl { int i[N]; }; // for float/int + template<int N> struct vboold_impl { long long i[N]; }; // for double/long long + /* Varying size constants */ #if defined(__AVX512VL__) // SKX const int VSIZEX = 8; // default size const int VSIZEL = 16; // large size -#elif defined(__AVX512F__) // KNL - const int VSIZEX = 16; - const int VSIZEL = 16; #elif defined(__AVX__) const int VSIZEX = 8; const int VSIZEL = 8; @@ -71,21 +64,41 @@ namespace embree const int VSIZEL = 4; #endif - /* Extends varying size N to optimal or up to max(N, N2) */ - template<int N, int N2 = VSIZEX> - struct vextend - { -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - /* use 16-wide SIMD calculations on KNL even for 4 and 8 wide SIMD */ - static const int size = (N2 == VSIZEX) ? VSIZEX : N; - #define SIMD_MODE(N) N, 16 -#else - /* calculate with same SIMD width otherwise */ - static const int size = N; - #define SIMD_MODE(N) N, N -#endif + template<int N> + struct vtypes { + using vbool = vboolf_impl<N>; + using vboolf = vboolf_impl<N>; + using vboold = vboold_impl<N>; + using vint = vint_impl<N>; + using vuint = vuint_impl<N>; + using vllong = vllong_impl<N>; + using vfloat = vfloat_impl<N>; + using vdouble = vdouble_impl<N>; + }; + + template<> + struct vtypes<1> { + using vbool = bool; + using vboolf = bool; + using vboold = bool; + using vint = int; + using vuint = unsigned int; + using vllong = long long; + using vfloat = float; + using vdouble = double; }; + /* Aliases to default types */ + template<int N> using vbool = typename vtypes<N>::vbool; + template<int N> using vboolf = typename vtypes<N>::vboolf; + template<int N> using vboold = typename vtypes<N>::vboold; + template<int N> using vint = typename vtypes<N>::vint; + template<int N> using vuint = typename vtypes<N>::vuint; + template<int N> using vllong = typename vtypes<N>::vllong; + template<int N> using vreal = typename vtypes<N>::vfloat; + template<int N> using vfloat = typename vtypes<N>::vfloat; + template<int N> using vdouble = typename vtypes<N>::vdouble; + /* 4-wide shortcuts */ typedef vfloat<4> vfloat4; typedef vdouble<4> vdouble4; diff --git a/thirdparty/embree-aarch64/common/simd/vboold4_avx.h b/thirdparty/embree/common/simd/vboold4_avx.h index 6505ee56f3..7db0d1c5c1 100644 --- a/thirdparty/embree-aarch64/common/simd/vboold4_avx.h +++ b/thirdparty/embree/common/simd/vboold4_avx.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide AVX bool type for 64bit data types*/ @@ -49,19 +57,13 @@ namespace embree #endif } - __forceinline vboold(__m128d a, __m128d b) : vl(a), vh(b) {} - //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// __forceinline vboold(FalseTy) : v(_mm256_setzero_pd()) {} -#if !defined(__aarch64__) __forceinline vboold(TrueTy) : v(_mm256_cmp_pd(_mm256_setzero_pd(), _mm256_setzero_pd(), _CMP_EQ_OQ)) {} -#else - __forceinline vboold(TrueTy) : v(_mm256_cmpeq_pd(_mm256_setzero_pd(), _mm256_setzero_pd())) {} -#endif - + //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -105,10 +107,9 @@ namespace embree /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -#if !defined(__aarch64__) __forceinline vboold4 unpacklo(const vboold4& a, const vboold4& b) { return _mm256_unpacklo_pd(a, b); } __forceinline vboold4 unpackhi(const vboold4& a, const vboold4& b) { return _mm256_unpackhi_pd(a, b); } -#endif + #if defined(__AVX2__) template<int i0, int i1, int i2, int i3> @@ -158,3 +159,11 @@ namespace embree << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboold4_avx512.h b/thirdparty/embree/common/simd/vboold4_avx512.h index 4fe730d713..ceaad7bba5 100644 --- a/thirdparty/embree-aarch64/common/simd/vboold4_avx512.h +++ b/thirdparty/embree/common/simd/vboold4_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide AVX-512 bool type */ @@ -138,3 +146,11 @@ namespace embree return cout << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboold8_avx512.h b/thirdparty/embree/common/simd/vboold8_avx512.h index fdf3f00de5..66d2054872 100644 --- a/thirdparty/embree-aarch64/common/simd/vboold8_avx512.h +++ b/thirdparty/embree/common/simd/vboold8_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX-512 bool type */ @@ -32,25 +40,12 @@ namespace embree /* return int8 mask */ __forceinline __m128i mask8() const { -#if defined(__AVX512BW__) return _mm_movm_epi8(v); -#else - const __m512i f = _mm512_set1_epi64(0); - const __m512i t = _mm512_set1_epi64(-1); - const __m512i m = _mm512_mask_or_epi64(f,v,t,t); - return _mm512_cvtepi64_epi8(m); -#endif } /* return int64 mask */ __forceinline __m512i mask64() const { -#if defined(__AVX512DQ__) return _mm512_movm_epi64(v); -#else - const __m512i f = _mm512_set1_epi64(0); - const __m512i t = _mm512_set1_epi64(-1); - return _mm512_mask_or_epi64(f,v,t,t); -#endif } //////////////////////////////////////////////////////////////////////////////// @@ -146,3 +141,11 @@ namespace embree return cout << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboolf16_avx512.h b/thirdparty/embree/common/simd/vboolf16_avx512.h index 238cdc8eb9..19841dcea8 100644 --- a/thirdparty/embree-aarch64/common/simd/vboolf16_avx512.h +++ b/thirdparty/embree/common/simd/vboolf16_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 16-wide AVX-512 bool type */ @@ -33,25 +41,12 @@ namespace embree /* return int8 mask */ __forceinline __m128i mask8() const { -#if defined(__AVX512BW__) return _mm_movm_epi8(v); -#else - const __m512i f = _mm512_set1_epi32(0); - const __m512i t = _mm512_set1_epi32(-1); - const __m512i m = _mm512_mask_or_epi32(f,v,t,t); - return _mm512_cvtepi32_epi8(m); -#endif } /* return int32 mask */ __forceinline __m512i mask32() const { -#if defined(__AVX512DQ__) return _mm512_movm_epi32(v); -#else - const __m512i f = _mm512_set1_epi32(0); - const __m512i t = _mm512_set1_epi32(-1); - return _mm512_mask_or_epi32(f,v,t,t); -#endif } //////////////////////////////////////////////////////////////////////////////// @@ -148,3 +143,11 @@ namespace embree return cout << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboolf4_avx512.h b/thirdparty/embree/common/simd/vboolf4_avx512.h index 2ae4c4470e..e65f66b025 100644 --- a/thirdparty/embree-aarch64/common/simd/vboolf4_avx512.h +++ b/thirdparty/embree/common/simd/vboolf4_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide AVX-512 bool type */ @@ -141,3 +149,11 @@ namespace embree return cout << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboolf4_sse2.h b/thirdparty/embree/common/simd/vboolf4_sse2.h index ed53b3c783..fa84b1b6ee 100644 --- a/thirdparty/embree-aarch64/common/simd/vboolf4_sse2.h +++ b/thirdparty/embree/common/simd/vboolf4_sse2.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide SSE bool type */ @@ -37,13 +45,9 @@ namespace embree : v(mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {} __forceinline vboolf(bool a, bool b, bool c, bool d) : v(mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {} -#if defined(__aarch64__) && defined(BUILD_IOS) - __forceinline vboolf(int mask) { v = mm_lookupmask_ps[mask]; } - __forceinline vboolf(unsigned int mask) { v = mm_lookupmask_ps[mask]; } -#else __forceinline vboolf(int mask) { assert(mask >= 0 && mask < 16); v = mm_lookupmask_ps[mask]; } __forceinline vboolf(unsigned int mask) { assert(mask < 16); v = mm_lookupmask_ps[mask]; } -#endif + /* return int32 mask */ __forceinline __m128i mask32() const { return _mm_castps_si128(v); @@ -60,13 +64,8 @@ namespace embree /// Array Access //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) && defined(BUILD_IOS) - __forceinline bool operator [](size_t index) const { return (_mm_movemask_ps(v) >> index) & 1; } - __forceinline int& operator [](size_t index) { return i[index]; } -#else __forceinline bool operator [](size_t index) const { assert(index < 4); return (_mm_movemask_ps(v) >> index) & 1; } __forceinline int& operator [](size_t index) { assert(index < 4); return i[index]; } -#endif }; //////////////////////////////////////////////////////////////////////////////// @@ -101,7 +100,7 @@ namespace embree __forceinline vboolf4 operator ==(const vboolf4& a, const vboolf4& b) { return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); } __forceinline vboolf4 select(const vboolf4& m, const vboolf4& t, const vboolf4& f) { -#if (defined(__aarch64__) && defined(BUILD_IOS)) || defined(__SSE4_1__) +#if defined(__SSE4_1__) return _mm_blendv_ps(f, t, m); #else return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); @@ -115,17 +114,6 @@ namespace embree __forceinline vboolf4 unpacklo(const vboolf4& a, const vboolf4& b) { return _mm_unpacklo_ps(a, b); } __forceinline vboolf4 unpackhi(const vboolf4& a, const vboolf4& b) { return _mm_unpackhi_ps(a, b); } -#if defined(__aarch64__) - template<int i0, int i1, int i2, int i3> - __forceinline vboolf4 shuffle(const vboolf4& v) { - return vreinterpretq_f32_u8(vqtbl1q_u8( vreinterpretq_u8_s32(v), _MN_SHUFFLE(i0, i1, i2, i3))); - } - - template<int i0, int i1, int i2, int i3> - __forceinline vboolf4 shuffle(const vboolf4& a, const vboolf4& b) { - return vreinterpretq_f32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3))); - } -#else template<int i0, int i1, int i2, int i3> __forceinline vboolf4 shuffle(const vboolf4& v) { return _mm_castsi128_ps(_mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0))); @@ -135,8 +123,7 @@ namespace embree __forceinline vboolf4 shuffle(const vboolf4& a, const vboolf4& b) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } -#endif - + template<int i0> __forceinline vboolf4 shuffle(const vboolf4& v) { return shuffle<i0,i0,i0,i0>(v); @@ -148,7 +135,7 @@ namespace embree template<> __forceinline vboolf4 shuffle<0, 1, 0, 1>(const vboolf4& v) { return _mm_castpd_ps(_mm_movedup_pd(v)); } #endif -#if defined(__SSE4_1__) && !defined(__aarch64__) +#if defined(__SSE4_1__) template<int dst, int src, int clr> __forceinline vboolf4 insert(const vboolf4& a, const vboolf4& b) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); } template<int dst, int src> __forceinline vboolf4 insert(const vboolf4& a, const vboolf4& b) { return insert<dst, src, 0>(a, b); } template<int dst> __forceinline vboolf4 insert(const vboolf4& a, const bool b) { return insert<dst, 0>(a, vboolf4(b)); } @@ -170,15 +157,11 @@ namespace embree __forceinline bool none(const vboolf4& valid, const vboolf4& b) { return none(valid & b); } __forceinline size_t movemask(const vboolf4& a) { return _mm_movemask_ps(a); } -#if defined(__aarch64__) && defined(BUILD_IOS) -__forceinline size_t popcnt(const vboolf4& a) { return _mm_movemask_popcnt_ps(a); } -#else #if defined(__SSE4_2__) __forceinline size_t popcnt(const vboolf4& a) { return popcnt((size_t)_mm_movemask_ps(a)); } #else __forceinline size_t popcnt(const vboolf4& a) { return bool(a[0])+bool(a[1])+bool(a[2])+bool(a[3]); } #endif -#endif //////////////////////////////////////////////////////////////////////////////// /// Get/Set Functions @@ -196,3 +179,11 @@ __forceinline size_t popcnt(const vboolf4& a) { return _mm_movemask_popcnt_ps(a) return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboolf8_avx.h b/thirdparty/embree/common/simd/vboolf8_avx.h index 4f64741b55..ba77cc3c5e 100644 --- a/thirdparty/embree-aarch64/common/simd/vboolf8_avx.h +++ b/thirdparty/embree/common/simd/vboolf8_avx.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX bool type */ @@ -68,11 +76,8 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// __forceinline vboolf(FalseTy) : v(_mm256_setzero_ps()) {} -#if !defined(__aarch64__) __forceinline vboolf(TrueTy) : v(_mm256_cmp_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _CMP_EQ_OQ)) {} -#else - __forceinline vboolf(TrueTy) : v(_mm256_cmpeq_ps(_mm256_setzero_ps(), _mm256_setzero_ps())) {} -#endif + //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -187,3 +192,11 @@ namespace embree << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vboolf8_avx512.h b/thirdparty/embree/common/simd/vboolf8_avx512.h index 2a52b554c7..73ff5666e1 100644 --- a/thirdparty/embree-aarch64/common/simd/vboolf8_avx512.h +++ b/thirdparty/embree/common/simd/vboolf8_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX-512 bool type */ @@ -141,3 +149,11 @@ namespace embree return cout << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vdouble4_avx.h b/thirdparty/embree/common/simd/vdouble4_avx.h index 1f65b45d7e..55326de7dd 100644 --- a/thirdparty/embree-aarch64/common/simd/vdouble4_avx.h +++ b/thirdparty/embree/common/simd/vdouble4_avx.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide AVX 64-bit double type */ @@ -181,20 +189,13 @@ namespace embree __forceinline vboold4 operator >=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd_mask(a, b, _MM_CMPINT_GE); } __forceinline vboold4 operator > (const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd_mask(a, b, _MM_CMPINT_GT); } __forceinline vboold4 operator <=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd_mask(a, b, _MM_CMPINT_LE); } -#elif !defined(__aarch64__) +#else __forceinline vboold4 operator ==(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_EQ_OQ); } __forceinline vboold4 operator !=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ); } __forceinline vboold4 operator < (const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_LT_OS); } __forceinline vboold4 operator >=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_NLT_US); } __forceinline vboold4 operator > (const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_NLE_US); } __forceinline vboold4 operator <=(const vdouble4& a, const vdouble4& b) { return _mm256_cmp_pd(a, b, _CMP_LE_OS); } -#else - __forceinline vboold4 operator ==(const vdouble4& a, const vdouble4& b) { return _mm256_cmpeq_pd(a, b); } - __forceinline vboold4 operator !=(const vdouble4& a, const vdouble4& b) { return _mm256_cmpneq_pd(a, b); } - __forceinline vboold4 operator < (const vdouble4& a, const vdouble4& b) { return _mm256_cmplt_pd(a, b); } - __forceinline vboold4 operator >=(const vdouble4& a, const vdouble4& b) { return _mm256_cmpnlt_pd(a, b); } - __forceinline vboold4 operator > (const vdouble4& a, const vdouble4& b) { return _mm256_cmpnle_pd(a, b); } - __forceinline vboold4 operator <=(const vdouble4& a, const vdouble4& b) { return _mm256_cmple_pd(a, b); } #endif __forceinline vboold4 operator ==(const vdouble4& a, double b) { return a == vdouble4(b); } @@ -246,18 +247,6 @@ namespace embree #endif } - __forceinline void xchg(const vboold4& m, vdouble4& a, vdouble4& b) { - const vdouble4 c = a; a = select(m,b,a); b = select(m,c,b); - } - - __forceinline vboold4 test(const vdouble4& a, const vdouble4& b) { -#if defined(__AVX512VL__) - return _mm256_test_epi64_mask(_mm256_castpd_si256(a),_mm256_castpd_si256(b)); -#else - return _mm256_testz_si256(_mm256_castpd_si256(a),_mm256_castpd_si256(b)); -#endif - } - //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -322,3 +311,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vdouble8_avx512.h b/thirdparty/embree/common/simd/vdouble8_avx512.h index 4eec7d2f6a..98d21bfe4a 100644 --- a/thirdparty/embree-aarch64/common/simd/vdouble8_avx512.h +++ b/thirdparty/embree/common/simd/vdouble8_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX-512 64-bit double type */ @@ -91,15 +99,6 @@ namespace embree _mm512_mask_store_pd(addr, mask, v2); } - /* pass by value to avoid compiler generating inefficient code */ - static __forceinline void storeu_compact(const vboold8 mask,void * addr, const vdouble8& reg) { - _mm512_mask_compressstoreu_pd(addr, mask, reg); - } - - static __forceinline vdouble8 compact64bit(const vboold8& mask, vdouble8& v) { - return _mm512_mask_compress_pd(v, mask, v); - } - static __forceinline vdouble8 compact(const vboold8& mask, vdouble8& v) { return _mm512_mask_compress_pd(v, mask, v); } @@ -260,18 +259,6 @@ namespace embree return _mm512_mask_or_pd(f,m,t,t); } - __forceinline void xchg(const vboold8& m, vdouble8& a, vdouble8& b) { - const vdouble8 c = a; a = select(m,b,a); b = select(m,c,b); - } - - __forceinline vboold8 test(const vboold8& m, const vdouble8& a, const vdouble8& b) { - return _mm512_mask_test_epi64_mask(m,_mm512_castpd_si512(a),_mm512_castpd_si512(b)); - } - - __forceinline vboold8 test(const vdouble8& a, const vdouble8& b) { - return _mm512_test_epi64_mask(_mm512_castpd_si512(a),_mm512_castpd_si512(b)); - } - //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -354,3 +341,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vfloat16_avx512.h b/thirdparty/embree/common/simd/vfloat16_avx512.h index aed2419b77..9f1e2459c4 100644 --- a/thirdparty/embree-aarch64/common/simd/vfloat16_avx512.h +++ b/thirdparty/embree/common/simd/vfloat16_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 16-wide AVX-512 float type */ @@ -73,11 +81,11 @@ namespace embree } /* WARNING: due to f64x4 the mask is considered as an 8bit mask */ - __forceinline vfloat(const vboolf16& mask, const vfloat8& a, const vfloat8& b) { + /*__forceinline vfloat(const vboolf16& mask, const vfloat8& a, const vfloat8& b) { __m512d aa = _mm512_broadcast_f64x4(_mm256_castps_pd(a)); aa = _mm512_mask_broadcast_f64x4(aa,mask,_mm256_castps_pd(b)); v = _mm512_castpd_ps(aa); - } + }*/ __forceinline explicit vfloat(const vint16& a) { v = _mm512_cvtepi32_ps(a); @@ -123,30 +131,6 @@ namespace embree return _mm512_set1_ps(*f); } - static __forceinline vfloat16 compact(const vboolf16& mask, vfloat16 &v) { - return _mm512_mask_compress_ps(v, mask, v); - } - static __forceinline vfloat16 compact(const vboolf16& mask, vfloat16 &a, const vfloat16& b) { - return _mm512_mask_compress_ps(a, mask, b); - } - - static __forceinline vfloat16 expand(const vboolf16& mask, const vfloat16& a, vfloat16& b) { - return _mm512_mask_expand_ps(b, mask, a); - } - - static __forceinline vfloat16 loadu_compact(const vboolf16& mask, const void* ptr) { - return _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), mask, (float*)ptr); - } - - static __forceinline void storeu_compact(const vboolf16& mask, float *addr, const vfloat16 reg) { - _mm512_mask_compressstoreu_ps(addr, mask, reg); - } - - static __forceinline void storeu_compact_single(const vboolf16& mask, float * addr, const vfloat16& reg) { - //_mm512_mask_compressstoreu_ps(addr,mask,reg); - *addr = mm512_cvtss_f32(_mm512_mask_compress_ps(reg, mask, reg)); - } - template<int scale = 4> static __forceinline vfloat16 gather(const float* ptr, const vint16& index) { return _mm512_i32gather_ps(index, ptr, scale); @@ -194,12 +178,8 @@ namespace embree __forceinline vfloat16 signmsk(const vfloat16& a) { return _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(a),_mm512_set1_epi32(0x80000000))); } __forceinline vfloat16 rcp(const vfloat16& a) { -#if defined(__AVX512ER__) - return _mm512_rcp28_ps(a); -#else const vfloat16 r = _mm512_rcp14_ps(a); return _mm512_mul_ps(r, _mm512_fnmadd_ps(r, a, vfloat16(2.0f))); -#endif } __forceinline vfloat16 sqr (const vfloat16& a) { return _mm512_mul_ps(a,a); } @@ -207,13 +187,9 @@ namespace embree __forceinline vfloat16 rsqrt(const vfloat16& a) { -#if defined(__AVX512VL__) const vfloat16 r = _mm512_rsqrt14_ps(a); return _mm512_fmadd_ps(_mm512_set1_ps(1.5f), r, _mm512_mul_ps(_mm512_mul_ps(_mm512_mul_ps(a, _mm512_set1_ps(-0.5f)), r), _mm512_mul_ps(r, r))); -#else - return _mm512_rsqrt28_ps(a); -#endif } //////////////////////////////////////////////////////////////////////////////// @@ -242,54 +218,26 @@ namespace embree return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a),_mm512_castps_si512(b))); } - __forceinline vfloat16 min(const vfloat16& a, const vfloat16& b) { - return _mm512_min_ps(a,b); - } - __forceinline vfloat16 min(const vfloat16& a, float b) { - return _mm512_min_ps(a,vfloat16(b)); - } - __forceinline vfloat16 min(const float& a, const vfloat16& b) { - return _mm512_min_ps(vfloat16(a),b); - } - - __forceinline vfloat16 max(const vfloat16& a, const vfloat16& b) { - return _mm512_max_ps(a,b); - } - __forceinline vfloat16 max(const vfloat16& a, float b) { - return _mm512_max_ps(a,vfloat16(b)); - } - __forceinline vfloat16 max(const float& a, const vfloat16& b) { - return _mm512_max_ps(vfloat16(a),b); - } + __forceinline vfloat16 min(const vfloat16& a, const vfloat16& b) { return _mm512_min_ps(a,b); } + __forceinline vfloat16 min(const vfloat16& a, float b) { return _mm512_min_ps(a,vfloat16(b)); } + __forceinline vfloat16 min(const float& a, const vfloat16& b) { return _mm512_min_ps(vfloat16(a),b); } - __forceinline vfloat16 mask_add(const vboolf16& mask, const vfloat16& c, const vfloat16& a, const vfloat16& b) { return _mm512_mask_add_ps (c,mask,a,b); } - __forceinline vfloat16 mask_min(const vboolf16& mask, const vfloat16& c, const vfloat16& a, const vfloat16& b) { - return _mm512_mask_min_ps(c,mask,a,b); - }; - __forceinline vfloat16 mask_max(const vboolf16& mask, const vfloat16& c, const vfloat16& a, const vfloat16& b) { - return _mm512_mask_max_ps(c,mask,a,b); - }; + __forceinline vfloat16 max(const vfloat16& a, const vfloat16& b) { return _mm512_max_ps(a,b); } + __forceinline vfloat16 max(const vfloat16& a, float b) { return _mm512_max_ps(a,vfloat16(b)); } + __forceinline vfloat16 max(const float& a, const vfloat16& b) { return _mm512_max_ps(vfloat16(a),b); } __forceinline vfloat16 mini(const vfloat16& a, const vfloat16& b) { -#if !defined(__AVX512ER__) // SKX const vint16 ai = _mm512_castps_si512(a); const vint16 bi = _mm512_castps_si512(b); const vint16 ci = _mm512_min_epi32(ai,bi); return _mm512_castsi512_ps(ci); -#else // KNL - return min(a,b); -#endif } __forceinline vfloat16 maxi(const vfloat16& a, const vfloat16& b) { -#if !defined(__AVX512ER__) // SKX const vint16 ai = _mm512_castps_si512(a); const vint16 bi = _mm512_castps_si512(b); const vint16 ci = _mm512_max_epi32(ai,bi); return _mm512_castsi512_ps(ci); -#else // KNL - return max(a,b); -#endif } //////////////////////////////////////////////////////////////////////////////// @@ -300,43 +248,6 @@ namespace embree __forceinline vfloat16 msub (const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fmsub_ps(a,b,c); } __forceinline vfloat16 nmadd(const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fnmadd_ps(a,b,c); } __forceinline vfloat16 nmsub(const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fnmsub_ps(a,b,c); } - - __forceinline vfloat16 mask_msub(const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_fmsub_ps(a,mask,b,c); } - - __forceinline vfloat16 madd231 (const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fmadd_ps(c,b,a); } - __forceinline vfloat16 msub213 (const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fmsub_ps(a,b,c); } - __forceinline vfloat16 msub231 (const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fmsub_ps(c,b,a); } - __forceinline vfloat16 msubr231(const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fnmadd_ps(c,b,a); } - - - //////////////////////////////////////////////////////////////////////////////// - /// Operators with rounding - //////////////////////////////////////////////////////////////////////////////// - - __forceinline vfloat16 madd_round_down(const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fmadd_round_ps(a,b,c,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 madd_round_up (const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_fmadd_round_ps(a,b,c,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 mul_round_down(const vfloat16& a, const vfloat16& b) { return _mm512_mul_round_ps(a,b,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 mul_round_up (const vfloat16& a, const vfloat16& b) { return _mm512_mul_round_ps(a,b,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 add_round_down(const vfloat16& a, const vfloat16& b) { return _mm512_add_round_ps(a,b,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 add_round_up (const vfloat16& a, const vfloat16& b) { return _mm512_add_round_ps(a,b,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 sub_round_down(const vfloat16& a, const vfloat16& b) { return _mm512_sub_round_ps(a,b,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 sub_round_up (const vfloat16& a, const vfloat16& b) { return _mm512_sub_round_ps(a,b,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 div_round_down(const vfloat16& a, const vfloat16& b) { return _mm512_div_round_ps(a,b,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 div_round_up (const vfloat16& a, const vfloat16& b) { return _mm512_div_round_ps(a,b,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 mask_msub_round_down(const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_fmsub_round_ps(a,mask,b,c,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 mask_msub_round_up (const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_fmsub_round_ps(a,mask,b,c,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 mask_mul_round_down(const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_mul_round_ps(a,mask,b,c,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 mask_mul_round_up (const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_mul_round_ps(a,mask,b,c,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - - __forceinline vfloat16 mask_sub_round_down(const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_sub_round_ps(a,mask,b,c,_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); } - __forceinline vfloat16 mask_sub_round_up (const vboolf16& mask,const vfloat16& a, const vfloat16& b, const vfloat16& c) { return _mm512_mask_sub_round_ps(a,mask,b,c,_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); } - //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators @@ -404,13 +315,6 @@ namespace embree return madd(t,b-a,a); } - __forceinline void xchg(vboolf16 m, vfloat16& a, vfloat16& b) - { - vfloat16 c = a; - a = select(m,b,a); - b = select(m,c,b); - } - //////////////////////////////////////////////////////////////////////////////// /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// @@ -455,24 +359,6 @@ namespace embree return _mm512_shuffle_f32x4(v, v, _MM_SHUFFLE(i3, i2, i1, i0)); } - __forceinline vfloat16 interleave_even(const vfloat16& a, const vfloat16& b) { - return _mm512_castsi512_ps(_mm512_mask_shuffle_epi32(_mm512_castps_si512(a), mm512_int2mask(0xaaaa), _mm512_castps_si512(b), (_MM_PERM_ENUM)0xb1)); - } - - __forceinline vfloat16 interleave_odd(const vfloat16& a, const vfloat16& b) { - return _mm512_castsi512_ps(_mm512_mask_shuffle_epi32(_mm512_castps_si512(b), mm512_int2mask(0x5555), _mm512_castps_si512(a), (_MM_PERM_ENUM)0xb1)); - } - - __forceinline vfloat16 interleave2_even(const vfloat16& a, const vfloat16& b) { - /* mask should be 8-bit but is 16-bit to reuse for interleave_even */ - return _mm512_castsi512_ps(_mm512_mask_permutex_epi64(_mm512_castps_si512(a), mm512_int2mask(0xaaaa), _mm512_castps_si512(b), (_MM_PERM_ENUM)0xb1)); - } - - __forceinline vfloat16 interleave2_odd(const vfloat16& a, const vfloat16& b) { - /* mask should be 8-bit but is 16-bit to reuse for interleave_odd */ - return _mm512_castsi512_ps(_mm512_mask_permutex_epi64(_mm512_castps_si512(b), mm512_int2mask(0x5555), _mm512_castps_si512(a), (_MM_PERM_ENUM)0xb1)); - } - __forceinline vfloat16 interleave4_even(const vfloat16& a, const vfloat16& b) { return _mm512_castsi512_ps(_mm512_mask_permutex_epi64(_mm512_castps_si512(a), mm512_int2mask(0xcc), _mm512_castps_si512(b), (_MM_PERM_ENUM)0x4e)); } @@ -537,17 +423,6 @@ namespace embree __forceinline void transpose(const vfloat16& r0, const vfloat16& r1, const vfloat16& r2, const vfloat16& r3, vfloat16& c0, vfloat16& c1, vfloat16& c2, vfloat16& c3) { -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - vfloat16 a0a1_c0c1 = interleave_even(r0, r1); - vfloat16 a2a3_c2c3 = interleave_even(r2, r3); - vfloat16 b0b1_d0d1 = interleave_odd (r0, r1); - vfloat16 b2b3_d2d3 = interleave_odd (r2, r3); - - c0 = interleave2_even(a0a1_c0c1, a2a3_c2c3); - c1 = interleave2_even(b0b1_d0d1, b2b3_d2d3); - c2 = interleave2_odd (a0a1_c0c1, a2a3_c2c3); - c3 = interleave2_odd (b0b1_d0d1, b2b3_d2d3); -#else vfloat16 a0a2_b0b2 = unpacklo(r0, r2); vfloat16 c0c2_d0d2 = unpackhi(r0, r2); vfloat16 a1a3_b1b3 = unpacklo(r1, r3); @@ -557,7 +432,6 @@ namespace embree c1 = unpackhi(a0a2_b0b2, a1a3_b1b3); c2 = unpacklo(c0c2_d0d2, c1c3_d1d3); c3 = unpackhi(c0c2_d0d2, c1c3_d1d3); -#endif } __forceinline void transpose(const vfloat4& r0, const vfloat4& r1, const vfloat4& r2, const vfloat4& r3, @@ -715,44 +589,6 @@ namespace embree return v; } - //////////////////////////////////////////////////////////////////////////////// - /// Memory load and store operations - //////////////////////////////////////////////////////////////////////////////// - - __forceinline vfloat16 loadAOS4to16f(const float& x, const float& y, const float& z) - { - vfloat16 f = zero; - f = select(0x1111,vfloat16::broadcast(&x),f); - f = select(0x2222,vfloat16::broadcast(&y),f); - f = select(0x4444,vfloat16::broadcast(&z),f); - return f; - } - - __forceinline vfloat16 loadAOS4to16f(unsigned int index, - const vfloat16& x, - const vfloat16& y, - const vfloat16& z) - { - vfloat16 f = zero; - f = select(0x1111,vfloat16::broadcast((float*)&x + index),f); - f = select(0x2222,vfloat16::broadcast((float*)&y + index),f); - f = select(0x4444,vfloat16::broadcast((float*)&z + index),f); - return f; - } - - __forceinline vfloat16 loadAOS4to16f(unsigned int index, - const vfloat16& x, - const vfloat16& y, - const vfloat16& z, - const vfloat16& fill) - { - vfloat16 f = fill; - f = select(0x1111,vfloat16::broadcast((float*)&x + index),f); - f = select(0x2222,vfloat16::broadcast((float*)&y + index),f); - f = select(0x4444,vfloat16::broadcast((float*)&z + index),f); - return f; - } - __forceinline vfloat16 rcp_safe(const vfloat16& a) { return rcp(select(a != vfloat16(zero), a, vfloat16(min_rcp_input))); } @@ -769,3 +605,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vfloat4_sse2.h b/thirdparty/embree/common/simd/vfloat4_sse2.h index 5732c0fbc8..5215bf9730 100644 --- a/thirdparty/embree-aarch64/common/simd/vfloat4_sse2.h +++ b/thirdparty/embree/common/simd/vfloat4_sse2.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide SSE float type */ @@ -10,18 +18,18 @@ namespace embree struct vfloat<4> { ALIGNED_STRUCT_(16); - + typedef vboolf4 Bool; typedef vint4 Int; typedef vfloat4 Float; - + enum { size = 4 }; // number of SIMD elements union { __m128 v; float f[4]; int i[4]; }; // data //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// - + __forceinline vfloat() {} __forceinline vfloat(const vfloat4& other) { v = other.v; } __forceinline vfloat4& operator =(const vfloat4& other) { v = other.v; return *this; } @@ -34,19 +42,14 @@ namespace embree __forceinline vfloat(float a, float b, float c, float d) : v(_mm_set_ps(d, c, b, a)) {} __forceinline explicit vfloat(const vint4& a) : v(_mm_cvtepi32_ps(a)) {} -#if defined(__aarch64__) - __forceinline explicit vfloat(const vuint4& x) { - v = vcvtq_f32_u32(vreinterpretq_u32_s32(x.v)); - } -#else __forceinline explicit vfloat(const vuint4& x) { const __m128i a = _mm_and_si128(x,_mm_set1_epi32(0x7FFFFFFF)); - const __m128i b = _mm_and_si128(_mm_srai_epi32(x,31),_mm_set1_epi32(0x4F000000)); //0x4F000000 = 2^31 + const __m128i b = _mm_and_si128(_mm_srai_epi32(x,31),_mm_set1_epi32(0x4F000000)); //0x4F000000 = 2^31 const __m128 af = _mm_cvtepi32_ps(a); - const __m128 bf = _mm_castsi128_ps(b); + const __m128 bf = _mm_castsi128_ps(b); v = _mm_add_ps(af,bf); } -#endif + //////////////////////////////////////////////////////////////////////////////// /// Constants //////////////////////////////////////////////////////////////////////////////// @@ -71,13 +74,6 @@ namespace embree #if defined(__AVX512VL__) - static __forceinline vfloat4 compact(const vboolf4& mask, vfloat4 &v) { - return _mm_mask_compress_ps(v, mask, v); - } - static __forceinline vfloat4 compact(const vboolf4& mask, vfloat4 &a, const vfloat4& b) { - return _mm_mask_compress_ps(a, mask, b); - } - static __forceinline vfloat4 load (const vboolf4& mask, const void* ptr) { return _mm_mask_load_ps (_mm_setzero_ps(),mask,(float*)ptr); } static __forceinline vfloat4 loadu(const vboolf4& mask, const void* ptr) { return _mm_mask_loadu_ps(_mm_setzero_ps(),mask,(float*)ptr); } @@ -107,44 +103,32 @@ namespace embree #if defined (__SSE4_1__) return _mm_castsi128_ps(_mm_stream_load_si128((__m128i*)ptr)); #else - return _mm_load_ps(ptr); + return _mm_load_ps(ptr); #endif } -#if defined(__aarch64__) - static __forceinline vfloat4 load(const int8_t* ptr) { - return __m128(_mm_load4epi8_f32(((__m128i*)ptr))); - } -#elif defined(__SSE4_1__) - static __forceinline vfloat4 load(const int8_t* ptr) { +#if defined(__SSE4_1__) + static __forceinline vfloat4 load(const char* ptr) { return _mm_cvtepi32_ps(_mm_cvtepi8_epi32(_mm_loadu_si128((__m128i*)ptr))); } #else - static __forceinline vfloat4 load(const int8_t* ptr) { + static __forceinline vfloat4 load(const char* ptr) { return vfloat4(ptr[0],ptr[1],ptr[2],ptr[3]); } #endif -#if defined(__aarch64__) - static __forceinline vfloat4 load(const uint8_t* ptr) { - return __m128(_mm_load4epu8_f32(((__m128i*)ptr))); - } -#elif defined(__SSE4_1__) - static __forceinline vfloat4 load(const uint8_t* ptr) { +#if defined(__SSE4_1__) + static __forceinline vfloat4 load(const unsigned char* ptr) { return _mm_cvtepi32_ps(_mm_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr))); } #else - static __forceinline vfloat4 load(const uint8_t* ptr) { + static __forceinline vfloat4 load(const unsigned char* ptr) { //return _mm_cvtpu8_ps(*(__m64*)ptr); // don't enable, will use MMX instructions return vfloat4(ptr[0],ptr[1],ptr[2],ptr[3]); } #endif -#if defined(__aarch64__) - static __forceinline vfloat4 load(const short* ptr) { - return __m128(_mm_load4epi16_f32(((__m128i*)ptr))); - } -#elif defined(__SSE4_1__) +#if defined(__SSE4_1__) static __forceinline vfloat4 load(const short* ptr) { return _mm_cvtepi32_ps(_mm_cvtepi16_epi32(_mm_loadu_si128((__m128i*)ptr))); } @@ -157,15 +141,11 @@ namespace embree static __forceinline vfloat4 load(const unsigned short* ptr) { return _mm_mul_ps(vfloat4(vint4::load(ptr)),vfloat4(1.0f/65535.0f)); } - + static __forceinline void store_nt(void* ptr, const vfloat4& v) { #if defined (__SSE4_1__) -#if defined(__aarch64__) - _mm_stream_ps((float*)ptr,vreinterpretq_s32_f32(v.v)); -#else _mm_stream_ps((float*)ptr,v); -#endif #else _mm_store_ps((float*)ptr,v); #endif @@ -173,14 +153,14 @@ namespace embree template<int scale = 4> static __forceinline vfloat4 gather(const float* ptr, const vint4& index) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _mm_i32gather_ps(ptr, index, scale); #else return vfloat4( - *(float*)(((int8_t*)ptr)+scale*index[0]), - *(float*)(((int8_t*)ptr)+scale*index[1]), - *(float*)(((int8_t*)ptr)+scale*index[2]), - *(float*)(((int8_t*)ptr)+scale*index[3])); + *(float*)(((char*)ptr)+scale*index[0]), + *(float*)(((char*)ptr)+scale*index[1]), + *(float*)(((char*)ptr)+scale*index[2]), + *(float*)(((char*)ptr)+scale*index[3])); #endif } @@ -189,13 +169,13 @@ namespace embree vfloat4 r = zero; #if defined(__AVX512VL__) return _mm_mmask_i32gather_ps(r, mask, index, ptr, scale); -#elif defined(__AVX2__) && !defined(__aarch64__) +#elif defined(__AVX2__) return _mm_mask_i32gather_ps(r, ptr, index, mask, scale); #else - if (likely(mask[0])) r[0] = *(float*)(((int8_t*)ptr)+scale*index[0]); - if (likely(mask[1])) r[1] = *(float*)(((int8_t*)ptr)+scale*index[1]); - if (likely(mask[2])) r[2] = *(float*)(((int8_t*)ptr)+scale*index[2]); - if (likely(mask[3])) r[3] = *(float*)(((int8_t*)ptr)+scale*index[3]); + if (likely(mask[0])) r[0] = *(float*)(((char*)ptr)+scale*index[0]); + if (likely(mask[1])) r[1] = *(float*)(((char*)ptr)+scale*index[1]); + if (likely(mask[2])) r[2] = *(float*)(((char*)ptr)+scale*index[2]); + if (likely(mask[3])) r[3] = *(float*)(((char*)ptr)+scale*index[3]); return r; #endif } @@ -206,10 +186,10 @@ namespace embree #if defined(__AVX512VL__) _mm_i32scatter_ps((float*)ptr, index, v, scale); #else - *(float*)(((int8_t*)ptr)+scale*index[0]) = v[0]; - *(float*)(((int8_t*)ptr)+scale*index[1]) = v[1]; - *(float*)(((int8_t*)ptr)+scale*index[2]) = v[2]; - *(float*)(((int8_t*)ptr)+scale*index[3]) = v[3]; + *(float*)(((char*)ptr)+scale*index[0]) = v[0]; + *(float*)(((char*)ptr)+scale*index[1]) = v[1]; + *(float*)(((char*)ptr)+scale*index[2]) = v[2]; + *(float*)(((char*)ptr)+scale*index[3]) = v[3]; #endif } @@ -219,20 +199,20 @@ namespace embree #if defined(__AVX512VL__) _mm_mask_i32scatter_ps((float*)ptr ,mask, index, v, scale); #else - if (likely(mask[0])) *(float*)(((int8_t*)ptr)+scale*index[0]) = v[0]; - if (likely(mask[1])) *(float*)(((int8_t*)ptr)+scale*index[1]) = v[1]; - if (likely(mask[2])) *(float*)(((int8_t*)ptr)+scale*index[2]) = v[2]; - if (likely(mask[3])) *(float*)(((int8_t*)ptr)+scale*index[3]) = v[3]; + if (likely(mask[0])) *(float*)(((char*)ptr)+scale*index[0]) = v[0]; + if (likely(mask[1])) *(float*)(((char*)ptr)+scale*index[1]) = v[1]; + if (likely(mask[2])) *(float*)(((char*)ptr)+scale*index[2]) = v[2]; + if (likely(mask[3])) *(float*)(((char*)ptr)+scale*index[3]) = v[3]; #endif } - static __forceinline void store(const vboolf4& mask, int8_t* ptr, const vint4& ofs, const vfloat4& v) { + static __forceinline void store(const vboolf4& mask, char* ptr, const vint4& ofs, const vfloat4& v) { scatter<1>(mask,ptr,ofs,v); } static __forceinline void store(const vboolf4& mask, float* ptr, const vint4& ofs, const vfloat4& v) { scatter<4>(mask,ptr,ofs,v); } - + //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -243,15 +223,27 @@ namespace embree friend __forceinline vfloat4 select(const vboolf4& m, const vfloat4& t, const vfloat4& f) { #if defined(__AVX512VL__) return _mm_mask_blend_ps(m, f, t); -#elif defined(__SSE4_1__) || (defined(__aarch64__)) - return _mm_blendv_ps(f, t, m); +#elif defined(__SSE4_1__) + return _mm_blendv_ps(f, t, m); #else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); #endif } }; + //////////////////////////////////////////////////////////////////////////////// + /// Load/Store + //////////////////////////////////////////////////////////////////////////////// + template<> struct mem<vfloat4> + { + static __forceinline vfloat4 load (const vboolf4& mask, const void* ptr) { return vfloat4::load (mask,ptr); } + static __forceinline vfloat4 loadu(const vboolf4& mask, const void* ptr) { return vfloat4::loadu(mask,ptr); } + + static __forceinline void store (const vboolf4& mask, void* ptr, const vfloat4& v) { vfloat4::store (mask,ptr,v); } + static __forceinline void storeu(const vboolf4& mask, void* ptr, const vfloat4& v) { vfloat4::storeu(mask,ptr,v); } + }; + //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// @@ -264,47 +256,18 @@ namespace embree __forceinline vfloat4 toFloat(const vint4& a) { return vfloat4(a); } __forceinline vfloat4 operator +(const vfloat4& a) { return a; } -#if defined(__aarch64__) - __forceinline vfloat4 operator -(const vfloat4& a) { - return vnegq_f32(a); - } -#else __forceinline vfloat4 operator -(const vfloat4& a) { return _mm_xor_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } -#endif -#if defined(__aarch64__) - __forceinline vfloat4 abs(const vfloat4& a) { return _mm_abs_ps(a); } -#else __forceinline vfloat4 abs(const vfloat4& a) { return _mm_and_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); } -#endif - #if defined(__AVX512VL__) __forceinline vfloat4 sign(const vfloat4& a) { return _mm_mask_blend_ps(_mm_cmp_ps_mask(a, vfloat4(zero), _CMP_LT_OQ), vfloat4(one), -vfloat4(one)); } #else __forceinline vfloat4 sign(const vfloat4& a) { return blendv_ps(vfloat4(one), -vfloat4(one), _mm_cmplt_ps(a, vfloat4(zero))); } #endif - -#if defined(__aarch64__) - __forceinline vfloat4 signmsk(const vfloat4& a) { return _mm_and_ps(a, vreinterpretq_f32_u32(v0x80000000)); } -#else __forceinline vfloat4 signmsk(const vfloat4& a) { return _mm_and_ps(a,_mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } -#endif - + __forceinline vfloat4 rcp(const vfloat4& a) { -#if defined(__aarch64__) -#if defined(BUILD_IOS) - return vfloat4(vdivq_f32(vdupq_n_f32(1.0f),a.v)); -#else //BUILD_IOS - __m128 reciprocal = _mm_rcp_ps(a); - reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); - reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); - // +1 round since NEON's reciprocal estimate instruction has less accuracy than SSE2's rcp. - reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal); - return (const vfloat4)reciprocal; -#endif // BUILD_IOS -#else - #if defined(__AVX512VL__) const vfloat4 r = _mm_rcp14_ps(a); #else @@ -316,45 +279,31 @@ namespace embree #else return _mm_mul_ps(r,_mm_sub_ps(vfloat4(2.0f), _mm_mul_ps(r, a))); #endif - -#endif //defined(__aarch64__) } __forceinline vfloat4 sqr (const vfloat4& a) { return _mm_mul_ps(a,a); } __forceinline vfloat4 sqrt(const vfloat4& a) { return _mm_sqrt_ps(a); } __forceinline vfloat4 rsqrt(const vfloat4& a) { -#if defined(__aarch64__) - vfloat4 r = _mm_rsqrt_ps(a); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a, r), r)); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a, r), r)); - r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(a, r), r)); - return r; -#else - #if defined(__AVX512VL__) - const vfloat4 r = _mm_rsqrt14_ps(a); + vfloat4 r = _mm_rsqrt14_ps(a); #else - const vfloat4 r = _mm_rsqrt_ps(a); + vfloat4 r = _mm_rsqrt_ps(a); #endif -#if defined(__AVX2__) - return _mm_fmadd_ps(_mm_set1_ps(1.5f), r, - _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); +#if defined(__ARM_NEON) + r = _mm_fmadd_ps(_mm_set1_ps(1.5f), r, _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); + r = _mm_fmadd_ps(_mm_set1_ps(1.5f), r, _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); +#elif defined(__AVX2__) + r = _mm_fmadd_ps(_mm_set1_ps(1.5f), r, _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); #else - return _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r), - _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); -#endif - + r = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(1.5f), r), _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r), _mm_mul_ps(r, r))); #endif + return r; } __forceinline vboolf4 isnan(const vfloat4& a) { -#if defined(__aarch64__) - const vfloat4 b = _mm_and_ps(a, vreinterpretq_f32_u32(v0x7fffffff)); -#else const vfloat4 b = _mm_and_ps(a, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); -#endif #if defined(__AVX512VL__) return _mm_cmp_epi32_mask(_mm_castps_si128(b), _mm_set1_epi32(0x7f800000), _MM_CMPINT_GT); #else @@ -395,8 +344,7 @@ namespace embree __forceinline vfloat4 max(const vfloat4& a, float b) { return _mm_max_ps(a,vfloat4(b)); } __forceinline vfloat4 max(float a, const vfloat4& b) { return _mm_max_ps(vfloat4(a),b); } -#if defined(__SSE4_1__) || defined(__aarch64__) - +#if defined(__SSE4_1__) __forceinline vfloat4 mini(const vfloat4& a, const vfloat4& b) { const vint4 ai = _mm_castps_si128(a); const vint4 bi = _mm_castps_si128(b); @@ -438,31 +386,17 @@ namespace embree /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// -#if defined(__AVX2__) +#if defined(__AVX2__) || defined(__ARM_NEON) __forceinline vfloat4 madd (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return _mm_fmadd_ps(a,b,c); } __forceinline vfloat4 msub (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return _mm_fmsub_ps(a,b,c); } __forceinline vfloat4 nmadd(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return _mm_fnmadd_ps(a,b,c); } __forceinline vfloat4 nmsub(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return _mm_fnmsub_ps(a,b,c); } #else - -#if defined(__aarch64__) - __forceinline vfloat4 madd (const vfloat4& a, const vfloat4& b, const vfloat4& c) { - return _mm_madd_ps(a, b, c); //a*b+c; - } - __forceinline vfloat4 nmadd(const vfloat4& a, const vfloat4& b, const vfloat4& c) { - return _mm_msub_ps(a, b, c); //-a*b+c; - } - __forceinline vfloat4 nmsub(const vfloat4& a, const vfloat4& b, const vfloat4& c) { - return vnegq_f32(vfmaq_f32(c,a, b)); - } -#else __forceinline vfloat4 madd (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b+c; } + __forceinline vfloat4 msub (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b-c; } __forceinline vfloat4 nmadd(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return -a*b+c;} __forceinline vfloat4 nmsub(const vfloat4& a, const vfloat4& b, const vfloat4& c) { return -a*b-c; } #endif - __forceinline vfloat4 msub (const vfloat4& a, const vfloat4& b, const vfloat4& c) { return a*b-c; } - -#endif //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators @@ -495,13 +429,8 @@ namespace embree __forceinline vboolf4 operator ==(const vfloat4& a, const vfloat4& b) { return _mm_cmpeq_ps (a, b); } __forceinline vboolf4 operator !=(const vfloat4& a, const vfloat4& b) { return _mm_cmpneq_ps(a, b); } __forceinline vboolf4 operator < (const vfloat4& a, const vfloat4& b) { return _mm_cmplt_ps (a, b); } -#if defined(__aarch64__) - __forceinline vboolf4 operator >=(const vfloat4& a, const vfloat4& b) { return _mm_cmpge_ps (a, b); } - __forceinline vboolf4 operator > (const vfloat4& a, const vfloat4& b) { return _mm_cmpgt_ps (a, b); } -#else __forceinline vboolf4 operator >=(const vfloat4& a, const vfloat4& b) { return _mm_cmpnlt_ps(a, b); } __forceinline vboolf4 operator > (const vfloat4& a, const vfloat4& b) { return _mm_cmpnle_ps(a, b); } -#endif __forceinline vboolf4 operator <=(const vfloat4& a, const vfloat4& b) { return _mm_cmple_ps (a, b); } #endif @@ -513,7 +442,7 @@ namespace embree __forceinline vboolf4 operator < (const vfloat4& a, float b) { return a < vfloat4(b); } __forceinline vboolf4 operator < (float a, const vfloat4& b) { return vfloat4(a) < b; } - + __forceinline vboolf4 operator >=(const vfloat4& a, float b) { return a >= vfloat4(b); } __forceinline vboolf4 operator >=(float a, const vfloat4& b) { return vfloat4(a) >= b; } @@ -549,68 +478,17 @@ namespace embree template<int mask> __forceinline vfloat4 select(const vfloat4& t, const vfloat4& f) { -#if defined(__SSE4_1__) +#if defined(__SSE4_1__) return _mm_blend_ps(f, t, mask); #else return select(vboolf4(mask), t, f); #endif } - -#if defined(__aarch64__) - template<> __forceinline vfloat4 select<0>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vzero)); - } - template<> __forceinline vfloat4 select<1>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v000F)); - } - template<> __forceinline vfloat4 select<2>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v00F0)); - } - template<> __forceinline vfloat4 select<3>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v00FF)); - } - template<> __forceinline vfloat4 select<4>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v0F00)); - } - template<> __forceinline vfloat4 select<5>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v0F0F)); - } - template<> __forceinline vfloat4 select<6>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v0FF0)); - } - template<> __forceinline vfloat4 select<7>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(v0FFF)); - } - template<> __forceinline vfloat4 select<8>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vF000)); - } - template<> __forceinline vfloat4 select<9>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vF00F)); - } - template<> __forceinline vfloat4 select<10>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vF0F0)); - } - template<> __forceinline vfloat4 select<11>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vF0FF)); - } - template<> __forceinline vfloat4 select<12>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vFF00)); - } - template<> __forceinline vfloat4 select<13>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vFF0F)); - } - template<> __forceinline vfloat4 select<14>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vFFF0)); - } - template<> __forceinline vfloat4 select<15>(const vfloat4& t, const vfloat4& f) { - return _mm_blendv_ps(f, t, vreinterpretq_f32_u32(vFFFF)); - } -#endif - + __forceinline vfloat4 lerp(const vfloat4& a, const vfloat4& b, const vfloat4& t) { return madd(t,b-a,a); } - + __forceinline bool isvalid(const vfloat4& v) { return all((v > vfloat4(-FLT_LARGE)) & (v < vfloat4(+FLT_LARGE))); } @@ -622,21 +500,21 @@ namespace embree __forceinline bool is_finite(const vboolf4& valid, const vfloat4& a) { return all(valid, (a >= vfloat4(-FLT_MAX)) & (a <= vfloat4(+FLT_MAX))); } - + //////////////////////////////////////////////////////////////////////////////// /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// #if defined(__aarch64__) - __forceinline vfloat4 floor(const vfloat4& a) { return vrndmq_f32(a.v); } // towards -inf - __forceinline vfloat4 ceil (const vfloat4& a) { return vrndpq_f32(a.v); } // toward +inf - __forceinline vfloat4 trunc(const vfloat4& a) { return vrndq_f32(a.v); } // towards 0 - __forceinline vfloat4 round(const vfloat4& a) { return vrndnq_f32(a.v); } // to nearest, ties to even. NOTE(LTE): arm clang uses vrndnq, old gcc uses vrndqn? + __forceinline vfloat4 floor(const vfloat4& a) { return vrndmq_f32(a.v); } + __forceinline vfloat4 ceil (const vfloat4& a) { return vrndpq_f32(a.v); } + __forceinline vfloat4 trunc(const vfloat4& a) { return vrndq_f32(a.v); } + __forceinline vfloat4 round(const vfloat4& a) { return vrndnq_f32(a.v); } #elif defined (__SSE4_1__) - __forceinline vfloat4 floor(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); } - __forceinline vfloat4 ceil (const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); } - __forceinline vfloat4 trunc(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_ZERO ); } - __forceinline vfloat4 round(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); } // (even) https://www.felixcloutier.com/x86/roundpd + __forceinline vfloat4 floor(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); } + __forceinline vfloat4 ceil (const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); } + __forceinline vfloat4 trunc(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_ZERO ); } + __forceinline vfloat4 round(const vfloat4& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); } #else __forceinline vfloat4 floor(const vfloat4& a) { return vfloat4(floorf(a[0]),floorf(a[1]),floorf(a[2]),floorf(a[3])); } __forceinline vfloat4 ceil (const vfloat4& a) { return vfloat4(ceilf (a[0]),ceilf (a[1]),ceilf (a[2]),ceilf (a[3])); } @@ -646,9 +524,7 @@ namespace embree __forceinline vfloat4 frac(const vfloat4& a) { return a-floor(a); } __forceinline vint4 floori(const vfloat4& a) { -#if defined(__aarch64__) - return vcvtq_s32_f32(floor(a)); -#elif defined(__SSE4_1__) +#if defined(__SSE4_1__) return vint4(floor(a)); #else return vint4(a-vfloat4(0.5f)); @@ -662,16 +538,6 @@ namespace embree __forceinline vfloat4 unpacklo(const vfloat4& a, const vfloat4& b) { return _mm_unpacklo_ps(a, b); } __forceinline vfloat4 unpackhi(const vfloat4& a, const vfloat4& b) { return _mm_unpackhi_ps(a, b); } -#if defined(__aarch64__) - template<int i0, int i1, int i2, int i3> - __forceinline vfloat4 shuffle(const vfloat4& v) { - return vreinterpretq_f32_u8(vqtbl1q_u8( (uint8x16_t)v.v, _MN_SHUFFLE(i0, i1, i2, i3))); - } - template<int i0, int i1, int i2, int i3> - __forceinline vfloat4 shuffle(const vfloat4& a, const vfloat4& b) { - return vreinterpretq_f32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3))); - } -#else template<int i0, int i1, int i2, int i3> __forceinline vfloat4 shuffle(const vfloat4& v) { return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(v), _MM_SHUFFLE(i3, i2, i1, i0))); @@ -681,19 +547,8 @@ namespace embree __forceinline vfloat4 shuffle(const vfloat4& a, const vfloat4& b) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } -#endif - -#if defined (__SSSE3__) - __forceinline vfloat4 shuffle8(const vfloat4& a, const vint4& shuf) { - return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); - } -#endif -#if defined(__aarch64__) - template<> __forceinline vfloat4 shuffle<0, 0, 2, 2>(const vfloat4& v) { return __m128(vqtbl1q_u8( uint8x16_t(v.v), v0022 )); } - template<> __forceinline vfloat4 shuffle<1, 1, 3, 3>(const vfloat4& v) { return __m128(vqtbl1q_u8( uint8x16_t(v.v), v1133)); } - template<> __forceinline vfloat4 shuffle<0, 1, 0, 1>(const vfloat4& v) { return __m128(vqtbl1q_u8( uint8x16_t(v.v), v0101)); } -#elif defined(__SSE3__) +#if defined(__SSE3__) template<> __forceinline vfloat4 shuffle<0, 0, 2, 2>(const vfloat4& v) { return _mm_moveldup_ps(v); } template<> __forceinline vfloat4 shuffle<1, 1, 3, 3>(const vfloat4& v) { return _mm_movehdup_ps(v); } template<> __forceinline vfloat4 shuffle<0, 1, 0, 1>(const vfloat4& v) { return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(v))); } @@ -704,56 +559,10 @@ namespace embree return shuffle<i,i,i,i>(v); } -#if defined(__aarch64__) - template<int i> __forceinline float extract(const vfloat4& a); - template<> __forceinline float extract<0>(const vfloat4& b) { - return b[0]; - } - template<> __forceinline float extract<1>(const vfloat4& b) { - return b[1]; - } - template<> __forceinline float extract<2>(const vfloat4& b) { - return b[2]; - } - template<> __forceinline float extract<3>(const vfloat4& b) { - return b[3]; - } -#elif defined (__SSE4_1__) && !defined(__GNUC__) - template<int i> __forceinline float extract(const vfloat4& a) { return _mm_cvtss_f32(_mm_extract_ps(a,i)); } - template<> __forceinline float extract<0>(const vfloat4& a) { return _mm_cvtss_f32(a); } -#else - template<int i> __forceinline float extract(const vfloat4& a) { return _mm_cvtss_f32(shuffle<i,i,i,i>(a)); } - template<> __forceinline float extract<0>(const vfloat4& a) { return _mm_cvtss_f32(a); } -#endif - + template<int i> __forceinline float extract (const vfloat4& a) { return _mm_cvtss_f32(shuffle<i>(a)); } + template<> __forceinline float extract<0>(const vfloat4& a) { return _mm_cvtss_f32(a); } -#if defined(__aarch64__) - template<int dst> __forceinline vfloat4 insert(const vfloat4& a, float b); - template<> __forceinline vfloat4 insert<0>(const vfloat4& a, float b) - { - vfloat4 c = a; - c[0] = b; - return c; - } - template<> __forceinline vfloat4 insert<1>(const vfloat4& a, float b) - { - vfloat4 c = a; - c[1] = b; - return c; - } - template<> __forceinline vfloat4 insert<2>(const vfloat4& a, float b) - { - vfloat4 c = a; - c[2] = b; - return c; - } - template<> __forceinline vfloat4 insert<3>(const vfloat4& a, float b) - { - vfloat4 c = a; - c[3] = b; - return c; - } -#elif defined (__SSE4_1__) +#if defined (__SSE4_1__) template<int dst, int src, int clr> __forceinline vfloat4 insert(const vfloat4& a, const vfloat4& b) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); } template<int dst, int src> __forceinline vfloat4 insert(const vfloat4& a, const vfloat4& b) { return insert<dst, src, 0>(a, b); } template<int dst> __forceinline vfloat4 insert(const vfloat4& a, const float b) { return insert<dst, 0>(a, _mm_set_ss(b)); } @@ -762,19 +571,10 @@ namespace embree template<int dst> __forceinline vfloat4 insert(const vfloat4& a, float b) { vfloat4 c = a; c[dst&3] = b; return c; } #endif -#if defined(__aarch64__) - __forceinline float toScalar(const vfloat4& v) { - return v[0]; - } -#else __forceinline float toScalar(const vfloat4& v) { return _mm_cvtss_f32(v); } -#endif - __forceinline vfloat4 broadcast4f(const vfloat4& a, size_t k) { - return vfloat4::broadcast(&a[k]); - } __forceinline vfloat4 shift_right_1(const vfloat4& x) { - return _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(x), 4)); + return _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(x), 4)); } #if defined (__AVX2__) @@ -790,7 +590,7 @@ namespace embree template<int i> __forceinline vfloat4 align_shift_right(const vfloat4& a, const vfloat4& b) { return _mm_castsi128_ps(_mm_alignr_epi32(_mm_castps_si128(a), _mm_castps_si128(b), i)); - } + } #endif @@ -864,39 +664,28 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) - __forceinline vfloat4 vreduce_min(const vfloat4& v) { float h = vminvq_f32(v); return vdupq_n_f32(h); } - __forceinline vfloat4 vreduce_max(const vfloat4& v) { float h = vmaxvq_f32(v); return vdupq_n_f32(h); } - __forceinline vfloat4 vreduce_add(const vfloat4& v) { float h = vaddvq_f32(v); return vdupq_n_f32(h); } -#else + __forceinline vfloat4 vreduce_min(const vfloat4& v) { vfloat4 h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); } __forceinline vfloat4 vreduce_max(const vfloat4& v) { vfloat4 h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); } __forceinline vfloat4 vreduce_add(const vfloat4& v) { vfloat4 h = shuffle<1,0,3,2>(v) + v ; return shuffle<2,3,0,1>(h) + h ; } -#endif -#if defined(__aarch64__) - __forceinline float reduce_min(const vfloat4& v) { return vminvq_f32(v); } - __forceinline float reduce_max(const vfloat4& v) { return vmaxvq_f32(v); } - __forceinline float reduce_add(const vfloat4& v) { return vaddvq_f32(v); } -#else __forceinline float reduce_min(const vfloat4& v) { return _mm_cvtss_f32(vreduce_min(v)); } __forceinline float reduce_max(const vfloat4& v) { return _mm_cvtss_f32(vreduce_max(v)); } __forceinline float reduce_add(const vfloat4& v) { return _mm_cvtss_f32(vreduce_add(v)); } -#endif - __forceinline size_t select_min(const vboolf4& valid, const vfloat4& v) - { - const vfloat4 a = select(valid,v,vfloat4(pos_inf)); + __forceinline size_t select_min(const vboolf4& valid, const vfloat4& v) + { + const vfloat4 a = select(valid,v,vfloat4(pos_inf)); const vbool4 valid_min = valid & (a == vreduce_min(a)); - return bsf(movemask(any(valid_min) ? valid_min : valid)); + return bsf(movemask(any(valid_min) ? valid_min : valid)); } - __forceinline size_t select_max(const vboolf4& valid, const vfloat4& v) - { - const vfloat4 a = select(valid,v,vfloat4(neg_inf)); + __forceinline size_t select_max(const vboolf4& valid, const vfloat4& v) + { + const vfloat4 a = select(valid,v,vfloat4(neg_inf)); const vbool4 valid_max = valid & (a == vreduce_max(a)); - return bsf(movemask(any(valid_max) ? valid_max : valid)); + return bsf(movemask(any(valid_max) ? valid_max : valid)); } - + //////////////////////////////////////////////////////////////////////////////// /// Euclidian Space Operators //////////////////////////////////////////////////////////////////////////////// @@ -911,7 +700,7 @@ namespace embree const vfloat4 b0 = shuffle<1,2,0,3>(b); const vfloat4 a1 = shuffle<1,2,0,3>(a); const vfloat4 b1 = b; - return shuffle<1,2,0,3>(prod_diff(a0,b0,a1,b1)); + return shuffle<1,2,0,3>(msub(a0,b0,a1*b1)); } //////////////////////////////////////////////////////////////////////////////// @@ -923,3 +712,11 @@ namespace embree } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vfloat8_avx.h b/thirdparty/embree/common/simd/vfloat8_avx.h index 3c7e4a8cdc..13446454e8 100644 --- a/thirdparty/embree-aarch64/common/simd/vfloat8_avx.h +++ b/thirdparty/embree/common/simd/vfloat8_avx.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX float type */ @@ -33,7 +41,7 @@ namespace embree __forceinline explicit vfloat(const vfloat4& a) : v(_mm256_insertf128_ps(_mm256_castps128_ps256(a),a,1)) {} __forceinline vfloat(const vfloat4& a, const vfloat4& b) : v(_mm256_insertf128_ps(_mm256_castps128_ps256(a),b,1)) {} - __forceinline explicit vfloat(const int8_t* a) : v(_mm256_loadu_ps((const float*)a)) {} + __forceinline explicit vfloat(const char* a) : v(_mm256_loadu_ps((const float*)a)) {} __forceinline vfloat(float a) : v(_mm256_set1_ps(a)) {} __forceinline vfloat(float a, float b) : v(_mm256_set_ps(b, a, b, a, b, a, b, a)) {} __forceinline vfloat(float a, float b, float c, float d) : v(_mm256_set_ps(d, c, b, a, d, c, b, a)) {} @@ -61,21 +69,7 @@ namespace embree return _mm256_broadcast_ss((float*)a); } - static __forceinline vfloat8 broadcast2(const float* a, const float* b) { -#if defined(__INTEL_COMPILER) - const vfloat8 v0 = _mm256_broadcast_ss(a); - const vfloat8 v1 = _mm256_broadcast_ss(b); - return _mm256_blend_ps(v1, v0, 0xf); -#else - return _mm256_set_ps(*b,*b,*b,*b,*a,*a,*a,*a); -#endif - } - - static __forceinline vfloat8 broadcast4f(const vfloat4* ptr) { - return _mm256_broadcast_ps((__m128*)ptr); - } - - static __forceinline vfloat8 load(const int8_t* ptr) { + static __forceinline vfloat8 load(const char* ptr) { #if defined(__AVX2__) return _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)ptr))); #else @@ -83,7 +77,7 @@ namespace embree #endif } - static __forceinline vfloat8 load(const uint8_t* ptr) { + static __forceinline vfloat8 load(const unsigned char* ptr) { #if defined(__AVX2__) return _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr))); #else @@ -107,24 +101,11 @@ namespace embree #if defined(__AVX512VL__) - static __forceinline vfloat8 compact(const vboolf8& mask, vfloat8 &v) { - return _mm256_mask_compress_ps(v, mask, v); - } - static __forceinline vfloat8 compact(const vboolf8& mask, vfloat8 &a, const vfloat8& b) { - return _mm256_mask_compress_ps(a, mask, b); - } - static __forceinline vfloat8 load (const vboolf8& mask, const void* ptr) { return _mm256_mask_load_ps (_mm256_setzero_ps(),mask,(float*)ptr); } static __forceinline vfloat8 loadu(const vboolf8& mask, const void* ptr) { return _mm256_mask_loadu_ps(_mm256_setzero_ps(),mask,(float*)ptr); } static __forceinline void store (const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_mask_store_ps ((float*)ptr,mask,v); } static __forceinline void storeu(const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_mask_storeu_ps((float*)ptr,mask,v); } -#elif defined(__aarch64__) - static __forceinline vfloat8 load (const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,(__m256i)mask.v); } - static __forceinline vfloat8 loadu(const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,(__m256i)mask.v); } - - static __forceinline void store (const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask.v,v); } - static __forceinline void storeu(const vboolf8& mask, void* ptr, const vfloat8& v) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask.v,v); } #else static __forceinline vfloat8 load (const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,(__m256i)mask); } static __forceinline vfloat8 loadu(const vboolf8& mask, const void* ptr) { return _mm256_maskload_ps((float*)ptr,(__m256i)mask); } @@ -145,18 +126,18 @@ namespace embree template<int scale = 4> static __forceinline vfloat8 gather(const float* ptr, const vint8& index) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _mm256_i32gather_ps(ptr, index ,scale); #else return vfloat8( - *(float*)(((int8_t*)ptr)+scale*index[0]), - *(float*)(((int8_t*)ptr)+scale*index[1]), - *(float*)(((int8_t*)ptr)+scale*index[2]), - *(float*)(((int8_t*)ptr)+scale*index[3]), - *(float*)(((int8_t*)ptr)+scale*index[4]), - *(float*)(((int8_t*)ptr)+scale*index[5]), - *(float*)(((int8_t*)ptr)+scale*index[6]), - *(float*)(((int8_t*)ptr)+scale*index[7])); + *(float*)(((char*)ptr)+scale*index[0]), + *(float*)(((char*)ptr)+scale*index[1]), + *(float*)(((char*)ptr)+scale*index[2]), + *(float*)(((char*)ptr)+scale*index[3]), + *(float*)(((char*)ptr)+scale*index[4]), + *(float*)(((char*)ptr)+scale*index[5]), + *(float*)(((char*)ptr)+scale*index[6]), + *(float*)(((char*)ptr)+scale*index[7])); #endif } @@ -165,17 +146,17 @@ namespace embree vfloat8 r = zero; #if defined(__AVX512VL__) return _mm256_mmask_i32gather_ps(r, mask, index, ptr, scale); -#elif defined(__AVX2__) && !defined(__aarch64__) +#elif defined(__AVX2__) return _mm256_mask_i32gather_ps(r, ptr, index, mask, scale); #else - if (likely(mask[0])) r[0] = *(float*)(((int8_t*)ptr)+scale*index[0]); - if (likely(mask[1])) r[1] = *(float*)(((int8_t*)ptr)+scale*index[1]); - if (likely(mask[2])) r[2] = *(float*)(((int8_t*)ptr)+scale*index[2]); - if (likely(mask[3])) r[3] = *(float*)(((int8_t*)ptr)+scale*index[3]); - if (likely(mask[4])) r[4] = *(float*)(((int8_t*)ptr)+scale*index[4]); - if (likely(mask[5])) r[5] = *(float*)(((int8_t*)ptr)+scale*index[5]); - if (likely(mask[6])) r[6] = *(float*)(((int8_t*)ptr)+scale*index[6]); - if (likely(mask[7])) r[7] = *(float*)(((int8_t*)ptr)+scale*index[7]); + if (likely(mask[0])) r[0] = *(float*)(((char*)ptr)+scale*index[0]); + if (likely(mask[1])) r[1] = *(float*)(((char*)ptr)+scale*index[1]); + if (likely(mask[2])) r[2] = *(float*)(((char*)ptr)+scale*index[2]); + if (likely(mask[3])) r[3] = *(float*)(((char*)ptr)+scale*index[3]); + if (likely(mask[4])) r[4] = *(float*)(((char*)ptr)+scale*index[4]); + if (likely(mask[5])) r[5] = *(float*)(((char*)ptr)+scale*index[5]); + if (likely(mask[6])) r[6] = *(float*)(((char*)ptr)+scale*index[6]); + if (likely(mask[7])) r[7] = *(float*)(((char*)ptr)+scale*index[7]); return r; #endif } @@ -186,14 +167,14 @@ namespace embree #if defined(__AVX512VL__) _mm256_i32scatter_ps((float*)ptr, ofs, v, scale); #else - *(float*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - *(float*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - *(float*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - *(float*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - *(float*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - *(float*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - *(float*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - *(float*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + *(float*)(((char*)ptr)+scale*ofs[0]) = v[0]; + *(float*)(((char*)ptr)+scale*ofs[1]) = v[1]; + *(float*)(((char*)ptr)+scale*ofs[2]) = v[2]; + *(float*)(((char*)ptr)+scale*ofs[3]) = v[3]; + *(float*)(((char*)ptr)+scale*ofs[4]) = v[4]; + *(float*)(((char*)ptr)+scale*ofs[5]) = v[5]; + *(float*)(((char*)ptr)+scale*ofs[6]) = v[6]; + *(float*)(((char*)ptr)+scale*ofs[7]) = v[7]; #endif } @@ -203,24 +184,17 @@ namespace embree #if defined(__AVX512VL__) _mm256_mask_i32scatter_ps((float*)ptr, mask, ofs, v, scale); #else - if (likely(mask[0])) *(float*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - if (likely(mask[1])) *(float*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - if (likely(mask[2])) *(float*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - if (likely(mask[3])) *(float*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - if (likely(mask[4])) *(float*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - if (likely(mask[5])) *(float*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - if (likely(mask[6])) *(float*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - if (likely(mask[7])) *(float*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + if (likely(mask[0])) *(float*)(((char*)ptr)+scale*ofs[0]) = v[0]; + if (likely(mask[1])) *(float*)(((char*)ptr)+scale*ofs[1]) = v[1]; + if (likely(mask[2])) *(float*)(((char*)ptr)+scale*ofs[2]) = v[2]; + if (likely(mask[3])) *(float*)(((char*)ptr)+scale*ofs[3]) = v[3]; + if (likely(mask[4])) *(float*)(((char*)ptr)+scale*ofs[4]) = v[4]; + if (likely(mask[5])) *(float*)(((char*)ptr)+scale*ofs[5]) = v[5]; + if (likely(mask[6])) *(float*)(((char*)ptr)+scale*ofs[6]) = v[6]; + if (likely(mask[7])) *(float*)(((char*)ptr)+scale*ofs[7]) = v[7]; #endif } - static __forceinline void store(const vboolf8& mask, int8_t* ptr, const vint8& ofs, const vfloat8& v) { - scatter<1>(mask,ptr,ofs,v); - } - static __forceinline void store(const vboolf8& mask, float* ptr, const vint8& ofs, const vfloat8& v) { - scatter<4>(mask,ptr,ofs,v); - } - //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -241,60 +215,27 @@ namespace embree __forceinline vfloat8 toFloat(const vint8& a) { return vfloat8(a); } __forceinline vfloat8 operator +(const vfloat8& a) { return a; } -#if !defined(__aarch64__) __forceinline vfloat8 operator -(const vfloat8& a) { const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)); return _mm256_xor_ps(a, mask); } -#else - __forceinline vfloat8 operator -(const vfloat8& a) { - __m256 res; - res.lo = vnegq_f32(a.v.lo); - res.hi = vnegq_f32(a.v.hi); - return res; -} -#endif - -#if !defined(__aarch64__) -__forceinline vfloat8 abs(const vfloat8& a) { - const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); - return _mm256_and_ps(a, mask); -} -#else -__forceinline vfloat8 abs(const vfloat8& a) { - __m256 res; - res.lo = vabsq_f32(a.v.lo); - res.hi = vabsq_f32(a.v.hi); - return res; -} -#endif - -#if !defined(__aarch64__) + __forceinline vfloat8 abs(const vfloat8& a) { + const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); + return _mm256_and_ps(a, mask); + } __forceinline vfloat8 sign (const vfloat8& a) { return _mm256_blendv_ps(vfloat8(one), -vfloat8(one), _mm256_cmp_ps(a, vfloat8(zero), _CMP_NGE_UQ)); } -#else - __forceinline vfloat8 sign (const vfloat8& a) { return _mm256_blendv_ps(vfloat8(one), -vfloat8(one), _mm256_cmplt_ps(a, vfloat8(zero))); } -#endif __forceinline vfloat8 signmsk(const vfloat8& a) { return _mm256_and_ps(a,_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); } static __forceinline vfloat8 rcp(const vfloat8& a) { -#if defined(BUILD_IOS) && defined(__aarch64__) - // ios devices are faster doing full divide, no need for NR fixup - vfloat8 ret; - const float32x4_t one = vdupq_n_f32(1.0f); - ret.v.lo = vdivq_f32(one, a.v.lo); - ret.v.hi = vdivq_f32(one, a.v.hi); - return ret; -#endif - #if defined(__AVX512VL__) const vfloat8 r = _mm256_rcp14_ps(a); #else const vfloat8 r = _mm256_rcp_ps(a); #endif - -#if defined(__AVX2__) //&& !defined(aarch64) + +#if defined(__AVX2__) return _mm256_mul_ps(r, _mm256_fnmadd_ps(r, a, vfloat8(2.0f))); #else return _mm256_mul_ps(r, _mm256_sub_ps(vfloat8(2.0f), _mm256_mul_ps(r, a))); @@ -443,29 +384,17 @@ __forceinline vfloat8 abs(const vfloat8& a) { static __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) { return _mm256_mask_blend_ps(m, f, t); } -#elif !defined(__aarch64__) - __forceinline vboolf8 operator ==(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); } - __forceinline vboolf8 operator !=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ); } - __forceinline vboolf8 operator < (const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS); } - __forceinline vboolf8 operator >=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NLT_US); } - __forceinline vboolf8 operator > (const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NLE_US); } - __forceinline vboolf8 operator <=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_LE_OS); } - - __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) { - return _mm256_blendv_ps(f, t, m); - } #else - __forceinline vboolf8 operator ==(const vfloat8& a, const vfloat8& b) { return _mm256_cmpeq_ps(a, b); } - __forceinline vboolf8 operator !=(const vfloat8& a, const vfloat8& b) { return _mm256_cmpneq_ps(a, b); } - __forceinline vboolf8 operator < (const vfloat8& a, const vfloat8& b) { return _mm256_cmplt_ps(a, b); } - __forceinline vboolf8 operator >=(const vfloat8& a, const vfloat8& b) { return _mm256_cmpge_ps(a, b); } - __forceinline vboolf8 operator > (const vfloat8& a, const vfloat8& b) { return _mm256_cmpgt_ps(a, b); } - __forceinline vboolf8 operator <=(const vfloat8& a, const vfloat8& b) { return _mm256_cmple_ps(a, b); } + static __forceinline vboolf8 operator ==(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); } + static __forceinline vboolf8 operator !=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ); } + static __forceinline vboolf8 operator < (const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS); } + static __forceinline vboolf8 operator >=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NLT_US); } + static __forceinline vboolf8 operator > (const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_NLE_US); } + static __forceinline vboolf8 operator <=(const vfloat8& a, const vfloat8& b) { return _mm256_cmp_ps(a, b, _CMP_LE_OS); } - __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) { - return _mm256_blendv_ps(f, t, m); + static __forceinline vfloat8 select(const vboolf8& m, const vfloat8& t, const vfloat8& f) { + return _mm256_blendv_ps(f, t, m); } - #endif template<int mask> @@ -534,17 +463,10 @@ __forceinline vfloat8 abs(const vfloat8& a) { /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// -#if !defined(__aarch64__) __forceinline vfloat8 floor(const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_NEG_INF ); } __forceinline vfloat8 ceil (const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_POS_INF ); } __forceinline vfloat8 trunc(const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_ZERO ); } __forceinline vfloat8 round(const vfloat8& a) { return _mm256_round_ps(a, _MM_FROUND_TO_NEAREST_INT); } -#else - __forceinline vfloat8 floor(const vfloat8& a) { return _mm256_floor_ps(a); } - __forceinline vfloat8 ceil (const vfloat8& a) { return _mm256_ceil_ps(a); } -#endif - - __forceinline vfloat8 frac (const vfloat8& a) { return a-floor(a); } //////////////////////////////////////////////////////////////////////////////// @@ -579,11 +501,9 @@ __forceinline vfloat8 abs(const vfloat8& a) { return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } -#if !defined(__aarch64__) template<> __forceinline vfloat8 shuffle<0, 0, 2, 2>(const vfloat8& v) { return _mm256_moveldup_ps(v); } template<> __forceinline vfloat8 shuffle<1, 1, 3, 3>(const vfloat8& v) { return _mm256_movehdup_ps(v); } template<> __forceinline vfloat8 shuffle<0, 1, 0, 1>(const vfloat8& v) { return _mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(v))); } -#endif __forceinline vfloat8 broadcast(const float* ptr) { return _mm256_broadcast_ss(ptr); } template<size_t i> __forceinline vfloat8 insert4(const vfloat8& a, const vfloat4& b) { return _mm256_insertf128_ps(a, b, i); } @@ -592,10 +512,8 @@ __forceinline vfloat8 abs(const vfloat8& a) { __forceinline float toScalar(const vfloat8& v) { return _mm_cvtss_f32(_mm256_castps256_ps128(v)); } - __forceinline vfloat8 assign(const vfloat4& a) { return _mm256_castps128_ps256(a); } - -#if defined (__AVX2__) && !defined(__aarch64__) - __forceinline vfloat8 permute(const vfloat8& a, const __m256i& index) { +#if defined (__AVX2__) + static __forceinline vfloat8 permute(const vfloat8& a, const __m256i& index) { return _mm256_permutevar8x32_ps(a, index); } #endif @@ -618,14 +536,6 @@ __forceinline vfloat8 abs(const vfloat8& a) { } #endif - __forceinline vfloat4 broadcast4f(const vfloat8& a, const size_t k) { - return vfloat4::broadcast(&a[k]); - } - - __forceinline vfloat8 broadcast8f(const vfloat8& a, const size_t k) { - return vfloat8::broadcast(&a[k]); - } - #if defined(__AVX512VL__) static __forceinline vfloat8 shift_right_1(const vfloat8& x) { return align_shift_right<1>(zero,x); @@ -699,7 +609,7 @@ __forceinline vfloat8 abs(const vfloat8& a) { //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -#if !defined(__aarch64__) + __forceinline vfloat8 vreduce_min2(const vfloat8& v) { return min(v,shuffle<1,0,3,2>(v)); } __forceinline vfloat8 vreduce_min4(const vfloat8& v) { vfloat8 v1 = vreduce_min2(v); return min(v1,shuffle<2,3,0,1>(v1)); } __forceinline vfloat8 vreduce_min (const vfloat8& v) { vfloat8 v1 = vreduce_min4(v); return min(v1,shuffle4<1,0>(v1)); } @@ -715,14 +625,7 @@ __forceinline vfloat8 abs(const vfloat8& a) { __forceinline float reduce_min(const vfloat8& v) { return toScalar(vreduce_min(v)); } __forceinline float reduce_max(const vfloat8& v) { return toScalar(vreduce_max(v)); } __forceinline float reduce_add(const vfloat8& v) { return toScalar(vreduce_add(v)); } -#else - __forceinline float reduce_min(const vfloat8& v) { return vminvq_f32(_mm_min_ps(v.v.lo,v.v.hi)); } - __forceinline float reduce_max(const vfloat8& v) { return vmaxvq_f32(_mm_max_ps(v.v.lo,v.v.hi)); } - __forceinline vfloat8 vreduce_min(const vfloat8& v) { return vfloat8(reduce_min(v)); } - __forceinline vfloat8 vreduce_max(const vfloat8& v) { return vfloat8(reduce_max(v)); } - __forceinline float reduce_add(const vfloat8& v) { return vaddvq_f32(_mm_add_ps(v.v.lo,v.v.hi)); } -#endif __forceinline size_t select_min(const vboolf8& valid, const vfloat8& v) { const vfloat8 a = select(valid,v,vfloat8(pos_inf)); @@ -845,3 +748,11 @@ __forceinline vfloat8 abs(const vfloat8& a) { return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ", " << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vint16_avx512.h b/thirdparty/embree/common/simd/vint16_avx512.h index 3249bc2b45..3720c3c9d6 100644 --- a/thirdparty/embree-aarch64/common/simd/vint16_avx512.h +++ b/thirdparty/embree/common/simd/vint16_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 16-wide AVX-512 integer type */ @@ -90,10 +98,10 @@ namespace embree static __forceinline vint16 load (const void* addr) { return _mm512_load_si512((int*)addr); } - static __forceinline vint16 load(const uint8_t* ptr) { return _mm512_cvtepu8_epi32(_mm_load_si128((__m128i*)ptr)); } + static __forceinline vint16 load(const unsigned char* ptr) { return _mm512_cvtepu8_epi32(_mm_load_si128((__m128i*)ptr)); } static __forceinline vint16 load(const unsigned short* ptr) { return _mm512_cvtepu16_epi32(_mm256_load_si256((__m256i*)ptr)); } - static __forceinline vint16 loadu(const uint8_t* ptr) { return _mm512_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr)); } + static __forceinline vint16 loadu(const unsigned char* ptr) { return _mm512_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr)); } static __forceinline vint16 loadu(const unsigned short* ptr) { return _mm512_cvtepu16_epi32(_mm256_loadu_si256((__m256i*)ptr)); } static __forceinline vint16 loadu(const void* addr) { return _mm512_loadu_si512(addr); } @@ -109,20 +117,6 @@ namespace embree static __forceinline void store_nt(void* __restrict__ ptr, const vint16& a) { _mm512_stream_si512((__m512i*)ptr,a); } - /* pass by value to avoid compiler generating inefficient code */ - static __forceinline void storeu_compact(const vboolf16 mask, void* addr, vint16 reg) { - _mm512_mask_compressstoreu_epi32(addr,mask,reg); - } - - static __forceinline void storeu_compact_single(const vboolf16 mask, void* addr, vint16 reg) { - //_mm512_mask_compressstoreu_epi32(addr,mask,reg); - *(float*)addr = mm512_cvtss_f32(_mm512_mask_compress_ps(_mm512_castsi512_ps(reg),mask,_mm512_castsi512_ps(reg))); - } - - static __forceinline vint16 compact64bit(const vboolf16& mask, vint16 &v) { - return _mm512_mask_compress_epi64(v,mask,v); - } - static __forceinline vint16 compact(const vboolf16& mask, vint16 &v) { return _mm512_mask_compress_epi32(v,mask,v); } @@ -160,10 +154,6 @@ namespace embree _mm512_mask_i32scatter_epi32((int*)ptr,mask,index,v,scale); } - static __forceinline vint16 broadcast64bit(size_t v) { - return _mm512_set1_epi64(v); - } - //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -313,18 +303,6 @@ namespace embree return _mm512_mask_or_epi32(f,m,t,t); } - __forceinline void xchg(const vboolf16& m, vint16& a, vint16& b) { - const vint16 c = a; a = select(m,b,a); b = select(m,c,b); - } - - __forceinline vboolf16 test(const vboolf16& m, const vint16& a, const vint16& b) { - return _mm512_mask_test_epi32_mask(m,a,b); - } - - __forceinline vboolf16 test(const vint16& a, const vint16& b) { - return _mm512_test_epi32_mask(a,b); - } - //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -363,10 +341,6 @@ namespace embree template<int i> __forceinline vint16 insert4(const vint16& a, const vint4& b) { return _mm512_inserti32x4(a, b, i); } - __forceinline size_t extract64bit(const vint16& v) { - return _mm_cvtsi128_si64(_mm512_castsi512_si128(v)); - } - template<int N, int i> vint<N> extractN(const vint16& v); @@ -488,3 +462,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vint4_sse2.h b/thirdparty/embree/common/simd/vint4_sse2.h index 96f105a7c5..9814d5c71c 100644 --- a/thirdparty/embree-aarch64/common/simd/vint4_sse2.h +++ b/thirdparty/embree/common/simd/vint4_sse2.h @@ -1,10 +1,18 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../math/math.h" +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide SSE integer type */ @@ -23,7 +31,7 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// /// Constructors, Assignment & Cast Operators //////////////////////////////////////////////////////////////////////////////// - + __forceinline vint() {} __forceinline vint(const vint4& a) { v = a.v; } __forceinline vint4& operator =(const vint4& a) { v = a.v; return *this; } @@ -68,7 +76,7 @@ namespace embree static __forceinline void store (void* ptr, const vint4& v) { _mm_store_si128((__m128i*)ptr,v); } static __forceinline void storeu(void* ptr, const vint4& v) { _mm_storeu_si128((__m128i*)ptr,v); } - + #if defined(__AVX512VL__) static __forceinline vint4 compact(const vboolf4& mask, vint4 &v) { @@ -98,81 +106,61 @@ namespace embree #endif -#if defined(__aarch64__) - static __forceinline vint4 load(const uint8_t* ptr) { - return _mm_load4epu8_epi32(((__m128i*)ptr)); - } - static __forceinline vint4 loadu(const uint8_t* ptr) { - return _mm_load4epu8_epi32(((__m128i*)ptr)); - } -#elif defined(__SSE4_1__) - static __forceinline vint4 load(const uint8_t* ptr) { +#if defined(__SSE4_1__) + static __forceinline vint4 load(const unsigned char* ptr) { return _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } - static __forceinline vint4 loadu(const uint8_t* ptr) { + static __forceinline vint4 loadu(const unsigned char* ptr) { return _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } #else - static __forceinline vint4 load(const uint8_t* ptr) { + static __forceinline vint4 load(const unsigned char* ptr) { return vint4(ptr[0],ptr[1],ptr[2],ptr[3]); - } + } - static __forceinline vint4 loadu(const uint8_t* ptr) { + static __forceinline vint4 loadu(const unsigned char* ptr) { return vint4(ptr[0],ptr[1],ptr[2],ptr[3]); } #endif static __forceinline vint4 load(const unsigned short* ptr) { -#if defined(__aarch64__) - return __m128i(vmovl_u16(vld1_u16(ptr))); -#elif defined (__SSE4_1__) +#if defined (__SSE4_1__) return _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i*)ptr)); #else return vint4(ptr[0],ptr[1],ptr[2],ptr[3]); #endif - } + } - static __forceinline void store(uint8_t* ptr, const vint4& v) { -#if defined(__aarch64__) - int32x4_t x = v; - uint16x4_t y = vqmovn_u32(uint32x4_t(x)); - uint8x8_t z = vqmovn_u16(vcombine_u16(y, y)); - vst1_lane_u32((uint32_t *)ptr,uint32x2_t(z), 0); -#elif defined(__SSE4_1__) + static __forceinline void store(unsigned char* ptr, const vint4& v) { +#if defined(__SSE4_1__) __m128i x = v; x = _mm_packus_epi32(x, x); x = _mm_packus_epi16(x, x); *(int*)ptr = _mm_cvtsi128_si32(x); #else for (size_t i=0;i<4;i++) - ptr[i] = (uint8_t)v[i]; + ptr[i] = (unsigned char)v[i]; #endif } static __forceinline void store(unsigned short* ptr, const vint4& v) { -#if defined(__aarch64__) - uint32x4_t x = uint32x4_t(v.v); - uint16x4_t y = vqmovn_u32(x); - vst1_u16(ptr, y); -#else for (size_t i=0;i<4;i++) ptr[i] = (unsigned short)v[i]; -#endif } static __forceinline vint4 load_nt(void* ptr) { -#if defined(__aarch64__) || defined(__SSE4_1__) - return _mm_stream_load_si128((__m128i*)ptr); +#if defined(__SSE4_1__) + return _mm_stream_load_si128((__m128i*)ptr); #else - return _mm_load_si128((__m128i*)ptr); + return _mm_load_si128((__m128i*)ptr); #endif } - + static __forceinline void store_nt(void* ptr, const vint4& v) { -#if !defined(__aarch64__) && defined(__SSE4_1__) +#if defined(__SSE4_1__) _mm_stream_ps((float*)ptr, _mm_castsi128_ps(v)); #else _mm_store_si128((__m128i*)ptr,v); @@ -181,14 +169,14 @@ namespace embree template<int scale = 4> static __forceinline vint4 gather(const int* ptr, const vint4& index) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _mm_i32gather_epi32(ptr, index, scale); #else return vint4( - *(int*)(((int8_t*)ptr)+scale*index[0]), - *(int*)(((int8_t*)ptr)+scale*index[1]), - *(int*)(((int8_t*)ptr)+scale*index[2]), - *(int*)(((int8_t*)ptr)+scale*index[3])); + *(int*)(((char*)ptr)+scale*index[0]), + *(int*)(((char*)ptr)+scale*index[1]), + *(int*)(((char*)ptr)+scale*index[2]), + *(int*)(((char*)ptr)+scale*index[3])); #endif } @@ -197,13 +185,13 @@ namespace embree vint4 r = zero; #if defined(__AVX512VL__) return _mm_mmask_i32gather_epi32(r, mask, index, ptr, scale); -#elif defined(__AVX2__) && !defined(__aarch64__) +#elif defined(__AVX2__) return _mm_mask_i32gather_epi32(r, ptr, index, mask, scale); #else - if (likely(mask[0])) r[0] = *(int*)(((int8_t*)ptr)+scale*index[0]); - if (likely(mask[1])) r[1] = *(int*)(((int8_t*)ptr)+scale*index[1]); - if (likely(mask[2])) r[2] = *(int*)(((int8_t*)ptr)+scale*index[2]); - if (likely(mask[3])) r[3] = *(int*)(((int8_t*)ptr)+scale*index[3]); + if (likely(mask[0])) r[0] = *(int*)(((char*)ptr)+scale*index[0]); + if (likely(mask[1])) r[1] = *(int*)(((char*)ptr)+scale*index[1]); + if (likely(mask[2])) r[2] = *(int*)(((char*)ptr)+scale*index[2]); + if (likely(mask[3])) r[3] = *(int*)(((char*)ptr)+scale*index[3]); return r; #endif } @@ -214,10 +202,10 @@ namespace embree #if defined(__AVX512VL__) _mm_i32scatter_epi32((int*)ptr, index, v, scale); #else - *(int*)(((int8_t*)ptr)+scale*index[0]) = v[0]; - *(int*)(((int8_t*)ptr)+scale*index[1]) = v[1]; - *(int*)(((int8_t*)ptr)+scale*index[2]) = v[2]; - *(int*)(((int8_t*)ptr)+scale*index[3]) = v[3]; + *(int*)(((char*)ptr)+scale*index[0]) = v[0]; + *(int*)(((char*)ptr)+scale*index[1]) = v[1]; + *(int*)(((char*)ptr)+scale*index[2]) = v[2]; + *(int*)(((char*)ptr)+scale*index[3]) = v[3]; #endif } @@ -227,14 +215,14 @@ namespace embree #if defined(__AVX512VL__) _mm_mask_i32scatter_epi32((int*)ptr, mask, index, v, scale); #else - if (likely(mask[0])) *(int*)(((int8_t*)ptr)+scale*index[0]) = v[0]; - if (likely(mask[1])) *(int*)(((int8_t*)ptr)+scale*index[1]) = v[1]; - if (likely(mask[2])) *(int*)(((int8_t*)ptr)+scale*index[2]) = v[2]; - if (likely(mask[3])) *(int*)(((int8_t*)ptr)+scale*index[3]) = v[3]; + if (likely(mask[0])) *(int*)(((char*)ptr)+scale*index[0]) = v[0]; + if (likely(mask[1])) *(int*)(((char*)ptr)+scale*index[1]) = v[1]; + if (likely(mask[2])) *(int*)(((char*)ptr)+scale*index[2]) = v[2]; + if (likely(mask[3])) *(int*)(((char*)ptr)+scale*index[3]) = v[3]; #endif } -#if defined(__x86_64__) || defined(__aarch64__) +#if defined(__x86_64__) static __forceinline vint4 broadcast64(long long a) { return _mm_set1_epi64x(a); } #endif @@ -248,12 +236,10 @@ namespace embree friend __forceinline vint4 select(const vboolf4& m, const vint4& t, const vint4& f) { #if defined(__AVX512VL__) return _mm_mask_blend_epi32(m, (__m128i)f, (__m128i)t); -#elif defined(__aarch64__) - return _mm_castps_si128(_mm_blendv_ps((__m128)f.v,(__m128) t.v, (__m128)m.v)); #elif defined(__SSE4_1__) - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); #else - return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); + return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); #endif } }; @@ -270,9 +256,7 @@ namespace embree __forceinline vint4 operator +(const vint4& a) { return a; } __forceinline vint4 operator -(const vint4& a) { return _mm_sub_epi32(_mm_setzero_si128(), a); } -#if defined(__aarch64__) - __forceinline vint4 abs(const vint4& a) { return vabsq_s32(a.v); } -#elif defined(__SSSE3__) +#if defined(__SSSE3__) __forceinline vint4 abs(const vint4& a) { return _mm_abs_epi32(a); } #endif @@ -288,7 +272,7 @@ namespace embree __forceinline vint4 operator -(const vint4& a, int b) { return a - vint4(b); } __forceinline vint4 operator -(int a, const vint4& b) { return vint4(a) - b; } -#if (defined(__aarch64__)) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline vint4 operator *(const vint4& a, const vint4& b) { return _mm_mullo_epi32(a, b); } #else __forceinline vint4 operator *(const vint4& a, const vint4& b) { return vint4(a[0]*b[0],a[1]*b[1],a[2]*b[2],a[3]*b[3]); } @@ -308,34 +292,34 @@ namespace embree __forceinline vint4 operator ^(const vint4& a, int b) { return a ^ vint4(b); } __forceinline vint4 operator ^(int a, const vint4& b) { return vint4(a) ^ b; } - __forceinline vint4 operator <<(const vint4& a, const int n) { return _mm_slli_epi32(a, n); } - __forceinline vint4 operator >>(const vint4& a, const int n) { return _mm_srai_epi32(a, n); } + __forceinline vint4 operator <<(const vint4& a, int n) { return _mm_slli_epi32(a, n); } + __forceinline vint4 operator >>(const vint4& a, int n) { return _mm_srai_epi32(a, n); } __forceinline vint4 sll (const vint4& a, int b) { return _mm_slli_epi32(a, b); } __forceinline vint4 sra (const vint4& a, int b) { return _mm_srai_epi32(a, b); } __forceinline vint4 srl (const vint4& a, int b) { return _mm_srli_epi32(a, b); } - + //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// __forceinline vint4& operator +=(vint4& a, const vint4& b) { return a = a + b; } __forceinline vint4& operator +=(vint4& a, int b) { return a = a + b; } - + __forceinline vint4& operator -=(vint4& a, const vint4& b) { return a = a - b; } __forceinline vint4& operator -=(vint4& a, int b) { return a = a - b; } -#if (defined(__aarch64__)) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline vint4& operator *=(vint4& a, const vint4& b) { return a = a * b; } __forceinline vint4& operator *=(vint4& a, int b) { return a = a * b; } #endif - + __forceinline vint4& operator &=(vint4& a, const vint4& b) { return a = a & b; } __forceinline vint4& operator &=(vint4& a, int b) { return a = a & b; } - + __forceinline vint4& operator |=(vint4& a, const vint4& b) { return a = a | b; } __forceinline vint4& operator |=(vint4& a, int b) { return a = a | b; } - + __forceinline vint4& operator <<=(vint4& a, int b) { return a = a << b; } __forceinline vint4& operator >>=(vint4& a, int b) { return a = a >> b; } @@ -402,15 +386,14 @@ namespace embree template<int mask> __forceinline vint4 select(const vint4& t, const vint4& f) { -#if defined(__SSE4_1__) +#if defined(__SSE4_1__) return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); #else return select(vboolf4(mask), t, f); -#endif +#endif } - -#if defined(__aarch64__) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline vint4 min(const vint4& a, const vint4& b) { return _mm_min_epi32(a, b); } __forceinline vint4 max(const vint4& a, const vint4& b) { return _mm_max_epi32(a, b); } @@ -434,25 +417,16 @@ namespace embree __forceinline vint4 unpacklo(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } __forceinline vint4 unpackhi(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } -#if defined(__aarch64__) - template<int i0, int i1, int i2, int i3> - __forceinline vint4 shuffle(const vint4& v) { - return vreinterpretq_s32_u8(vqtbl1q_u8( (uint8x16_t)v.v, _MN_SHUFFLE(i0, i1, i2, i3))); - } - template<int i0, int i1, int i2, int i3> - __forceinline vint4 shuffle(const vint4& a, const vint4& b) { - return vreinterpretq_s32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3))); - } -#else template<int i0, int i1, int i2, int i3> __forceinline vint4 shuffle(const vint4& v) { return _mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0)); } + template<int i0, int i1, int i2, int i3> __forceinline vint4 shuffle(const vint4& a, const vint4& b) { return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); } -#endif + #if defined(__SSE3__) template<> __forceinline vint4 shuffle<0, 0, 2, 2>(const vint4& v) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(v))); } template<> __forceinline vint4 shuffle<1, 1, 3, 3>(const vint4& v) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(v))); } @@ -464,10 +438,7 @@ namespace embree return shuffle<i,i,i,i>(v); } -#if defined(__aarch64__) - template<int src> __forceinline int extract(const vint4& b); - template<int dst> __forceinline vint4 insert(const vint4& a, const int b); -#elif defined(__SSE4_1__) +#if defined(__SSE4_1__) template<int src> __forceinline int extract(const vint4& b) { return _mm_extract_epi32(b, src); } template<int dst> __forceinline vint4 insert(const vint4& a, const int b) { return _mm_insert_epi32(a, b, dst); } #else @@ -475,69 +446,19 @@ namespace embree template<int dst> __forceinline vint4 insert(const vint4& a, int b) { vint4 c = a; c[dst&3] = b; return c; } #endif -#if defined(__aarch64__) - template<> __forceinline int extract<0>(const vint4& b) { - return b.v[0]; - } - template<> __forceinline int extract<1>(const vint4& b) { - return b.v[1]; - } - template<> __forceinline int extract<2>(const vint4& b) { - return b.v[2]; - } - template<> __forceinline int extract<3>(const vint4& b) { - return b.v[3]; - } - template<> __forceinline vint4 insert<0>(const vint4& a, int b) - { - vint4 c = a; - c[0] = b; - return c; - } - template<> __forceinline vint4 insert<1>(const vint4& a, int b) - { - vint4 c = a; - c[1] = b; - return c; - } - template<> __forceinline vint4 insert<2>(const vint4& a, int b) - { - vint4 c = a; - c[2] = b; - return c; - } - template<> __forceinline vint4 insert<3>(const vint4& a, int b) - { - vint4 c = a; - c[3] = b; - return c; - } - - __forceinline int toScalar(const vint4& v) { - return v[0]; - } - - __forceinline size_t toSizeT(const vint4& v) { - uint64x2_t x = uint64x2_t(v.v); - return x[0]; - } -#else + template<> __forceinline int extract<0>(const vint4& b) { return _mm_cvtsi128_si32(b); } __forceinline int toScalar(const vint4& v) { return _mm_cvtsi128_si32(v); } - __forceinline size_t toSizeT(const vint4& v) { + __forceinline size_t toSizeT(const vint4& v) { #if defined(__WIN32__) && !defined(__X86_64__) // win32 workaround return toScalar(v); -#elif defined(__ARM_NEON) - // FIXME(LTE): Do we need a swap(i.e. use lane 1)? - return vgetq_lane_u64(*(reinterpret_cast<const uint64x2_t *>(&v)), 0); #else - return _mm_cvtsi128_si64(v); + return _mm_cvtsi128_si64(v); #endif } -#endif - + #if defined(__AVX512VL__) __forceinline vint4 permute(const vint4 &a, const vint4 &index) { @@ -546,25 +467,15 @@ namespace embree template<int i> __forceinline vint4 align_shift_right(const vint4& a, const vint4& b) { - return _mm_alignr_epi32(a, b, i); - } + return _mm_alignr_epi32(a, b, i); + } #endif //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -#if defined(__aarch64__) || defined(__SSE4_1__) - -#if defined(__aarch64__) - __forceinline vint4 vreduce_min(const vint4& v) { int h = vminvq_s32(v); return vdupq_n_s32(h); } - __forceinline vint4 vreduce_max(const vint4& v) { int h = vmaxvq_s32(v); return vdupq_n_s32(h); } - __forceinline vint4 vreduce_add(const vint4& v) { int h = vaddvq_s32(v); return vdupq_n_s32(h); } - - __forceinline int reduce_min(const vint4& v) { return vminvq_s32(v); } - __forceinline int reduce_max(const vint4& v) { return vmaxvq_s32(v); } - __forceinline int reduce_add(const vint4& v) { return vaddvq_s32(v); } -#else +#if defined(__SSE4_1__) __forceinline vint4 vreduce_min(const vint4& v) { vint4 h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); } __forceinline vint4 vreduce_max(const vint4& v) { vint4 h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); } __forceinline vint4 vreduce_add(const vint4& v) { vint4 h = shuffle<1,0,3,2>(v) + v ; return shuffle<2,3,0,1>(h) + h ; } @@ -572,8 +483,7 @@ namespace embree __forceinline int reduce_min(const vint4& v) { return toScalar(vreduce_min(v)); } __forceinline int reduce_max(const vint4& v) { return toScalar(vreduce_max(v)); } __forceinline int reduce_add(const vint4& v) { return toScalar(vreduce_add(v)); } -#endif - + __forceinline size_t select_min(const vint4& v) { return bsf(movemask(v == vreduce_min(v))); } __forceinline size_t select_max(const vint4& v) { return bsf(movemask(v == vreduce_max(v))); } @@ -592,7 +502,7 @@ namespace embree /// Sorting networks //////////////////////////////////////////////////////////////////////////////// -#if (defined(__aarch64__)) || defined(__SSE4_1__) +#if defined(__SSE4_1__) __forceinline vint4 usort_ascending(const vint4& v) { @@ -679,3 +589,10 @@ namespace embree } } +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vint8_avx.h b/thirdparty/embree/common/simd/vint8_avx.h index 25a771284d..f43e9a8c22 100644 --- a/thirdparty/embree-aarch64/common/simd/vint8_avx.h +++ b/thirdparty/embree/common/simd/vint8_avx.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX integer type */ @@ -71,25 +79,20 @@ namespace embree static __forceinline void store (void* ptr, const vint8& f) { _mm256_store_ps((float*)ptr,_mm256_castsi256_ps(f)); } static __forceinline void storeu(void* ptr, const vint8& f) { _mm256_storeu_ps((float*)ptr,_mm256_castsi256_ps(f)); } -#if !defined(__aarch64__) static __forceinline void store (const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); } static __forceinline void storeu(const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); } -#else - static __forceinline void store (const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask.v,_mm256_castsi256_ps(f)); } - static __forceinline void storeu(const vboolf8& mask, void* ptr, const vint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask.v,_mm256_castsi256_ps(f)); } -#endif static __forceinline void store_nt(void* ptr, const vint8& v) { _mm256_stream_ps((float*)ptr,_mm256_castsi256_ps(v)); } - static __forceinline vint8 load(const uint8_t* ptr) { + static __forceinline vint8 load(const unsigned char* ptr) { vint4 il = vint4::load(ptr+0); vint4 ih = vint4::load(ptr+4); return vint8(il,ih); } - static __forceinline vint8 loadu(const uint8_t* ptr) { + static __forceinline vint8 loadu(const unsigned char* ptr) { vint4 il = vint4::loadu(ptr+0); vint4 ih = vint4::loadu(ptr+4); return vint8(il,ih); @@ -107,7 +110,7 @@ namespace embree return vint8(il,ih); } - static __forceinline void store(uint8_t* ptr, const vint8& i) { + static __forceinline void store(unsigned char* ptr, const vint8& i) { vint4 il(i.vl); vint4 ih(i.vh); vint4::store(ptr + 0,il); @@ -122,54 +125,54 @@ namespace embree template<int scale = 4> static __forceinline vint8 gather(const int* ptr, const vint8& index) { return vint8( - *(int*)(((int8_t*)ptr)+scale*index[0]), - *(int*)(((int8_t*)ptr)+scale*index[1]), - *(int*)(((int8_t*)ptr)+scale*index[2]), - *(int*)(((int8_t*)ptr)+scale*index[3]), - *(int*)(((int8_t*)ptr)+scale*index[4]), - *(int*)(((int8_t*)ptr)+scale*index[5]), - *(int*)(((int8_t*)ptr)+scale*index[6]), - *(int*)(((int8_t*)ptr)+scale*index[7])); + *(int*)(((char*)ptr)+scale*index[0]), + *(int*)(((char*)ptr)+scale*index[1]), + *(int*)(((char*)ptr)+scale*index[2]), + *(int*)(((char*)ptr)+scale*index[3]), + *(int*)(((char*)ptr)+scale*index[4]), + *(int*)(((char*)ptr)+scale*index[5]), + *(int*)(((char*)ptr)+scale*index[6]), + *(int*)(((char*)ptr)+scale*index[7])); } template<int scale = 4> static __forceinline vint8 gather(const vboolf8& mask, const int* ptr, const vint8& index) { vint8 r = zero; - if (likely(mask[0])) r[0] = *(int*)(((int8_t*)ptr)+scale*index[0]); - if (likely(mask[1])) r[1] = *(int*)(((int8_t*)ptr)+scale*index[1]); - if (likely(mask[2])) r[2] = *(int*)(((int8_t*)ptr)+scale*index[2]); - if (likely(mask[3])) r[3] = *(int*)(((int8_t*)ptr)+scale*index[3]); - if (likely(mask[4])) r[4] = *(int*)(((int8_t*)ptr)+scale*index[4]); - if (likely(mask[5])) r[5] = *(int*)(((int8_t*)ptr)+scale*index[5]); - if (likely(mask[6])) r[6] = *(int*)(((int8_t*)ptr)+scale*index[6]); - if (likely(mask[7])) r[7] = *(int*)(((int8_t*)ptr)+scale*index[7]); + if (likely(mask[0])) r[0] = *(int*)(((char*)ptr)+scale*index[0]); + if (likely(mask[1])) r[1] = *(int*)(((char*)ptr)+scale*index[1]); + if (likely(mask[2])) r[2] = *(int*)(((char*)ptr)+scale*index[2]); + if (likely(mask[3])) r[3] = *(int*)(((char*)ptr)+scale*index[3]); + if (likely(mask[4])) r[4] = *(int*)(((char*)ptr)+scale*index[4]); + if (likely(mask[5])) r[5] = *(int*)(((char*)ptr)+scale*index[5]); + if (likely(mask[6])) r[6] = *(int*)(((char*)ptr)+scale*index[6]); + if (likely(mask[7])) r[7] = *(int*)(((char*)ptr)+scale*index[7]); return r; } template<int scale = 4> static __forceinline void scatter(void* ptr, const vint8& ofs, const vint8& v) { - *(int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - *(int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - *(int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - *(int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - *(int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - *(int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - *(int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - *(int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + *(int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + *(int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + *(int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + *(int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + *(int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + *(int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + *(int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + *(int*)(((char*)ptr)+scale*ofs[7]) = v[7]; } template<int scale = 4> static __forceinline void scatter(const vboolf8& mask, void* ptr, const vint8& ofs, const vint8& v) { - if (likely(mask[0])) *(int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - if (likely(mask[1])) *(int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - if (likely(mask[2])) *(int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - if (likely(mask[3])) *(int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - if (likely(mask[4])) *(int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - if (likely(mask[5])) *(int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - if (likely(mask[6])) *(int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - if (likely(mask[7])) *(int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + if (likely(mask[0])) *(int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + if (likely(mask[1])) *(int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + if (likely(mask[2])) *(int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + if (likely(mask[3])) *(int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + if (likely(mask[4])) *(int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + if (likely(mask[5])) *(int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + if (likely(mask[6])) *(int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + if (likely(mask[7])) *(int*)(((char*)ptr)+scale*ofs[7]) = v[7]; } @@ -315,11 +318,6 @@ namespace embree return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m)); } - __forceinline vint8 notand(const vboolf8& m, const vint8& f) { - return _mm256_castps_si256(_mm256_andnot_ps(m, _mm256_castsi256_ps(f))); - } - - //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -462,3 +460,11 @@ namespace embree return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ", " << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vint8_avx2.h b/thirdparty/embree/common/simd/vint8_avx2.h index 4937d972cf..e04737ffbe 100644 --- a/thirdparty/embree-aarch64/common/simd/vint8_avx2.h +++ b/thirdparty/embree/common/simd/vint8_avx2.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX integer type */ @@ -67,8 +75,8 @@ namespace embree /// Loads and Stores //////////////////////////////////////////////////////////////////////////////// - static __forceinline vint8 load(const uint8_t* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } - static __forceinline vint8 loadu(const uint8_t* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } + static __forceinline vint8 load(const unsigned char* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } + static __forceinline vint8 loadu(const unsigned char* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } static __forceinline vint8 load(const unsigned short* ptr) { return _mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)ptr)); } static __forceinline vint8 loadu(const unsigned short* ptr) { return _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i*)ptr)); } @@ -108,7 +116,7 @@ namespace embree _mm256_stream_ps((float*)ptr,_mm256_castsi256_ps(v)); } - static __forceinline void store(uint8_t* ptr, const vint8& i) + static __forceinline void store(unsigned char* ptr, const vint8& i) { for (size_t j=0; j<8; j++) ptr[j] = i[j]; @@ -140,14 +148,14 @@ namespace embree #if defined(__AVX512VL__) _mm256_i32scatter_epi32((int*)ptr, ofs, v, scale); #else - *(int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - *(int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - *(int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - *(int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - *(int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - *(int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - *(int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - *(int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + *(int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + *(int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + *(int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + *(int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + *(int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + *(int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + *(int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + *(int*)(((char*)ptr)+scale*ofs[7]) = v[7]; #endif } @@ -157,14 +165,14 @@ namespace embree #if defined(__AVX512VL__) _mm256_mask_i32scatter_epi32((int*)ptr, mask, ofs, v, scale); #else - if (likely(mask[0])) *(int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - if (likely(mask[1])) *(int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - if (likely(mask[2])) *(int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - if (likely(mask[3])) *(int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - if (likely(mask[4])) *(int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - if (likely(mask[5])) *(int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - if (likely(mask[6])) *(int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - if (likely(mask[7])) *(int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + if (likely(mask[0])) *(int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + if (likely(mask[1])) *(int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + if (likely(mask[2])) *(int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + if (likely(mask[3])) *(int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + if (likely(mask[4])) *(int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + if (likely(mask[5])) *(int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + if (likely(mask[6])) *(int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + if (likely(mask[7])) *(int*)(((char*)ptr)+scale*ofs[7]) = v[7]; #endif } @@ -385,9 +393,7 @@ namespace embree __forceinline int toScalar(const vint8& v) { return _mm_cvtsi128_si32(_mm256_castsi256_si128(v)); } -#if !defined(__aarch64__) - -__forceinline vint8 permute(const vint8& v, const __m256i& index) { + __forceinline vint8 permute(const vint8& v, const __m256i& index) { return _mm256_permutevar8x32_epi32(v, index); } @@ -395,8 +401,6 @@ __forceinline vint8 permute(const vint8& v, const __m256i& index) { return _mm256_castps_si256(_mm256_permutevar_ps(_mm256_castsi256_ps(v), index)); } - - template<int i> static __forceinline vint8 align_shift_right(const vint8& a, const vint8& b) { #if defined(__AVX512VL__) @@ -406,9 +410,6 @@ __forceinline vint8 permute(const vint8& v, const __m256i& index) { #endif } -#endif - - //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// @@ -435,9 +436,6 @@ __forceinline vint8 permute(const vint8& v, const __m256i& index) { __forceinline size_t select_min(const vboolf8& valid, const vint8& v) { const vint8 a = select(valid,v,vint8(pos_inf)); return bsf(movemask(valid & (a == vreduce_min(a)))); } __forceinline size_t select_max(const vboolf8& valid, const vint8& v) { const vint8 a = select(valid,v,vint8(neg_inf)); return bsf(movemask(valid & (a == vreduce_max(a)))); } - - __forceinline vint8 assign(const vint4& a) { return _mm256_castsi128_si256(a); } - //////////////////////////////////////////////////////////////////////////////// /// Sorting networks //////////////////////////////////////////////////////////////////////////////// @@ -510,3 +508,11 @@ __forceinline vint8 permute(const vint8& v, const __m256i& index) { return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ", " << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vllong4_avx2.h b/thirdparty/embree/common/simd/vllong4_avx2.h index de3ebc16a7..6c86845877 100644 --- a/thirdparty/embree-aarch64/common/simd/vllong4_avx2.h +++ b/thirdparty/embree/common/simd/vllong4_avx2.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide AVX2 64-bit long long type */ @@ -95,16 +103,6 @@ namespace embree #endif } - static __forceinline vllong4 broadcast64bit(size_t v) { - return _mm256_set1_epi64x(v); - } - - static __forceinline size_t extract64bit(const vllong4& v) - { - return _mm_cvtsi128_si64(_mm256_castsi256_si128(v)); - } - - //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -276,18 +274,6 @@ namespace embree __forceinline vboold4 le(const vboold4& mask, const vllong4& a, const vllong4& b) { return mask & (a <= b); } #endif - __forceinline void xchg(const vboold4& m, vllong4& a, vllong4& b) { - const vllong4 c = a; a = select(m,b,a); b = select(m,c,b); - } - - __forceinline vboold4 test(const vllong4& a, const vllong4& b) { -#if defined(__AVX512VL__) - return _mm256_test_epi64_mask(a,b); -#else - return _mm256_testz_si256(a,b); -#endif - } - //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -356,3 +342,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vllong8_avx512.h b/thirdparty/embree/common/simd/vllong8_avx512.h index 76dddd8991..ee69411637 100644 --- a/thirdparty/embree-aarch64/common/simd/vllong8_avx512.h +++ b/thirdparty/embree/common/simd/vllong8_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX-512 64-bit long long type */ @@ -78,7 +86,7 @@ namespace embree return _mm512_load_si512(addr); } - static __forceinline vllong8 load(const uint8_t* ptr) { + static __forceinline vllong8 load(const unsigned char* ptr) { return _mm512_cvtepu8_epi64(*(__m128i*)ptr); } @@ -98,19 +106,6 @@ namespace embree _mm512_mask_store_epi64(addr,mask,v2); } - /* pass by value to avoid compiler generating inefficient code */ - static __forceinline void storeu_compact(const vboold8 mask, void* addr, const vllong8& reg) { - _mm512_mask_compressstoreu_epi64(addr,mask,reg); - } - - static __forceinline vllong8 compact64bit(const vboold8& mask, vllong8& v) { - return _mm512_mask_compress_epi64(v,mask,v); - } - - static __forceinline vllong8 compact64bit(const vboold8& mask, vllong8& dest, const vllong8& source) { - return _mm512_mask_compress_epi64(dest,mask,source); - } - static __forceinline vllong8 compact(const vboold8& mask, vllong8& v) { return _mm512_mask_compress_epi64(v,mask,v); } @@ -123,16 +118,6 @@ namespace embree return _mm512_mask_expand_epi64(b,mask,a); } - static __forceinline vllong8 broadcast64bit(size_t v) { - return _mm512_set1_epi64(v); - } - - static __forceinline size_t extract64bit(const vllong8& v) - { - return _mm_cvtsi128_si64(_mm512_castsi512_si128(v)); - } - - //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -271,18 +256,6 @@ namespace embree return _mm512_mask_or_epi64(f,m,t,t); } - __forceinline void xchg(const vboold8& m, vllong8& a, vllong8& b) { - const vllong8 c = a; a = select(m,b,a); b = select(m,c,b); - } - - __forceinline vboold8 test(const vboold8& m, const vllong8& a, const vllong8& b) { - return _mm512_mask_test_epi64_mask(m,a,b); - } - - __forceinline vboold8 test(const vllong8& a, const vllong8& b) { - return _mm512_test_epi64_mask(a,b); - } - //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -321,10 +294,6 @@ namespace embree return _mm_cvtsi128_si64(_mm512_castsi512_si128(v)); } - __forceinline vllong8 zeroExtend32Bit(const __m512i& a) { - return _mm512_cvtepu32_epi64(_mm512_castsi512_si256(a)); - } - //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// @@ -379,3 +348,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vuint16_avx512.h b/thirdparty/embree/common/simd/vuint16_avx512.h index 39752611bb..c9eb6682ff 100644 --- a/thirdparty/embree-aarch64/common/simd/vuint16_avx512.h +++ b/thirdparty/embree/common/simd/vuint16_avx512.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 16-wide AVX-512 unsigned integer type */ @@ -83,7 +91,7 @@ namespace embree return _mm512_loadu_si512(addr); } - static __forceinline vuint16 loadu(const uint8_t* ptr) { return _mm512_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr)); } + static __forceinline vuint16 loadu(const unsigned char* ptr) { return _mm512_cvtepu8_epi32(_mm_loadu_si128((__m128i*)ptr)); } static __forceinline vuint16 loadu(const unsigned short* ptr) { return _mm512_cvtepu16_epi32(_mm256_loadu_si256((__m256i*)ptr)); } static __forceinline vuint16 load(const vuint16* addr) { @@ -113,20 +121,6 @@ namespace embree _mm512_mask_store_epi32(addr,mask,v2); } - /* pass by value to avoid compiler generating inefficient code */ - static __forceinline void storeu_compact(const vboolf16 mask, void* addr, const vuint16 reg) { - _mm512_mask_compressstoreu_epi32(addr,mask,reg); - } - - static __forceinline void storeu_compact_single(const vboolf16 mask, void* addr, vuint16 reg) { - //_mm512_mask_compressstoreu_epi32(addr,mask,reg); - *(float*)addr = mm512_cvtss_f32(_mm512_mask_compress_ps(_mm512_castsi512_ps(reg),mask,_mm512_castsi512_ps(reg))); - } - - static __forceinline vuint16 compact64bit(const vboolf16& mask, vuint16& v) { - return _mm512_mask_compress_epi64(v,mask,v); - } - static __forceinline vuint16 compact(const vboolf16& mask, vuint16& v) { return _mm512_mask_compress_epi32(v,mask,v); } @@ -164,15 +158,6 @@ namespace embree _mm512_mask_i32scatter_epi32((int*)ptr,mask,index,v,scale); } - static __forceinline vuint16 broadcast64bit(size_t v) { - return _mm512_set1_epi64(v); - } - - static __forceinline size_t extract64bit(const vuint16& v) - { - return _mm_cvtsi128_si64(_mm512_castsi512_si128(v)); - } - //////////////////////////////////////////////////////////////////////////////// /// Array Access //////////////////////////////////////////////////////////////////////////////// @@ -315,18 +300,6 @@ namespace embree return _mm512_mask_or_epi32(f,m,t,t); } - __forceinline void xchg(const vboolf16& m, vuint16& a, vuint16& b) { - const vuint16 c = a; a = select(m,b,a); b = select(m,c,b); - } - - __forceinline vboolf16 test(const vboolf16& m, const vuint16& a, const vuint16& b) { - return _mm512_mask_test_epi32_mask(m,a,b); - } - - __forceinline vboolf16 test(const vuint16& a, const vuint16& b) { - return _mm512_test_epi32_mask(a,b); - } - //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// @@ -441,3 +414,11 @@ namespace embree return cout; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vuint4_sse2.h b/thirdparty/embree/common/simd/vuint4_sse2.h index a3f393ebf2..0601b9ab80 100644 --- a/thirdparty/embree-aarch64/common/simd/vuint4_sse2.h +++ b/thirdparty/embree/common/simd/vuint4_sse2.h @@ -1,10 +1,18 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../math/math.h" +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 4-wide SSE integer type */ @@ -87,64 +95,27 @@ namespace embree static __forceinline void storeu(const vboolf4& mask, void* ptr, const vuint4& i) { storeu(ptr,select(mask,i,loadu(ptr))); } #endif -#if defined(__aarch64__) - static __forceinline vuint4 load(const uint8_t* ptr) { - return _mm_load4epu8_epi32(((__m128i*)ptr)); - } - static __forceinline vuint4 loadu(const uint8_t* ptr) { - return _mm_load4epu8_epi32(((__m128i*)ptr)); - } -#elif defined(__SSE4_1__) - static __forceinline vuint4 load(const uint8_t* ptr) { +#if defined(__SSE4_1__) + static __forceinline vuint4 load(const unsigned char* ptr) { return _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } - static __forceinline vuint4 loadu(const uint8_t* ptr) { + static __forceinline vuint4 loadu(const unsigned char* ptr) { return _mm_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } #endif static __forceinline vuint4 load(const unsigned short* ptr) { -#if defined(__aarch64__) - return _mm_load4epu16_epi32(((__m128i*)ptr)); -#elif defined (__SSE4_1__) +#if defined (__SSE4_1__) return _mm_cvtepu16_epi32(_mm_loadu_si128((__m128i*)ptr)); #else return vuint4(ptr[0],ptr[1],ptr[2],ptr[3]); #endif } - static __forceinline void store_uint8(uint8_t* ptr, const vuint4& v) { -#if defined(__aarch64__) - uint32x4_t x = uint32x4_t(v.v); - uint16x4_t y = vqmovn_u32(x); - uint8x8_t z = vqmovn_u16(vcombine_u16(y, y)); - vst1_lane_u32((uint32_t *)ptr, uint32x2_t(z), 0); -#elif defined(__SSE4_1__) - __m128i x = v; - x = _mm_packus_epi32(x, x); - x = _mm_packus_epi16(x, x); - *(unsigned*)ptr = _mm_cvtsi128_si32(x); -#else - for (size_t i=0;i<4;i++) - ptr[i] = (uint8_t)v[i]; -#endif - } - - static __forceinline void store_uint8(unsigned short* ptr, const vuint4& v) { -#if defined(__aarch64__) - uint32x4_t x = (uint32x4_t)v.v; - uint16x4_t y = vqmovn_u32(x); - vst1_u16(ptr, y); -#else - for (size_t i=0;i<4;i++) - ptr[i] = (unsigned short)v[i]; -#endif - } - static __forceinline vuint4 load_nt(void* ptr) { -#if (defined(__aarch64__)) || defined(__SSE4_1__) +#if defined(__SSE4_1__) return _mm_stream_load_si128((__m128i*)ptr); #else return _mm_load_si128((__m128i*)ptr); @@ -152,8 +123,8 @@ namespace embree } static __forceinline void store_nt(void* ptr, const vuint4& v) { -#if !defined(__aarch64__) && defined(__SSE4_1__) - _mm_stream_ps((float*)ptr, _mm_castsi128_ps(v)); +#if defined(__SSE4_1__) + _mm_stream_ps((float*)ptr,_mm_castsi128_ps(v)); #else _mm_store_si128((__m128i*)ptr,v); #endif @@ -161,14 +132,14 @@ namespace embree template<int scale = 4> static __forceinline vuint4 gather(const unsigned int* ptr, const vint4& index) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _mm_i32gather_epi32((const int*)ptr, index, scale); #else return vuint4( - *(unsigned int*)(((int8_t*)ptr)+scale*index[0]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[1]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[2]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[3])); + *(unsigned int*)(((char*)ptr)+scale*index[0]), + *(unsigned int*)(((char*)ptr)+scale*index[1]), + *(unsigned int*)(((char*)ptr)+scale*index[2]), + *(unsigned int*)(((char*)ptr)+scale*index[3])); #endif } @@ -177,13 +148,13 @@ namespace embree vuint4 r = zero; #if defined(__AVX512VL__) return _mm_mmask_i32gather_epi32(r, mask, index, ptr, scale); -#elif defined(__AVX2__) && !defined(__aarch64__) +#elif defined(__AVX2__) return _mm_mask_i32gather_epi32(r, (const int*)ptr, index, mask, scale); #else - if (likely(mask[0])) r[0] = *(unsigned int*)(((int8_t*)ptr)+scale*index[0]); - if (likely(mask[1])) r[1] = *(unsigned int*)(((int8_t*)ptr)+scale*index[1]); - if (likely(mask[2])) r[2] = *(unsigned int*)(((int8_t*)ptr)+scale*index[2]); - if (likely(mask[3])) r[3] = *(unsigned int*)(((int8_t*)ptr)+scale*index[3]); + if (likely(mask[0])) r[0] = *(unsigned int*)(((char*)ptr)+scale*index[0]); + if (likely(mask[1])) r[1] = *(unsigned int*)(((char*)ptr)+scale*index[1]); + if (likely(mask[2])) r[2] = *(unsigned int*)(((char*)ptr)+scale*index[2]); + if (likely(mask[3])) r[3] = *(unsigned int*)(((char*)ptr)+scale*index[3]); return r; #endif } @@ -373,25 +344,16 @@ namespace embree __forceinline vuint4 unpacklo(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } __forceinline vuint4 unpackhi(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b))); } -#if defined(__aarch64__) - template<int i0, int i1, int i2, int i3> - __forceinline vuint4 shuffle(const vuint4& v) { - return vreinterpretq_s32_u8(vqtbl1q_u8( (uint8x16_t)v.v, _MN_SHUFFLE(i0, i1, i2, i3))); - } - template<int i0, int i1, int i2, int i3> - __forceinline vuint4 shuffle(const vuint4& a, const vuint4& b) { - return vreinterpretq_s32_u8(vqtbl2q_u8( (uint8x16x2_t){(uint8x16_t)a.v, (uint8x16_t)b.v}, _MF_SHUFFLE(i0, i1, i2, i3))); - } -#else template<int i0, int i1, int i2, int i3> __forceinline vuint4 shuffle(const vuint4& v) { return _mm_shuffle_epi32(v, _MM_SHUFFLE(i3, i2, i1, i0)); } + template<int i0, int i1, int i2, int i3> __forceinline vuint4 shuffle(const vuint4& a, const vuint4& b) { return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); } -#endif + #if defined(__SSE3__) template<> __forceinline vuint4 shuffle<0, 0, 2, 2>(const vuint4& v) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(v))); } template<> __forceinline vuint4 shuffle<1, 1, 3, 3>(const vuint4& v) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(v))); } @@ -403,10 +365,7 @@ namespace embree return shuffle<i,i,i,i>(v); } -#if defined(__aarch64__) - template<int src> __forceinline unsigned int extract(const vuint4& b); - template<int dst> __forceinline vuint4 insert(const vuint4& a, const unsigned b); -#elif defined(__SSE4_1__) +#if defined(__SSE4_1__) template<int src> __forceinline unsigned int extract(const vuint4& b) { return _mm_extract_epi32(b, src); } template<int dst> __forceinline vuint4 insert(const vuint4& a, const unsigned b) { return _mm_insert_epi32(a, b, dst); } #else @@ -414,50 +373,11 @@ namespace embree template<int dst> __forceinline vuint4 insert(const vuint4& a, const unsigned b) { vuint4 c = a; c[dst&3] = b; return c; } #endif -#if defined(__aarch64__) - template<> __forceinline unsigned int extract<0>(const vuint4& b) { - return b[0]; - } - template<> __forceinline unsigned int extract<1>(const vuint4& b) { - return b[1]; - } - template<> __forceinline unsigned int extract<2>(const vuint4& b) { - return b[2]; - } - template<> __forceinline unsigned int extract<3>(const vuint4& b) { - return b[3]; - } - - template<> __forceinline vuint4 insert<0>(const vuint4& a, unsigned b){ - vuint4 c = a; - c[0] = b; - return c; - } - template<> __forceinline vuint4 insert<1>(const vuint4& a, unsigned b){ - vuint4 c = a; - c[1] = b; - return c; - } - template<> __forceinline vuint4 insert<2>(const vuint4& a, unsigned b){ - vuint4 c = a; - c[2] = b; - return c; - } - template<> __forceinline vuint4 insert<3>(const vuint4& a, unsigned b){ - vuint4 c = a; - c[3] = b; - return c; - } - - __forceinline unsigned int toScalar(const vuint4& v) { - return v[0]; - } -#else + template<> __forceinline unsigned int extract<0>(const vuint4& b) { return _mm_cvtsi128_si32(b); } __forceinline unsigned int toScalar(const vuint4& v) { return _mm_cvtsi128_si32(v); } -#endif - + //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// @@ -497,3 +417,10 @@ namespace embree } } +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vuint8_avx.h b/thirdparty/embree/common/simd/vuint8_avx.h index d4e86ae92d..589cd9d731 100644 --- a/thirdparty/embree-aarch64/common/simd/vuint8_avx.h +++ b/thirdparty/embree/common/simd/vuint8_avx.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX integer type */ @@ -69,24 +77,20 @@ namespace embree static __forceinline void store (void* ptr, const vuint8& f) { _mm256_store_ps((float*)ptr,_mm256_castsi256_ps(f)); } static __forceinline void storeu(void* ptr, const vuint8& f) { _mm256_storeu_ps((float*)ptr,_mm256_castsi256_ps(f)); } -#if !defined(__aarch64__) static __forceinline void store (const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); } static __forceinline void storeu(const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask,_mm256_castsi256_ps(f)); } -#else - static __forceinline void store (const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask.v,_mm256_castsi256_ps(f)); } - static __forceinline void storeu(const vboolf8& mask, void* ptr, const vuint8& f) { _mm256_maskstore_ps((float*)ptr,(__m256i)mask.v,_mm256_castsi256_ps(f)); } -#endif + static __forceinline void store_nt(void* ptr, const vuint8& v) { _mm256_stream_ps((float*)ptr,_mm256_castsi256_ps(v)); } - static __forceinline vuint8 load(const uint8_t* ptr) { + static __forceinline vuint8 load(const unsigned char* ptr) { vuint4 il = vuint4::load(ptr+0); vuint4 ih = vuint4::load(ptr+4); return vuint8(il,ih); } - static __forceinline vuint8 loadu(const uint8_t* ptr) { + static __forceinline vuint8 loadu(const unsigned char* ptr) { vuint4 il = vuint4::loadu(ptr+0); vuint4 ih = vuint4::loadu(ptr+4); return vuint8(il,ih); @@ -104,7 +108,7 @@ namespace embree return vuint8(il,ih); } - static __forceinline void store(uint8_t* ptr, const vuint8& i) { + static __forceinline void store(unsigned char* ptr, const vuint8& i) { vuint4 il(i.vl); vuint4 ih(i.vh); vuint4::store(ptr + 0,il); @@ -119,54 +123,54 @@ namespace embree template<int scale = 4> static __forceinline vuint8 gather(const unsigned int* ptr, const vint8& index) { return vuint8( - *(unsigned int*)(((int8_t*)ptr)+scale*index[0]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[1]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[2]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[3]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[4]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[5]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[6]), - *(unsigned int*)(((int8_t*)ptr)+scale*index[7])); + *(unsigned int*)(((char*)ptr)+scale*index[0]), + *(unsigned int*)(((char*)ptr)+scale*index[1]), + *(unsigned int*)(((char*)ptr)+scale*index[2]), + *(unsigned int*)(((char*)ptr)+scale*index[3]), + *(unsigned int*)(((char*)ptr)+scale*index[4]), + *(unsigned int*)(((char*)ptr)+scale*index[5]), + *(unsigned int*)(((char*)ptr)+scale*index[6]), + *(unsigned int*)(((char*)ptr)+scale*index[7])); } template<int scale = 4> static __forceinline vuint8 gather(const vboolf8& mask, const unsigned int* ptr, const vint8& index) { vuint8 r = zero; - if (likely(mask[0])) r[0] = *(unsigned int*)(((int8_t*)ptr)+scale*index[0]); - if (likely(mask[1])) r[1] = *(unsigned int*)(((int8_t*)ptr)+scale*index[1]); - if (likely(mask[2])) r[2] = *(unsigned int*)(((int8_t*)ptr)+scale*index[2]); - if (likely(mask[3])) r[3] = *(unsigned int*)(((int8_t*)ptr)+scale*index[3]); - if (likely(mask[4])) r[4] = *(unsigned int*)(((int8_t*)ptr)+scale*index[4]); - if (likely(mask[5])) r[5] = *(unsigned int*)(((int8_t*)ptr)+scale*index[5]); - if (likely(mask[6])) r[6] = *(unsigned int*)(((int8_t*)ptr)+scale*index[6]); - if (likely(mask[7])) r[7] = *(unsigned int*)(((int8_t*)ptr)+scale*index[7]); + if (likely(mask[0])) r[0] = *(unsigned int*)(((char*)ptr)+scale*index[0]); + if (likely(mask[1])) r[1] = *(unsigned int*)(((char*)ptr)+scale*index[1]); + if (likely(mask[2])) r[2] = *(unsigned int*)(((char*)ptr)+scale*index[2]); + if (likely(mask[3])) r[3] = *(unsigned int*)(((char*)ptr)+scale*index[3]); + if (likely(mask[4])) r[4] = *(unsigned int*)(((char*)ptr)+scale*index[4]); + if (likely(mask[5])) r[5] = *(unsigned int*)(((char*)ptr)+scale*index[5]); + if (likely(mask[6])) r[6] = *(unsigned int*)(((char*)ptr)+scale*index[6]); + if (likely(mask[7])) r[7] = *(unsigned int*)(((char*)ptr)+scale*index[7]); return r; } template<int scale = 4> static __forceinline void scatter(void* ptr, const vint8& ofs, const vuint8& v) { - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - *(unsigned int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + *(unsigned int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + *(unsigned int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + *(unsigned int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + *(unsigned int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + *(unsigned int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + *(unsigned int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + *(unsigned int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + *(unsigned int*)(((char*)ptr)+scale*ofs[7]) = v[7]; } template<int scale = 4> static __forceinline void scatter(const vboolf8& mask, void* ptr, const vint8& ofs, const vuint8& v) { - if (likely(mask[0])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - if (likely(mask[1])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - if (likely(mask[2])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - if (likely(mask[3])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - if (likely(mask[4])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - if (likely(mask[5])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - if (likely(mask[6])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - if (likely(mask[7])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + if (likely(mask[0])) *(unsigned int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + if (likely(mask[1])) *(unsigned int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + if (likely(mask[2])) *(unsigned int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + if (likely(mask[3])) *(unsigned int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + if (likely(mask[4])) *(unsigned int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + if (likely(mask[5])) *(unsigned int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + if (likely(mask[6])) *(unsigned int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + if (likely(mask[7])) *(unsigned int*)(((char*)ptr)+scale*ofs[7]) = v[7]; } @@ -294,10 +298,6 @@ namespace embree return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(f), _mm256_castsi256_ps(t), m)); } - __forceinline vuint8 notand(const vboolf8& m, const vuint8& f) { - return _mm256_castps_si256(_mm256_andnot_ps(m, _mm256_castsi256_ps(f))); - } - //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions @@ -335,7 +335,6 @@ namespace embree template<> __forceinline vuint8 shuffle<1, 1, 3, 3>(const vuint8& v) { return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(v))); } template<> __forceinline vuint8 shuffle<0, 1, 0, 1>(const vuint8& v) { return _mm256_castps_si256(_mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(v))))); } - __forceinline vuint8 broadcast(const unsigned int* ptr) { return _mm256_castps_si256(_mm256_broadcast_ss((const float*)ptr)); } template<int i> __forceinline vuint8 insert4(const vuint8& a, const vuint4& b) { return _mm256_insertf128_si256(a, b, i); } template<int i> __forceinline vuint4 extract4(const vuint8& a) { return _mm256_extractf128_si256(a, i); } template<> __forceinline vuint4 extract4<0>(const vuint8& a) { return _mm256_castsi256_si128(a); } @@ -377,3 +376,11 @@ namespace embree return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ", " << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/simd/vuint8_avx2.h b/thirdparty/embree/common/simd/vuint8_avx2.h index b2a965448d..17b994522f 100644 --- a/thirdparty/embree-aarch64/common/simd/vuint8_avx2.h +++ b/thirdparty/embree/common/simd/vuint8_avx2.h @@ -1,8 +1,16 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once +#define vboolf vboolf_impl +#define vboold vboold_impl +#define vint vint_impl +#define vuint vuint_impl +#define vllong vllong_impl +#define vfloat vfloat_impl +#define vdouble vdouble_impl + namespace embree { /* 8-wide AVX integer type */ @@ -66,8 +74,8 @@ namespace embree /// Loads and Stores //////////////////////////////////////////////////////////////////////////////// - static __forceinline vuint8 load(const uint8_t* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } - static __forceinline vuint8 loadu(const uint8_t* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } + static __forceinline vuint8 load(const unsigned char* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } + static __forceinline vuint8 loadu(const unsigned char* ptr) { return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)ptr)); } static __forceinline vuint8 load(const unsigned short* ptr) { return _mm256_cvtepu16_epi32(_mm_load_si128((__m128i*)ptr)); } static __forceinline vuint8 loadu(const unsigned short* ptr) { return _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i*)ptr)); } @@ -107,7 +115,7 @@ namespace embree _mm256_stream_ps((float*)ptr,_mm256_castsi256_ps(v)); } - static __forceinline void store(uint8_t* ptr, const vuint8& i) + static __forceinline void store(unsigned char* ptr, const vuint8& i) { for (size_t j=0; j<8; j++) ptr[j] = i[j]; @@ -139,14 +147,14 @@ namespace embree #if defined(__AVX512VL__) _mm256_i32scatter_epi32((int*)ptr, ofs, v, scale); #else - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[0]) = v[0]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[1]) = v[1]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[2]) = v[2]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[3]) = v[3]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[4]) = v[4]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[5]) = v[5]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[6]) = v[6]; - *(unsigned int*)(((int8_t*)ptr) + scale * ofs[7]) = v[7]; + *(unsigned int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + *(unsigned int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + *(unsigned int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + *(unsigned int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + *(unsigned int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + *(unsigned int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + *(unsigned int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + *(unsigned int*)(((char*)ptr)+scale*ofs[7]) = v[7]; #endif } @@ -156,14 +164,14 @@ namespace embree #if defined(__AVX512VL__) _mm256_mask_i32scatter_epi32((int*)ptr, mask, ofs, v, scale); #else - if (likely(mask[0])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[0]) = v[0]; - if (likely(mask[1])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[1]) = v[1]; - if (likely(mask[2])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[2]) = v[2]; - if (likely(mask[3])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[3]) = v[3]; - if (likely(mask[4])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[4]) = v[4]; - if (likely(mask[5])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[5]) = v[5]; - if (likely(mask[6])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[6]) = v[6]; - if (likely(mask[7])) *(unsigned int*)(((int8_t*)ptr)+scale*ofs[7]) = v[7]; + if (likely(mask[0])) *(unsigned int*)(((char*)ptr)+scale*ofs[0]) = v[0]; + if (likely(mask[1])) *(unsigned int*)(((char*)ptr)+scale*ofs[1]) = v[1]; + if (likely(mask[2])) *(unsigned int*)(((char*)ptr)+scale*ofs[2]) = v[2]; + if (likely(mask[3])) *(unsigned int*)(((char*)ptr)+scale*ofs[3]) = v[3]; + if (likely(mask[4])) *(unsigned int*)(((char*)ptr)+scale*ofs[4]) = v[4]; + if (likely(mask[5])) *(unsigned int*)(((char*)ptr)+scale*ofs[5]) = v[5]; + if (likely(mask[6])) *(unsigned int*)(((char*)ptr)+scale*ofs[6]) = v[6]; + if (likely(mask[7])) *(unsigned int*)(((char*)ptr)+scale*ofs[7]) = v[7]; #endif } @@ -371,16 +379,12 @@ namespace embree template<> __forceinline vuint8 shuffle<1, 1, 3, 3>(const vuint8& v) { return _mm256_castps_si256(_mm256_movehdup_ps(_mm256_castsi256_ps(v))); } template<> __forceinline vuint8 shuffle<0, 1, 0, 1>(const vuint8& v) { return _mm256_castps_si256(_mm256_castpd_ps(_mm256_movedup_pd(_mm256_castps_pd(_mm256_castsi256_ps(v))))); } - __forceinline vuint8 broadcast(const unsigned int* ptr) { return _mm256_castps_si256(_mm256_broadcast_ss((const float*)ptr)); } - template<int i> __forceinline vuint8 insert4(const vuint8& a, const vuint4& b) { return _mm256_insertf128_si256(a, b, i); } template<int i> __forceinline vuint4 extract4(const vuint8& a) { return _mm256_extractf128_si256(a, i); } template<> __forceinline vuint4 extract4<0>(const vuint8& a) { return _mm256_castsi256_si128(a); } __forceinline int toScalar(const vuint8& v) { return _mm_cvtsi128_si32(_mm256_castsi256_si128(v)); } -#if !defined(__aarch64__) - __forceinline vuint8 permute(const vuint8& v, const __m256i& index) { return _mm256_permutevar8x32_epi32(v, index); } @@ -396,10 +400,7 @@ namespace embree #else return _mm256_alignr_epi8(a, b, 4*i); #endif - } - -#endif - + } //////////////////////////////////////////////////////////////////////////////// /// Reductions @@ -427,8 +428,6 @@ namespace embree //__forceinline size_t select_min(const vboolf8& valid, const vuint8& v) { const vuint8 a = select(valid,v,vuint8(pos_inf)); return bsf(movemask(valid & (a == vreduce_min(a)))); } //__forceinline size_t select_max(const vboolf8& valid, const vuint8& v) { const vuint8 a = select(valid,v,vuint8(neg_inf)); return bsf(movemask(valid & (a == vreduce_max(a)))); } - __forceinline vuint8 assign(const vuint4& a) { return _mm256_castsi128_si256(a); } - //////////////////////////////////////////////////////////////////////////////// /// Output Operators //////////////////////////////////////////////////////////////////////////////// @@ -437,3 +436,11 @@ namespace embree return cout << "<" << a[0] << ", " << a[1] << ", " << a[2] << ", " << a[3] << ", " << a[4] << ", " << a[5] << ", " << a[6] << ", " << a[7] << ">"; } } + +#undef vboolf +#undef vboold +#undef vint +#undef vuint +#undef vllong +#undef vfloat +#undef vdouble diff --git a/thirdparty/embree-aarch64/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp index 12f143f131..abdd269069 100644 --- a/thirdparty/embree-aarch64/common/sys/alloc.cpp +++ b/thirdparty/embree/common/sys/alloc.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "alloc.h" @@ -23,7 +23,7 @@ namespace embree if (size != 0 && ptr == nullptr) // -- GODOT start -- // throw std::bad_alloc(); - abort(); + abort(); // -- GODOT end -- return ptr; diff --git a/thirdparty/embree-aarch64/common/sys/alloc.h b/thirdparty/embree/common/sys/alloc.h index 5898ecda70..4fa474ec1d 100644 --- a/thirdparty/embree-aarch64/common/sys/alloc.h +++ b/thirdparty/embree/common/sys/alloc.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/array.h b/thirdparty/embree/common/sys/array.h index 77722a39f6..dd9190c52a 100644 --- a/thirdparty/embree-aarch64/common/sys/array.h +++ b/thirdparty/embree/common/sys/array.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -139,7 +139,7 @@ namespace embree __forceinline Ty& operator[](const unsigned i) { assert(i<N); return data[i]; } __forceinline const Ty& operator[](const unsigned i) const { assert(i<N); return data[i]; } -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) __forceinline Ty& operator[](const size_t i) { assert(i<N); return data[i]; } __forceinline const Ty& operator[](const size_t i) const { assert(i<N); return data[i]; } #endif @@ -196,7 +196,7 @@ namespace embree __forceinline Ty& operator[](const int i) { assert(i>=0 && i<max_total_elements); resize(i+1); return data[i]; } __forceinline Ty& operator[](const unsigned i) { assert(i<max_total_elements); resize(i+1); return data[i]; } -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) __forceinline Ty& operator[](const size_t i) { assert(i<max_total_elements); resize(i+1); return data[i]; } #endif diff --git a/thirdparty/embree-aarch64/common/sys/atomic.h b/thirdparty/embree/common/sys/atomic.h index ebfb8552c3..67af254f36 100644 --- a/thirdparty/embree-aarch64/common/sys/atomic.h +++ b/thirdparty/embree/common/sys/atomic.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/barrier.cpp b/thirdparty/embree/common/sys/barrier.cpp index 0061d18db2..0c0e39d92d 100644 --- a/thirdparty/embree-aarch64/common/sys/barrier.cpp +++ b/thirdparty/embree/common/sys/barrier.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "barrier.h" diff --git a/thirdparty/embree-aarch64/common/sys/barrier.h b/thirdparty/embree/common/sys/barrier.h index 89607b8685..37fc036291 100644 --- a/thirdparty/embree-aarch64/common/sys/barrier.h +++ b/thirdparty/embree/common/sys/barrier.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/condition.cpp b/thirdparty/embree/common/sys/condition.cpp index 0e7ca7af39..606a1d0b04 100644 --- a/thirdparty/embree-aarch64/common/sys/condition.cpp +++ b/thirdparty/embree/common/sys/condition.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "condition.h" @@ -40,19 +40,23 @@ namespace embree struct ConditionImplementation { __forceinline ConditionImplementation () { - pthread_cond_init(&cond,nullptr); + if (pthread_cond_init(&cond,nullptr) != 0) + THROW_RUNTIME_ERROR("pthread_cond_init failed"); } __forceinline ~ConditionImplementation() { - pthread_cond_destroy(&cond); - } + MAYBE_UNUSED bool ok = pthread_cond_destroy(&cond) == 0; + assert(ok); + } __forceinline void wait(MutexSys& mutex) { - pthread_cond_wait(&cond, (pthread_mutex_t*)mutex.mutex); + if (pthread_cond_wait(&cond, (pthread_mutex_t*)mutex.mutex) != 0) + THROW_RUNTIME_ERROR("pthread_cond_wait failed"); } __forceinline void notify_all() { - pthread_cond_broadcast(&cond); + if (pthread_cond_broadcast(&cond) != 0) + THROW_RUNTIME_ERROR("pthread_cond_broadcast failed"); } public: diff --git a/thirdparty/embree-aarch64/common/sys/condition.h b/thirdparty/embree/common/sys/condition.h index 7a3a05aa81..557c6e3482 100644 --- a/thirdparty/embree-aarch64/common/sys/condition.h +++ b/thirdparty/embree/common/sys/condition.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/filename.cpp b/thirdparty/embree/common/sys/filename.cpp index 86182c1afb..f55b224302 100644 --- a/thirdparty/embree-aarch64/common/sys/filename.cpp +++ b/thirdparty/embree/common/sys/filename.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "filename.h" diff --git a/thirdparty/embree-aarch64/common/sys/filename.h b/thirdparty/embree/common/sys/filename.h index 58f881b14d..d5929cd836 100644 --- a/thirdparty/embree-aarch64/common/sys/filename.h +++ b/thirdparty/embree/common/sys/filename.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -73,7 +73,7 @@ namespace embree friend bool operator!=(const FileName& a, const FileName& b); /*! output operator */ - friend embree_ostream operator<<(embree_ostream cout, const FileName& filename); + friend std::ostream& operator<<(std::ostream& cout, const FileName& filename); private: std::string filename; diff --git a/thirdparty/embree-aarch64/common/sys/intrinsics.h b/thirdparty/embree/common/sys/intrinsics.h index 44cdbd8f0f..ed8dd7d40a 100644 --- a/thirdparty/embree-aarch64/common/sys/intrinsics.h +++ b/thirdparty/embree/common/sys/intrinsics.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -10,10 +10,7 @@ #endif #if defined(__ARM_NEON) -#include "../math/SSE2NEON.h" -#if defined(NEON_AVX2_EMULATION) -#include "../math/AVX2NEON.h" -#endif +#include "../simd/arm/emulation.h" #else #include <immintrin.h> #endif @@ -27,14 +24,6 @@ #endif #endif -#if defined(__aarch64__) -#if !defined(_lzcnt_u32) - #define _lzcnt_u32 __builtin_clz -#endif -#if !defined(_lzcnt_u32) - #define _lzcnt_u32 __builtin_clzll -#endif -#else #if defined(__LZCNT__) #if !defined(_lzcnt_u32) #define _lzcnt_u32 __lzcnt32 @@ -43,13 +32,16 @@ #define _lzcnt_u64 __lzcnt64 #endif #endif -#endif #if defined(__WIN32__) -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include <windows.h> +// -- GODOT start -- +#if !defined(NOMINMAX) +// -- GODOT end -- +#define NOMINMAX +// -- GODOT start -- +#endif +#include "windows.h" +// -- GODOT end -- #endif /* normally defined in pmmintrin.h, but we always need this */ @@ -62,133 +54,133 @@ namespace embree { - + //////////////////////////////////////////////////////////////////////////////// /// Windows Platform //////////////////////////////////////////////////////////////////////////////// - + #if defined(__WIN32__) - - __forceinline size_t read_tsc() + + __forceinline size_t read_tsc() { LARGE_INTEGER li; QueryPerformanceCounter(&li); return (size_t)li.QuadPart; } - + __forceinline int bsf(int v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _tzcnt_u32(v); #else unsigned long r = 0; _BitScanForward(&r,v); return r; #endif } - + __forceinline unsigned bsf(unsigned v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _tzcnt_u32(v); #else unsigned long r = 0; _BitScanForward(&r,v); return r; #endif } - + #if defined(__X86_64__) __forceinline size_t bsf(size_t v) { -#if defined(__AVX2__) +#if defined(__AVX2__) return _tzcnt_u64(v); #else unsigned long r = 0; _BitScanForward64(&r,v); return r; #endif } #endif - - __forceinline int bscf(int& v) + + __forceinline int bscf(int& v) { int i = bsf(v); v &= v-1; return i; } - - __forceinline unsigned bscf(unsigned& v) + + __forceinline unsigned bscf(unsigned& v) { unsigned i = bsf(v); v &= v-1; return i; } - + #if defined(__X86_64__) - __forceinline size_t bscf(size_t& v) + __forceinline size_t bscf(size_t& v) { size_t i = bsf(v); v &= v-1; return i; } #endif - + __forceinline int bsr(int v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return 31 - _lzcnt_u32(v); #else unsigned long r = 0; _BitScanReverse(&r,v); return r; #endif } - + __forceinline unsigned bsr(unsigned v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return 31 - _lzcnt_u32(v); #else unsigned long r = 0; _BitScanReverse(&r,v); return r; #endif } - + #if defined(__X86_64__) __forceinline size_t bsr(size_t v) { -#if defined(__AVX2__) +#if defined(__AVX2__) return 63 -_lzcnt_u64(v); #else unsigned long r = 0; _BitScanReverse64(&r, v); return r; #endif } #endif - + __forceinline int lzcnt(const int x) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _lzcnt_u32(x); #else if (unlikely(x == 0)) return 32; - return 31 - bsr(x); + return 31 - bsr(x); #endif } - + __forceinline int btc(int v, int i) { long r = v; _bittestandcomplement(&r,i); return r; } - + __forceinline int bts(int v, int i) { long r = v; _bittestandset(&r,i); return r; } - + __forceinline int btr(int v, int i) { long r = v; _bittestandreset(&r,i); return r; } - + #if defined(__X86_64__) - + __forceinline size_t btc(size_t v, size_t i) { size_t r = v; _bittestandcomplement64((__int64*)&r,i); return r; } - + __forceinline size_t bts(size_t v, size_t i) { __int64 r = v; _bittestandset64(&r,i); return r; } - + __forceinline size_t btr(size_t v, size_t i) { __int64 r = v; _bittestandreset64(&r,i); return r; } - + #endif - + __forceinline int32_t atomic_cmpxchg(volatile int32_t* p, const int32_t c, const int32_t v) { return _InterlockedCompareExchange((volatile long*)p,v,c); } @@ -196,174 +188,160 @@ namespace embree //////////////////////////////////////////////////////////////////////////////// /// Unix Platform //////////////////////////////////////////////////////////////////////////////// - + #else - + #if defined(__i386__) && defined(__PIC__) - - __forceinline void __cpuid(int out[4], int op) + + __forceinline void __cpuid(int out[4], int op) { asm volatile ("xchg{l}\t{%%}ebx, %1\n\t" "cpuid\n\t" "xchg{l}\t{%%}ebx, %1\n\t" - : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) - : "0"(op)); + : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3]) + : "0"(op)); } - - __forceinline void __cpuid_count(int out[4], int op1, int op2) + + __forceinline void __cpuid_count(int out[4], int op1, int op2) { asm volatile ("xchg{l}\t{%%}ebx, %1\n\t" "cpuid\n\t" "xchg{l}\t{%%}ebx, %1\n\t" : "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3]) - : "0" (op1), "2" (op2)); + : "0" (op1), "2" (op2)); } - -#else + +#elif defined(__X86_ASM__) __forceinline void __cpuid(int out[4], int op) { -#if defined(__ARM_NEON) - if (op == 0) { // Get CPU name - out[0] = 0x41524d20; - out[1] = 0x41524d20; - out[2] = 0x41524d20; - out[3] = 0x41524d20; - } -#else - asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); -#endif + asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op)); } - -#if !defined(__ARM_NEON) + __forceinline void __cpuid_count(int out[4], int op1, int op2) { - asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2)); + asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2)); } + #endif - -#endif - + __forceinline uint64_t read_tsc() { -#if defined(__ARM_NEON) - return 0; // FIXME(LTE): mimic rdtsc -#else +#if defined(__X86_ASM__) uint32_t high,low; asm volatile ("rdtsc" : "=d"(high), "=a"(low)); return (((uint64_t)high) << 32) + (uint64_t)low; +#else + /* Not supported yet, meaning measuring traversal cost per pixel does not work. */ + return 0; #endif } - + __forceinline int bsf(int v) { -#if defined(__ARM_NEON) - return __builtin_ctz(v); -#else -#if defined(__AVX2__) +#if defined(__AVX2__) return _tzcnt_u32(v); -#else +#elif defined(__X86_ASM__) int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; -#endif +#else + return __builtin_ctz(v); #endif } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline unsigned bsf(unsigned v) + +#if defined(__64BIT__) + __forceinline unsigned bsf(unsigned v) { -#if defined(__ARM_NEON) - return __builtin_ctz(v); -#else -#if defined(__AVX2__) +#if defined(__AVX2__) return _tzcnt_u32(v); -#else +#elif defined(__X86_ASM__) unsigned r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; -#endif +#else + return __builtin_ctz(v); #endif } #endif - + __forceinline size_t bsf(size_t v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) #if defined(__X86_64__) return _tzcnt_u64(v); #else return _tzcnt_u32(v); #endif -#elif defined(__ARM_NEON) - return __builtin_ctzl(v); -#else +#elif defined(__X86_ASM__) size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; +#else + return __builtin_ctzl(v); #endif } - __forceinline int bscf(int& v) + __forceinline int bscf(int& v) { int i = bsf(v); v &= v-1; return i; } - -#if defined(__X86_64__) || defined(__aarch64__) - __forceinline unsigned int bscf(unsigned int& v) + +#if defined(__64BIT__) + __forceinline unsigned int bscf(unsigned int& v) { unsigned int i = bsf(v); v &= v-1; return i; } #endif - - __forceinline size_t bscf(size_t& v) + + __forceinline size_t bscf(size_t& v) { size_t i = bsf(v); v &= v-1; return i; } - + __forceinline int bsr(int v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return 31 - _lzcnt_u32(v); -#elif defined(__ARM_NEON) - return __builtin_clz(v)^31; -#else +#elif defined(__X86_ASM__) int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; +#else + return __builtin_clz(v) ^ 31; #endif } - -#if defined(__X86_64__) || defined(__aarch64__) + +#if defined(__64BIT__) __forceinline unsigned bsr(unsigned v) { -#if defined(__AVX2__) +#if defined(__AVX2__) return 31 - _lzcnt_u32(v); -#elif defined(__ARM_NEON) - return __builtin_clz(v)^31; -#else +#elif defined(__X86_ASM__) unsigned r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; +#else + return __builtin_clz(v) ^ 31; #endif } #endif - + __forceinline size_t bsr(size_t v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) #if defined(__X86_64__) return 63 - _lzcnt_u64(v); #else return 31 - _lzcnt_u32(v); #endif -#elif defined(__aarch64__) - return (sizeof(v) * 8 - 1) - __builtin_clzl(v); -#else +#elif defined(__X86_ASM__) size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; +#else + return (sizeof(v) * 8 - 1) - __builtin_clzl(v); #endif } - + __forceinline int lzcnt(const int x) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) return _lzcnt_u32(x); #else if (unlikely(x == 0)) return 32; - return 31 - bsr(x); + return 31 - bsr(x); #endif } __forceinline size_t blsr(size_t v) { -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) #if defined(__INTEL_COMPILER) return _blsr_u64(v); #else @@ -377,79 +355,65 @@ namespace embree return v & (v-1); #endif } - + __forceinline int btc(int v, int i) { -#if defined(__aarch64__) - // _bittestandcomplement(long *a, long b) { - // unsigned char x = (*a >> b) & 1; - // *a = *a ^ (1 << b); - // return x; - - // We only need `*a` - return (v ^ (1 << i)); -#else +#if defined(__X86_ASM__) int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r; +#else + return (v ^ (1 << i)); #endif } - + __forceinline int bts(int v, int i) { -#if defined(__aarch64__) - // _bittestandset(long *a, long b) { - // unsigned char x = (*a >> b) & 1; - // *a = *a | (1 << b); - // return x; - return (v | (v << i)); -#else +#if defined(__X86_ASM__) int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +#else + return (v | (v << i)); #endif } - + __forceinline int btr(int v, int i) { -#if defined(__aarch64__) - // _bittestandreset(long *a, long b) { - // unsigned char x = (*a >> b) & 1; - // *a = *a & ~(1 << b); - // return x; - return (v & ~(v << i)); -#else +#if defined(__X86_ASM__) int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +#else + return (v & ~(v << i)); #endif } - + __forceinline size_t btc(size_t v, size_t i) { -#if defined(__aarch64__) - return (v ^ (1 << i)); -#else +#if defined(__X86_ASM__) size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r; +#else + return (v ^ (1 << i)); #endif } - + __forceinline size_t bts(size_t v, size_t i) { -#if defined(__aarch64__) - return (v | (v << i)); -#else +#if defined(__X86_ASM__) size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +#else + return (v | (v << i)); #endif } - + __forceinline size_t btr(size_t v, size_t i) { -#if defined(__ARM_NEON) - return (v & ~(v << i)); -#else +#if defined(__X86_ASM__) size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +#else + return (v & ~(v << i)); #endif } __forceinline int32_t atomic_cmpxchg(int32_t volatile* value, int32_t comparand, const int32_t input) { return __sync_val_compare_and_swap(value, comparand, input); } - + #endif - + //////////////////////////////////////////////////////////////////////////////// /// All Platforms //////////////////////////////////////////////////////////////////////////////// - + #if defined(__clang__) || defined(__GNUC__) #if !defined(_mm_undefined_ps) __forceinline __m128 _mm_undefined_ps() { return _mm_setzero_ps(); } @@ -471,39 +435,41 @@ namespace embree #endif #endif -#if defined(__SSE4_2__) || defined(__ARM_NEON) - +#if defined(__SSE4_2__) + __forceinline int popcnt(int in) { return _mm_popcnt_u32(in); } - + __forceinline unsigned popcnt(unsigned in) { return _mm_popcnt_u32(in); } - -#if defined(__X86_64__) || defined(__ARM_NEON) + +#if defined(__64BIT__) __forceinline size_t popcnt(size_t in) { return _mm_popcnt_u64(in); } #endif - + #endif +#if defined(__X86_ASM__) __forceinline uint64_t rdtsc() { - int dummy[4]; - __cpuid(dummy,0); - uint64_t clock = read_tsc(); - __cpuid(dummy,0); + int dummy[4]; + __cpuid(dummy,0); + uint64_t clock = read_tsc(); + __cpuid(dummy,0); return clock; } - +#endif + __forceinline void pause_cpu(const size_t N = 8) { for (size_t i=0; i<N; i++) - _mm_pause(); + _mm_pause(); } - + /* prefetches */ __forceinline void prefetchL1 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T0); } __forceinline void prefetchL2 (const void* ptr) { _mm_prefetch((const char*)ptr,_MM_HINT_T1); } @@ -513,18 +479,18 @@ namespace embree #if defined(__INTEL_COMPILER) _mm_prefetch((const char*)ptr,_MM_HINT_ET0); #else - _mm_prefetch((const char*)ptr,_MM_HINT_T0); + _mm_prefetch((const char*)ptr,_MM_HINT_T0); #endif } - __forceinline void prefetchL1EX(const void* ptr) { - prefetchEX(ptr); + __forceinline void prefetchL1EX(const void* ptr) { + prefetchEX(ptr); } - - __forceinline void prefetchL2EX(const void* ptr) { - prefetchEX(ptr); + + __forceinline void prefetchL2EX(const void* ptr) { + prefetchEX(ptr); } -#if defined(__AVX2__) && !defined(__aarch64__) +#if defined(__AVX2__) __forceinline unsigned int pext(unsigned int a, unsigned int b) { return _pext_u32(a, b); } __forceinline unsigned int pdep(unsigned int a, unsigned int b) { return _pdep_u32(a, b); } #if defined(__X86_64__) diff --git a/thirdparty/embree-aarch64/common/sys/library.cpp b/thirdparty/embree/common/sys/library.cpp index 899267a1e4..fc983dffd5 100644 --- a/thirdparty/embree-aarch64/common/sys/library.cpp +++ b/thirdparty/embree/common/sys/library.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "library.h" @@ -27,7 +27,7 @@ namespace embree /* returns address of a symbol from the library */ void* getSymbol(lib_t lib, const std::string& sym) { - return reinterpret_cast<void *>(GetProcAddress(HMODULE(lib),sym.c_str())); + return (void*)GetProcAddress(HMODULE(lib),sym.c_str()); } /* closes the shared library */ @@ -61,7 +61,7 @@ namespace embree lib = dlopen((executable.path() + fullName).c_str(),RTLD_NOW); if (lib == nullptr) { const char* error = dlerror(); - if (error) { + if (error) { THROW_RUNTIME_ERROR(error); } else { THROW_RUNTIME_ERROR("could not load library "+executable.str()); diff --git a/thirdparty/embree-aarch64/common/sys/library.h b/thirdparty/embree/common/sys/library.h index c2164e9fbe..67e14d2420 100644 --- a/thirdparty/embree-aarch64/common/sys/library.h +++ b/thirdparty/embree/common/sys/library.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/mutex.cpp b/thirdparty/embree/common/sys/mutex.cpp index 11779bc9b9..789feaf2d8 100644 --- a/thirdparty/embree-aarch64/common/sys/mutex.cpp +++ b/thirdparty/embree/common/sys/mutex.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "mutex.h" @@ -36,7 +36,6 @@ namespace embree MAYBE_UNUSED bool ok = pthread_mutex_destroy((pthread_mutex_t*)mutex) == 0; assert(ok); delete (pthread_mutex_t*)mutex; - mutex = nullptr; } void MutexSys::lock() diff --git a/thirdparty/embree-aarch64/common/sys/mutex.h b/thirdparty/embree/common/sys/mutex.h index 1164210f23..4cb3626d92 100644 --- a/thirdparty/embree-aarch64/common/sys/mutex.h +++ b/thirdparty/embree/common/sys/mutex.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h index 737f14aa6e..697e07bb86 100644 --- a/thirdparty/embree-aarch64/common/sys/platform.h +++ b/thirdparty/embree/common/sys/platform.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -23,9 +23,17 @@ /// detect platform //////////////////////////////////////////////////////////////////////////////// -/* detect 32 or 64 platform */ +/* detect 32 or 64 Intel platform */ #if defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) #define __X86_64__ +#define __X86_ASM__ +#elif defined(__i386__) || defined(_M_IX86) +#define __X86_ASM__ +#endif + +/* detect 64 bit platform */ +#if defined(__X86_64__) || defined(__aarch64__) +#define __64BIT__ #endif /* detect Linux platform */ @@ -88,10 +96,12 @@ #define dll_import __declspec(dllimport) #else #define dll_export __attribute__ ((visibility ("default"))) -#define dll_import +#define dll_import #endif -#ifdef __WIN32__ +// -- GODOT start -- +#if defined(__WIN32__) && !defined(__MINGW32__) +// -- GODOT end -- #if !defined(__noinline) #define __noinline __declspec(noinline) #endif @@ -103,16 +113,11 @@ #define __restrict__ //__restrict // causes issues with MSVC #endif #if !defined(__thread) -// NOTE: Require `-fms-extensions` for clang #define __thread __declspec(thread) #endif #if !defined(__aligned) -#if defined(__MINGW32__) -#define __aligned(...) __attribute__((aligned(__VA_ARGS__))) -#else #define __aligned(...) __declspec(align(__VA_ARGS__)) #endif -#endif //#define __FUNCTION__ __FUNCTION__ #define debugbreak() __debugbreak() @@ -147,7 +152,7 @@ #endif // -- GODOT start -- -#ifndef likely +#if !defined(likely) // -- GODOT end -- #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #define likely(expr) (expr) @@ -205,7 +210,7 @@ namespace embree { /* windows does not have ssize_t */ #if defined(__WIN32__) -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) typedef int64_t ssize_t; #else typedef int32_t ssize_t; @@ -329,7 +334,7 @@ __forceinline std::string toString(long long value) { /// Some macros for static profiling //////////////////////////////////////////////////////////////////////////////// -#if defined (__GNUC__) +#if defined (__GNUC__) #define IACA_SSC_MARK( MARK_ID ) \ __asm__ __volatile__ ( \ "\n\t movl $"#MARK_ID", %%ebx" \ @@ -368,7 +373,7 @@ namespace embree bool active; const Closure f; }; - + template <typename Closure> OnScopeExitHelper<Closure> OnScopeExit(const Closure f) { return OnScopeExitHelper<Closure>(f); diff --git a/thirdparty/embree-aarch64/common/sys/ref.h b/thirdparty/embree/common/sys/ref.h index 24648e6234..c2b56c1908 100644 --- a/thirdparty/embree-aarch64/common/sys/ref.h +++ b/thirdparty/embree/common/sys/ref.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/regression.cpp b/thirdparty/embree/common/sys/regression.cpp index d95ff8dfe0..45315b1105 100644 --- a/thirdparty/embree-aarch64/common/sys/regression.cpp +++ b/thirdparty/embree/common/sys/regression.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "regression.h" diff --git a/thirdparty/embree-aarch64/common/sys/regression.h b/thirdparty/embree/common/sys/regression.h index 632f8d92cf..bb0bb94006 100644 --- a/thirdparty/embree-aarch64/common/sys/regression.h +++ b/thirdparty/embree/common/sys/regression.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/string.cpp b/thirdparty/embree/common/sys/string.cpp index 931244383e..f42fdc8536 100644 --- a/thirdparty/embree-aarch64/common/sys/string.cpp +++ b/thirdparty/embree/common/sys/string.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "string.h" diff --git a/thirdparty/embree-aarch64/common/sys/string.h b/thirdparty/embree/common/sys/string.h index 2e9b0f88c3..820076b21c 100644 --- a/thirdparty/embree-aarch64/common/sys/string.h +++ b/thirdparty/embree/common/sys/string.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp index 1d11436770..f1a59e511e 100644 --- a/thirdparty/embree-aarch64/common/sys/sysinfo.cpp +++ b/thirdparty/embree/common/sys/sysinfo.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "sysinfo.h" @@ -18,44 +18,32 @@ typedef cpuset_t cpu_set_t; namespace embree { NullTy null; - - std::string getPlatformName() + + std::string getPlatformName() { -#if defined(__LINUX__) && defined(__ANDROID__) && defined(__aarch64__) && defined(__ARM_NEON) - return "Android Linux (aarch64 / arm64)"; -#elif defined(__LINUX__) && defined(__ANDROID__) && defined(__X86_64__) - return "Android Linux (x64)"; -#elif defined(__LINUX__) && defined(__ANDROID__) && (defined(_X86_) || defined(__X86__) || defined(_M_IX86)) - return "Android Linux (x86)"; -#elif defined(__LINUX__) && !defined(__X86_64__) +#if defined(__LINUX__) && !defined(__64BIT__) return "Linux (32bit)"; -#elif defined(__LINUX__) && defined(__X86_64__) +#elif defined(__LINUX__) && defined(__64BIT__) return "Linux (64bit)"; -#elif defined(__FREEBSD__) && !defined(__X86_64__) +#elif defined(__FREEBSD__) && !defined(__64BIT__) return "FreeBSD (32bit)"; -#elif defined(__FREEBSD__) && defined(__X86_64__) +#elif defined(__FREEBSD__) && defined(__64BIT__) return "FreeBSD (64bit)"; -#elif defined(__CYGWIN__) && !defined(__X86_64__) +#elif defined(__CYGWIN__) && !defined(__64BIT__) return "Cygwin (32bit)"; -#elif defined(__CYGWIN__) && defined(__X86_64__) +#elif defined(__CYGWIN__) && defined(__64BIT__) return "Cygwin (64bit)"; -#elif defined(__WIN32__) && !defined(__X86_64__) +#elif defined(__WIN32__) && !defined(__64BIT__) return "Windows (32bit)"; -#elif defined(__WIN32__) && defined(__X86_64__) +#elif defined(__WIN32__) && defined(__64BIT__) return "Windows (64bit)"; -#elif defined(TARGET_IPHONE_SIMULATOR) && defined(__X86_64__) - return "iOS Simulator (x64)"; -#elif defined(TARGET_OS_IPHONE) && defined(__aarch64__) && defined(__ARM_NEON) - return "iOS (aarch64 / arm64)"; -#elif defined(__MACOSX__) && !defined(__X86_64__) +#elif defined(__MACOSX__) && !defined(__64BIT__) return "Mac OS X (32bit)"; -#elif defined(__MACOSX__) && defined(__X86_64__) +#elif defined(__MACOSX__) && defined(__64BIT__) return "Mac OS X (64bit)"; -#elif defined(__UNIX__) && defined(__aarch64__) - return "Unix (aarch64)"; -#elif defined(__UNIX__) && !defined(__X86_64__) +#elif defined(__UNIX__) && !defined(__64BIT__) return "Unix (32bit)"; -#elif defined(__UNIX__) && defined(__X86_64__) +#elif defined(__UNIX__) && defined(__64BIT__) return "Unix (64bit)"; #else return "Unknown"; @@ -91,21 +79,28 @@ namespace embree std::string getCPUVendor() { - int cpuinfo[4]; - __cpuid (cpuinfo, 0); +#if defined(__X86_ASM__) + int cpuinfo[4]; + __cpuid (cpuinfo, 0); int name[4]; name[0] = cpuinfo[1]; name[1] = cpuinfo[3]; name[2] = cpuinfo[2]; name[3] = 0; return (char*)name; +#elif defined(__ARM_NEON) + return "ARM"; +#else + return "Unknown"; +#endif } - CPU getCPUModel() + CPU getCPUModel() { +#if defined(__X86_ASM__) if (getCPUVendor() != "GenuineIntel") return CPU::UNKNOWN; - + int out[4]; __cpuid(out, 0); if (out[0] < 1) return CPU::UNKNOWN; @@ -169,6 +164,10 @@ namespace embree if (DisplayFamily_DisplayModel == 0x0685) return CPU::XEON_PHI_KNIGHTS_MILL; if (DisplayFamily_DisplayModel == 0x0657) return CPU::XEON_PHI_KNIGHTS_LANDING; +#elif defined(__ARM_NEON) + return CPU::ARM; +#endif + return CPU::UNKNOWN; } @@ -195,13 +194,13 @@ namespace embree case CPU::NEHALEM : return "Nehalem"; case CPU::CORE2 : return "Core2"; case CPU::CORE1 : return "Core"; - case CPU::ARM : return "Arm"; + case CPU::ARM : return "ARM"; case CPU::UNKNOWN : return "Unknown CPU"; } return "Unknown CPU (error)"; } -#if !defined(__ARM_NEON) +#if defined(__X86_ASM__) /* constants to access destination registers of CPUID instruction */ static const int EAX = 0; static const int EBX = 1; @@ -241,16 +240,17 @@ namespace embree static const int CPU_FEATURE_BIT_AVX512BW = 1 << 30; // AVX512BW (byte and word instructions) static const int CPU_FEATURE_BIT_AVX512VL = 1 << 31; // AVX512VL (vector length extensions) static const int CPU_FEATURE_BIT_AVX512IFMA = 1 << 21; // AVX512IFMA (integer fused multiple-add instructions) - + /* cpuid[eax=7,ecx=0].ecx */ static const int CPU_FEATURE_BIT_AVX512VBMI = 1 << 1; // AVX512VBMI (vector bit manipulation instructions) #endif -#if !defined(__ARM_NEON) - __noinline int64_t get_xcr0() +#if defined(__X86_ASM__) + __noinline int64_t get_xcr0() { - // https://github.com/opencv/opencv/blob/master/modules/core/src/system.cpp#L466 -#if defined (__WIN32__) && defined(_XCR_XFEATURE_ENABLED_MASK) +// -- GODOT start -- +#if defined (__WIN32__) && !defined (__MINGW32__) +// -- GODOT end -- int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32 xcr0 = _xgetbv(0); return xcr0; @@ -264,40 +264,19 @@ namespace embree int getCPUFeatures() { -#if defined(__ARM_NEON) - int cpu_features = CPU_FEATURE_NEON|CPU_FEATURE_SSE|CPU_FEATURE_SSE2; -#if defined(NEON_AVX2_EMULATION) - cpu_features |= CPU_FEATURE_SSE3|CPU_FEATURE_SSSE3|CPU_FEATURE_SSE42; - cpu_features |= CPU_FEATURE_XMM_ENABLED; - cpu_features |= CPU_FEATURE_YMM_ENABLED; - cpu_features |= CPU_FEATURE_SSE41 | CPU_FEATURE_RDRAND | CPU_FEATURE_F16C; - cpu_features |= CPU_FEATURE_POPCNT; - cpu_features |= CPU_FEATURE_AVX; - cpu_features |= CPU_FEATURE_AVX2; - cpu_features |= CPU_FEATURE_FMA3; - cpu_features |= CPU_FEATURE_LZCNT; - cpu_features |= CPU_FEATURE_BMI1; - cpu_features |= CPU_FEATURE_BMI2; - cpu_features |= CPU_FEATURE_NEON_2X; - - - -#endif - return cpu_features; - -#else +#if defined(__X86_ASM__) /* cache CPU features access */ static int cpu_features = 0; - if (cpu_features) + if (cpu_features) return cpu_features; /* get number of CPUID leaves */ - int cpuid_leaf0[4]; + int cpuid_leaf0[4]; __cpuid(cpuid_leaf0, 0x00000000); - unsigned nIds = cpuid_leaf0[EAX]; + unsigned nIds = cpuid_leaf0[EAX]; /* get number of extended CPUID leaves */ - int cpuid_leafe[4]; + int cpuid_leafe[4]; __cpuid(cpuid_leafe, 0x80000000); unsigned nExIds = cpuid_leafe[EAX]; @@ -329,7 +308,7 @@ namespace embree if (xmm_enabled) cpu_features |= CPU_FEATURE_XMM_ENABLED; if (ymm_enabled) cpu_features |= CPU_FEATURE_YMM_ENABLED; if (zmm_enabled) cpu_features |= CPU_FEATURE_ZMM_ENABLED; - + if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE ) cpu_features |= CPU_FEATURE_SSE; if (cpuid_leaf_1[EDX] & CPU_FEATURE_BIT_SSE2 ) cpu_features |= CPU_FEATURE_SSE2; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE3 ) cpu_features |= CPU_FEATURE_SSE3; @@ -337,8 +316,8 @@ namespace embree if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_1) cpu_features |= CPU_FEATURE_SSE41; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_SSE4_2) cpu_features |= CPU_FEATURE_SSE42; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_POPCNT) cpu_features |= CPU_FEATURE_POPCNT; + if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_AVX ) cpu_features |= CPU_FEATURE_AVX; - if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_F16C ) cpu_features |= CPU_FEATURE_F16C; if (cpuid_leaf_1[ECX] & CPU_FEATURE_BIT_RDRAND) cpu_features |= CPU_FEATURE_RDRAND; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX2 ) cpu_features |= CPU_FEATURE_AVX2; @@ -350,7 +329,7 @@ namespace embree if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512F ) cpu_features |= CPU_FEATURE_AVX512F; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512DQ ) cpu_features |= CPU_FEATURE_AVX512DQ; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512PF ) cpu_features |= CPU_FEATURE_AVX512PF; - if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER; + if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512ER ) cpu_features |= CPU_FEATURE_AVX512ER; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512CD ) cpu_features |= CPU_FEATURE_AVX512CD; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512BW ) cpu_features |= CPU_FEATURE_AVX512BW; if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512IFMA) cpu_features |= CPU_FEATURE_AVX512IFMA; @@ -358,6 +337,12 @@ namespace embree if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI; return cpu_features; +#elif defined(__ARM_NEON) + /* emulated features with sse2neon */ + return CPU_FEATURE_SSE|CPU_FEATURE_SSE2|CPU_FEATURE_XMM_ENABLED; +#else + /* Unknown CPU. */ + return 0; #endif } @@ -391,11 +376,9 @@ namespace embree if (features & CPU_FEATURE_AVX512VL) str += "AVX512VL "; if (features & CPU_FEATURE_AVX512IFMA) str += "AVX512IFMA "; if (features & CPU_FEATURE_AVX512VBMI) str += "AVX512VBMI "; - if (features & CPU_FEATURE_NEON) str += "NEON "; - if (features & CPU_FEATURE_NEON_2X) str += "2xNEON "; return str; } - + std::string stringOfISA (int isa) { if (isa == SSE) return "SSE"; @@ -406,17 +389,14 @@ namespace embree if (isa == SSE42) return "SSE4.2"; if (isa == AVX) return "AVX"; if (isa == AVX2) return "AVX2"; - if (isa == AVX512KNL) return "AVX512KNL"; - if (isa == AVX512SKX) return "AVX512SKX"; - if (isa == NEON) return "NEON"; - if (isa == NEON_2X) return "2xNEON"; + if (isa == AVX512) return "AVX512"; return "UNKNOWN"; } bool hasISA(int features, int isa) { return (features & isa) == isa; } - + std::string supportedTargetList (int features) { std::string v; @@ -429,10 +409,7 @@ namespace embree if (hasISA(features,AVX)) v += "AVX "; if (hasISA(features,AVXI)) v += "AVXI "; if (hasISA(features,AVX2)) v += "AVX2 "; - if (hasISA(features,AVX512KNL)) v += "AVX512KNL "; - if (hasISA(features,AVX512SKX)) v += "AVX512SKX "; - if (hasISA(features,NEON)) v += "NEON "; - if (hasISA(features,NEON_2X)) v += "2xNEON "; + if (hasISA(features,AVX512)) v += "AVX512 "; return v; } } @@ -456,7 +433,7 @@ namespace embree return std::string(filename); } - unsigned int getNumberOfLogicalThreads() + unsigned int getNumberOfLogicalThreads() { static int nThreads = -1; if (nThreads != -1) return nThreads; @@ -467,11 +444,11 @@ namespace embree GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount"); GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc) GetProcAddress(hlib, "GetActiveProcessorCount"); - if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount) + if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount) { int groups = pGetActiveProcessorGroupCount(); int totalProcessors = 0; - for (int i = 0; i < groups; i++) + for (int i = 0; i < groups; i++) totalProcessors += pGetActiveProcessorCount(i); nThreads = totalProcessors; } @@ -485,7 +462,7 @@ namespace embree return nThreads; } - int getTerminalWidth() + int getTerminalWidth() { HANDLE handle = GetStdHandle(STD_OUTPUT_HANDLE); if (handle == INVALID_HANDLE_VALUE) return 80; @@ -495,7 +472,7 @@ namespace embree return info.dwSize.X; } - double getSeconds() + double getSeconds() { LARGE_INTEGER freq, val; QueryPerformanceFrequency(&freq); @@ -534,7 +511,7 @@ namespace embree namespace embree { - std::string getExecutableFileName() + std::string getExecutableFileName() { std::string pid = "/proc/" + toString(getpid()) + "/exe"; char buf[4096]; @@ -587,7 +564,7 @@ namespace embree size_t getVirtualMemoryBytes() { return 0; } - + size_t getResidentMemoryBytes() { return 0; } @@ -617,7 +594,7 @@ namespace embree size_t getVirtualMemoryBytes() { return 0; } - + size_t getResidentMemoryBytes() { return 0; } @@ -638,12 +615,15 @@ namespace embree namespace embree { - unsigned int getNumberOfLogicalThreads() + unsigned int getNumberOfLogicalThreads() { static int nThreads = -1; if (nThreads != -1) return nThreads; +// -- GODOT start -- +// #if defined(__MACOSX__) #if defined(__MACOSX__) || defined(__ANDROID__) +// -- GODOT end -- nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container assert(nThreads); #else @@ -651,12 +631,12 @@ namespace embree if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) nThreads = CPU_COUNT(&set); #endif - + assert(nThreads); return nThreads; } - int getTerminalWidth() + int getTerminalWidth() { struct winsize info; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &info) < 0) return 80; diff --git a/thirdparty/embree-aarch64/common/sys/sysinfo.h b/thirdparty/embree/common/sys/sysinfo.h index 8e313a59b3..72351d12e4 100644 --- a/thirdparty/embree-aarch64/common/sys/sysinfo.h +++ b/thirdparty/embree/common/sys/sysinfo.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -16,13 +16,9 @@ /* define isa namespace and ISA bitvector */ #if defined (__AVX512VL__) -# define isa avx512skx -# define ISA AVX512SKX -# define ISA_STR "AVX512SKX" -#elif defined (__AVX512F__) -# define isa avx512knl -# define ISA AVX512KNL -# define ISA_STR "AVX512KNL" +# define isa avx512 +# define ISA AVX512 +# define ISA_STR "AVX512" #elif defined (__AVX2__) # define isa avx2 # define ISA AVX2 @@ -59,12 +55,7 @@ # define isa sse # define ISA SSE # define ISA_STR "SSE" -#elif defined(__ARM_NEON) -// NOTE(LTE): Use sse2 for `isa` for the compatibility at the moment. -#define isa sse2 -#define ISA NEON -#define ISA_STR "NEON" -#else +#else #error Unknown ISA #endif @@ -120,7 +111,7 @@ namespace embree static const int CPU_FEATURE_SSE3 = 1 << 2; static const int CPU_FEATURE_SSSE3 = 1 << 3; static const int CPU_FEATURE_SSE41 = 1 << 4; - static const int CPU_FEATURE_SSE42 = 1 << 5; + static const int CPU_FEATURE_SSE42 = 1 << 5; static const int CPU_FEATURE_POPCNT = 1 << 6; static const int CPU_FEATURE_AVX = 1 << 7; static const int CPU_FEATURE_F16C = 1 << 8; @@ -131,7 +122,7 @@ namespace embree static const int CPU_FEATURE_BMI1 = 1 << 13; static const int CPU_FEATURE_BMI2 = 1 << 14; static const int CPU_FEATURE_AVX512F = 1 << 16; - static const int CPU_FEATURE_AVX512DQ = 1 << 17; + static const int CPU_FEATURE_AVX512DQ = 1 << 17; static const int CPU_FEATURE_AVX512PF = 1 << 18; static const int CPU_FEATURE_AVX512ER = 1 << 19; static const int CPU_FEATURE_AVX512CD = 1 << 20; @@ -142,9 +133,7 @@ namespace embree static const int CPU_FEATURE_XMM_ENABLED = 1 << 25; static const int CPU_FEATURE_YMM_ENABLED = 1 << 26; static const int CPU_FEATURE_ZMM_ENABLED = 1 << 27; - static const int CPU_FEATURE_NEON = 1 << 28; - static const int CPU_FEATURE_NEON_2X = 1 << 29; - + /*! get CPU features */ int getCPUFeatures(); @@ -155,7 +144,7 @@ namespace embree std::string supportedTargetList (int isa); /*! ISAs */ - static const int SSE = CPU_FEATURE_SSE | CPU_FEATURE_XMM_ENABLED; + static const int SSE = CPU_FEATURE_SSE | CPU_FEATURE_XMM_ENABLED; static const int SSE2 = SSE | CPU_FEATURE_SSE2; static const int SSE3 = SSE2 | CPU_FEATURE_SSE3; static const int SSSE3 = SSE3 | CPU_FEATURE_SSSE3; @@ -164,10 +153,7 @@ namespace embree static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED; static const int AVXI = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND; static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT; - static const int AVX512KNL = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512PF | CPU_FEATURE_AVX512ER | CPU_FEATURE_AVX512CD | CPU_FEATURE_ZMM_ENABLED; - static const int AVX512SKX = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED; - static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2; - static const int NEON_2X = CPU_FEATURE_NEON_2X | AVX2; + static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED; /*! converts ISA bitvector into a string */ std::string stringOfISA(int features); diff --git a/thirdparty/embree-aarch64/common/sys/thread.cpp b/thirdparty/embree/common/sys/thread.cpp index f9ea5b7d96..f4014be89b 100644 --- a/thirdparty/embree-aarch64/common/sys/thread.cpp +++ b/thirdparty/embree/common/sys/thread.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "thread.h" @@ -7,7 +7,7 @@ #include <iostream> #if defined(__ARM_NEON) -#include "../math/SSE2NEON.h" +#include "../simd/arm/emulation.h" #else #include <xmmintrin.h> #endif @@ -39,7 +39,7 @@ namespace embree GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount"); SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity"); SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx"); - if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) + if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx) { int groups = pGetActiveProcessorGroupCount(); int totalProcessors = 0, group = 0, number = 0; @@ -52,7 +52,7 @@ namespace embree } totalProcessors += processors; } - + GROUP_AFFINITY groupAffinity; groupAffinity.Group = (WORD)group; groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number); @@ -61,15 +61,15 @@ namespace embree groupAffinity.Reserved[2] = 0; if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr)) WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning - + PROCESSOR_NUMBER processorNumber; processorNumber.Group = group; processorNumber.Number = number; processorNumber.Reserved = 0; if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr)) WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning - } - else + } + else { if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity))) WARNING("SetThreadAffinityMask failed"); // on purpose only a warning @@ -83,10 +83,10 @@ namespace embree setAffinity(GetCurrentThread(), affinity); } - struct ThreadStartupData + struct ThreadStartupData { public: - ThreadStartupData (thread_func f, void* arg) + ThreadStartupData (thread_func f, void* arg) : f(f), arg(arg) {} public: thread_func f; @@ -99,7 +99,6 @@ namespace embree _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); parg->f(parg->arg); delete parg; - parg = nullptr; return 0; } @@ -125,6 +124,12 @@ namespace embree CloseHandle(HANDLE(tid)); } + /*! destroy a hardware thread by its handle */ + void destroyThread(thread_t tid) { + TerminateThread(HANDLE(tid),0); + CloseHandle(HANDLE(tid)); + } + /*! creates thread local storage */ tls_t createTls() { return tls_t(size_t(TlsAlloc())); @@ -153,27 +158,24 @@ namespace embree /// Linux Platform //////////////////////////////////////////////////////////////////////////////// -#if defined(__LINUX__) +// -- GODOT start -- +#if defined(__LINUX__) && !defined(__ANDROID__) +// -- GODOT end -- #include <fstream> #include <sstream> #include <algorithm> -#if defined(__ANDROID__) -#include <pthread.h> -#endif - namespace embree { static MutexSys mutex; static std::vector<size_t> threadIDs; - -#if !defined(__ANDROID__) // TODO(LTE): Implement for Android target + /* changes thread ID mapping such that we first fill up all thread on one core */ size_t mapThreadID(size_t threadID) { Lock<MutexSys> lock(mutex); - + if (threadIDs.size() == 0) { /* parse thread/CPU topology */ @@ -185,11 +187,11 @@ namespace embree if (fs.fail()) break; int i; - while (fs >> i) + while (fs >> i) { if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; })) threadIDs.push_back(i); - if (fs.peek() == ',') + if (fs.peek() == ',') fs.ignore(); } fs.close(); @@ -233,24 +235,41 @@ namespace embree return ID; } -#endif /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { -#if defined(__ANDROID__) - // TODO(LTE): Implement -#else cpu_set_t cset; CPU_ZERO(&cset); size_t threadID = mapThreadID(affinity); CPU_SET(threadID, &cset); pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); + } +} #endif + +// -- GODOT start -- +//////////////////////////////////////////////////////////////////////////////// +/// Android Platform +//////////////////////////////////////////////////////////////////////////////// + +#if defined(__ANDROID__) + +namespace embree +{ + /*! set affinity of the calling thread */ + void setAffinity(ssize_t affinity) + { + cpu_set_t cset; + CPU_ZERO(&cset); + CPU_SET(affinity, &cset); + + sched_setaffinity(0, sizeof(cset), &cset); } } #endif +// -- GODOT end -- //////////////////////////////////////////////////////////////////////////////// /// FreeBSD Platform @@ -289,10 +308,14 @@ namespace embree /*! set affinity of the calling thread */ void setAffinity(ssize_t affinity) { +#if !defined(__ARM_NEON) // affinity seems not supported on M1 chip + thread_affinity_policy ap; ap.affinity_tag = affinity; if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) WARNING("setting thread affinity failed"); // on purpose only a warning + +#endif } } #endif @@ -312,21 +335,21 @@ namespace embree namespace embree { - struct ThreadStartupData + struct ThreadStartupData { public: - ThreadStartupData (thread_func f, void* arg, int affinity) + ThreadStartupData (thread_func f, void* arg, int affinity) : f(f), arg(arg), affinity(affinity) {} - public: + public: thread_func f; void* arg; ssize_t affinity; }; - + static void* threadStartup(ThreadStartupData* parg) { _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); - + /*! Mac OS X does not support setting affinity at thread creation time */ #if defined(__MACOSX__) if (parg->affinity >= 0) @@ -335,7 +358,6 @@ namespace embree parg->f(parg->arg); delete parg; - parg = nullptr; return nullptr; } @@ -351,13 +373,15 @@ namespace embree pthread_t* tid = new pthread_t; if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) { pthread_attr_destroy(&attr); - delete tid; + delete tid; FATAL("pthread_create failed"); } pthread_attr_destroy(&attr); /* set affinity */ +// -- GODOT start -- #if defined(__LINUX__) && !defined(__ANDROID__) +// -- GODOT end -- if (threadID >= 0) { cpu_set_t cset; CPU_ZERO(&cset); @@ -372,7 +396,16 @@ namespace embree CPU_SET(threadID, &cset); pthread_setaffinity_np(*tid, sizeof(cset), &cset); } +// -- GODOT start -- +#elif defined(__ANDROID__) + if (threadID >= 0) { + cpu_set_t cset; + CPU_ZERO(&cset); + CPU_SET(threadID, &cset); + sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset); + } #endif +// -- GODOT end -- return thread_t(tid); } @@ -389,8 +422,20 @@ namespace embree delete (pthread_t*)tid; } + /*! destroy a hardware thread by its handle */ + void destroyThread(thread_t tid) { +// -- GODOT start -- +#if defined(__ANDROID__) + FATAL("Can't destroy threads on Android."); +#else + pthread_cancel(*(pthread_t*)tid); + delete (pthread_t*)tid; +#endif +// -- GODOT end -- + } + /*! creates thread local storage */ - tls_t createTls() + tls_t createTls() { pthread_key_t* key = new pthread_key_t; if (pthread_key_create(key,nullptr) != 0) { @@ -402,14 +447,14 @@ namespace embree } /*! return the thread local storage pointer */ - void* getTls(tls_t tls) + void* getTls(tls_t tls) { assert(tls); return pthread_getspecific(*(pthread_key_t*)tls); } /*! set the thread local storage pointer */ - void setTls(tls_t tls, void* const ptr) + void setTls(tls_t tls, void* const ptr) { assert(tls); if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0) @@ -417,7 +462,7 @@ namespace embree } /*! destroys thread local storage identifier */ - void destroyTls(tls_t tls) + void destroyTls(tls_t tls) { assert(tls); if (pthread_key_delete(*(pthread_key_t*)tls) != 0) diff --git a/thirdparty/embree-aarch64/common/sys/thread.h b/thirdparty/embree/common/sys/thread.h index 45da6e6a70..92a10d5c5d 100644 --- a/thirdparty/embree-aarch64/common/sys/thread.h +++ b/thirdparty/embree/common/sys/thread.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -29,6 +29,9 @@ namespace embree /*! waits until the given thread has terminated */ void join(thread_t tid); + /*! destroy handle of a thread */ + void destroyThread(thread_t tid); + /*! type for handle to thread local storage */ typedef struct opaque_tls_t* tls_t; diff --git a/thirdparty/embree-aarch64/common/sys/vector.h b/thirdparty/embree/common/sys/vector.h index e41794de7c..f832626789 100644 --- a/thirdparty/embree-aarch64/common/sys/vector.h +++ b/thirdparty/embree/common/sys/vector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/tasking/taskscheduler.h b/thirdparty/embree/common/tasking/taskscheduler.h index 9940e068d0..8f3dd87689 100644 --- a/thirdparty/embree-aarch64/common/tasking/taskscheduler.h +++ b/thirdparty/embree/common/tasking/taskscheduler.h @@ -1,12 +1,10 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #if defined(TASKING_INTERNAL) # include "taskschedulerinternal.h" -#elif defined(TASKING_GCD) && defined(BUILD_IOS) -# include "taskschedulergcd.h" #elif defined(TASKING_TBB) # include "taskschedulertbb.h" #elif defined(TASKING_PPL) diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp b/thirdparty/embree/common/tasking/taskschedulerinternal.cpp index ebf656d1a0..ad438588a3 100644 --- a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp +++ b/thirdparty/embree/common/tasking/taskschedulerinternal.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "taskschedulerinternal.h" @@ -154,12 +154,6 @@ namespace embree assert(newNumThreads); newNumThreads = min(newNumThreads, (size_t) getNumberOfLogicalThreads()); - // We are observing a few % gain by increasing number threads by 2 on aarch64. -#if defined(__aarch64__) && defined(BUILD_IOS) - numThreads = newNumThreads*2; -#else - numThreads = newNumThreads; -#endif numThreads = newNumThreads; if (!startThreads && !running) return; running = true; @@ -382,10 +376,10 @@ namespace embree yield(); #endif } - // -- GODOT start -- - // return except; - return; - // -- GODOT end -- + // -- GODOT start -- + // return except; + return; + // -- GODOT end -- } bool TaskScheduler::steal_from_other_threads(Thread& thread) diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h index 8bd70b2b8c..8fa6bb12fa 100644 --- a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h +++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -135,16 +135,15 @@ namespace embree __forceinline void push_right(Thread& thread, const size_t size, const Closure& closure) { if (right >= TASK_STACK_SIZE) - // -- GODOT start -- - // throw std::runtime_error("task stack overflow"); - abort(); - // -- GODOT end -- + // -- GODOT start -- + // throw std::runtime_error("task stack overflow"); + abort(); + // -- GODOT end -- /* allocate new task on right side of stack */ size_t oldStackPtr = stackPtr; TaskFunction* func = new (alloc(sizeof(ClosureTaskFunction<Closure>))) ClosureTaskFunction<Closure>(closure); - /* gcc 8 or later fails to compile without explicit .load() */ - new (&(tasks[right.load()])) Task(func,thread.task,oldStackPtr,size); + new (&tasks[right]) Task(func,thread.task,oldStackPtr,size); right++; /* also move left pointer */ diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h b/thirdparty/embree/common/tasking/taskschedulerppl.h index 776f98cdac..cbc2ecdbb8 100644 --- a/thirdparty/embree-aarch64/common/tasking/taskschedulerppl.h +++ b/thirdparty/embree/common/tasking/taskschedulerppl.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h b/thirdparty/embree/common/tasking/taskschedulertbb.h index 98dba26871..35bd49849f 100644 --- a/thirdparty/embree-aarch64/common/tasking/taskschedulertbb.h +++ b/thirdparty/embree/common/tasking/taskschedulertbb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -12,7 +12,13 @@ #include "../sys/ref.h" #if defined(__WIN32__) +// -- GODOT start -- +#if !defined(NOMINMAX) +// -- GODOT end -- # define NOMINMAX +// -- GODOT start -- +#endif +// -- GODOT end -- #endif // We need to define these to avoid implicit linkage against diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore.h b/thirdparty/embree/include/embree3/rtcore.h index 5830bb5880..450ab4c535 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore.h +++ b/thirdparty/embree/include/embree3/rtcore.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_buffer.h b/thirdparty/embree/include/embree3/rtcore_buffer.h index 400b604aa5..6b8eba9769 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_buffer.h +++ b/thirdparty/embree/include/embree3/rtcore_buffer.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_builder.h b/thirdparty/embree/include/embree3/rtcore_builder.h index d62a7f72cc..4bff999fed 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_builder.h +++ b/thirdparty/embree/include/embree3/rtcore_builder.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_common.h b/thirdparty/embree/include/embree3/rtcore_common.h index 890e06faa3..4857e1e05e 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_common.h +++ b/thirdparty/embree/include/embree3/rtcore_common.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -19,7 +19,9 @@ typedef int ssize_t; #endif #endif -#if defined(_WIN32) && !defined(__MINGW32__) +// -- GODOT start -- +#if defined(_WIN32) && defined(_MSC_VER) +// -- GODOT end -- # define RTC_ALIGN(...) __declspec(align(__VA_ARGS__)) #else # define RTC_ALIGN(...) __attribute__((aligned(__VA_ARGS__))) @@ -35,7 +37,7 @@ typedef int ssize_t; #endif #endif -#if defined(_WIN32) +#if defined(_WIN32) # define RTC_FORCEINLINE __forceinline #else # define RTC_FORCEINLINE inline __attribute__((always_inline)) @@ -224,13 +226,13 @@ RTC_FORCEINLINE void rtcInitIntersectContext(struct RTCIntersectContext* context } /* Point query structure for closest point query */ -struct RTC_ALIGN(16) RTCPointQuery +struct RTC_ALIGN(16) RTCPointQuery { float x; // x coordinate of the query point float y; // y coordinate of the query point float z; // z coordinate of the query point float time; // time of the point query - float radius; // radius of the point query + float radius; // radius of the point query }; /* Structure of a packet of 4 query points */ @@ -250,7 +252,7 @@ struct RTC_ALIGN(32) RTCPointQuery8 float y[8]; // y coordinate of the query point float z[8]; // z coordinate of the query point float time[8]; // time of the point query - float radius[8]; // radius ofr the point query + float radius[8]; // radius ofr the point query }; /* Structure of a packet of 16 query points */ @@ -269,11 +271,11 @@ struct RTC_ALIGN(16) RTCPointQueryContext { // accumulated 4x4 column major matrices from world space to instance space. // undefined if size == 0. - float world2inst[RTC_MAX_INSTANCE_LEVEL_COUNT][16]; + float world2inst[RTC_MAX_INSTANCE_LEVEL_COUNT][16]; // accumulated 4x4 column major matrices from instance space to world space. // undefined if size == 0. - float inst2world[RTC_MAX_INSTANCE_LEVEL_COUNT][16]; + float inst2world[RTC_MAX_INSTANCE_LEVEL_COUNT][16]; // instance ids. unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; @@ -301,13 +303,13 @@ struct RTC_ALIGN(16) RTCPointQueryFunctionArguments void* userPtr; // primitive and geometry ID of primitive - unsigned int primID; - unsigned int geomID; + unsigned int primID; + unsigned int geomID; // the context with transformation and instance ID stack struct RTCPointQueryContext* context; - // If the current instance transform M (= context->world2inst[context->instStackSize]) + // If the current instance transform M (= context->world2inst[context->instStackSize]) // is a similarity matrix, i.e there is a constant factor similarityScale such that, // for all x,y: dist(Mx, My) = similarityScale * dist(x, y), // The similarity scale is 0, if the current instance transform is not a @@ -322,5 +324,5 @@ struct RTC_ALIGN(16) RTCPointQueryFunctionArguments }; typedef bool (*RTCPointQueryFunction)(struct RTCPointQueryFunctionArguments* args); - + RTC_NAMESPACE_END diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_config.h b/thirdparty/embree/include/embree3/rtcore_config.h index 337d4e9487..3a9819c9f1 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_config.h +++ b/thirdparty/embree/include/embree3/rtcore_config.h @@ -1,14 +1,14 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #define RTC_VERSION_MAJOR 3 -#define RTC_VERSION_MINOR 12 -#define RTC_VERSION_PATCH 1 -#define RTC_VERSION 31201 -#define RTC_VERSION_STRING "3.12.1" +#define RTC_VERSION_MINOR 13 +#define RTC_VERSION_PATCH 0 +#define RTC_VERSION 31300 +#define RTC_VERSION_STRING "3.13.0" #define RTC_MAX_INSTANCE_LEVEL_COUNT 1 diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_device.h b/thirdparty/embree/include/embree3/rtcore_device.h index 594e2b755d..2dd3047603 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_device.h +++ b/thirdparty/embree/include/embree3/rtcore_device.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_geometry.h b/thirdparty/embree/include/embree3/rtcore_geometry.h index c70f1b0e5c..d1de17491c 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_geometry.h +++ b/thirdparty/embree/include/embree3/rtcore_geometry.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_quaternion.h b/thirdparty/embree/include/embree3/rtcore_quaternion.h index 449cdedfdc..6489fa3467 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_quaternion.h +++ b/thirdparty/embree/include/embree3/rtcore_quaternion.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_ray.h b/thirdparty/embree/include/embree3/rtcore_ray.h index 1ae3309ef1..a2ee6dabbb 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_ray.h +++ b/thirdparty/embree/include/embree3/rtcore_ray.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/include/embree3/rtcore_scene.h b/thirdparty/embree/include/embree3/rtcore_scene.h index 0cd6401593..5878a3d402 100644 --- a/thirdparty/embree-aarch64/include/embree3/rtcore_scene.h +++ b/thirdparty/embree/include/embree3/rtcore_scene.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h b/thirdparty/embree/kernels/builders/bvh_builder_hair.h index 755ce255fb..d83e8918a1 100644 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_hair.h +++ b/thirdparty/embree/kernels/builders/bvh_builder_hair.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h b/thirdparty/embree/kernels/builders/bvh_builder_morton.h index 92be2f7e65..8f21e3254f 100644 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_morton.h +++ b/thirdparty/embree/kernels/builders/bvh_builder_morton.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h b/thirdparty/embree/kernels/builders/bvh_builder_msmblur.h index 4c138dacdb..f9a08d65cd 100644 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur.h +++ b/thirdparty/embree/kernels/builders/bvh_builder_msmblur.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -297,7 +297,7 @@ namespace embree if (object_split_sah < 0.50f*leaf_sah) return object_split; - /* do temporal splits only if the the time range is big enough */ + /* do temporal splits only if the time range is big enough */ if (set.time_range.size() > 1.01f/float(set.max_num_time_segments)) { const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize); diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h b/thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h index e477c313a3..397e8636b1 100644 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_msmblur_hair.h +++ b/thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h b/thirdparty/embree/kernels/builders/bvh_builder_sah.h index 3f7e678a10..fff4bf2a35 100644 --- a/thirdparty/embree-aarch64/kernels/builders/bvh_builder_sah.h +++ b/thirdparty/embree/kernels/builders/bvh_builder_sah.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,7 +43,7 @@ namespace embree { if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor; if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth; - if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(static_cast<size_t>(settings.sahBlockSize)); + if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(settings.sahBlockSize); if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize; if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize; if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost; diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h b/thirdparty/embree/kernels/builders/heuristic_binning.h index a4d3b68e46..ee29d09ac9 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning.h +++ b/thirdparty/embree/kernels/builders/heuristic_binning.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -444,482 +444,6 @@ namespace embree BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins }; - -#if defined(__AVX512ER__) // KNL - - /*! mapping into bins */ - template<> - struct BinMapping<16> - { - public: - __forceinline BinMapping() {} - - /*! calculates the mapping */ - template<typename PrimInfo> - __forceinline BinMapping(const PrimInfo& pinfo) - { - num = 16; - const vfloat4 eps = 1E-34f; - const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size()); - scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f)); - ofs = (vfloat4) pinfo.centBounds.lower; - scale16 = scale; - ofs16 = ofs; - } - - /*! returns number of bins */ - __forceinline size_t size() const { return num; } - - __forceinline vint16 bin16(const Vec3fa& p) const { - return vint16(vint4(floori((vfloat4(p)-ofs)*scale))); - } - - __forceinline vint16 bin16(const vfloat16& p) const { - return floori((p-ofs16)*scale16); - } - - __forceinline int bin_unsafe(const PrimRef& ref, - const vint16& vSplitPos, - const vbool16& splitDimMask) const // FIXME: rename to isLeft - { - const vfloat16 lower(*(vfloat4*)&ref.lower); - const vfloat16 upper(*(vfloat4*)&ref.upper); - const vfloat16 p = lower + upper; - const vint16 i = floori((p-ofs16)*scale16); - return lt(splitDimMask,i,vSplitPos); - } - - /*! returns true if the mapping is invalid in some dimension */ - __forceinline bool invalid(const size_t dim) const { - return scale[dim] == 0.0f; - } - - public: - size_t num; - vfloat4 ofs,scale; //!< linear function that maps to bin ID - vfloat16 ofs16,scale16; //!< linear function that maps to bin ID - }; - - /* 16 bins in-register binner */ - template<typename PrimRef> - struct __aligned(64) BinInfoT<16,PrimRef,BBox3fa> - { - typedef BinSplit<16> Split; - typedef vbool16 vbool; - typedef vint16 vint; - typedef vfloat16 vfloat; - - __forceinline BinInfoT() { - } - - __forceinline BinInfoT(EmptyTy) { - clear(); - } - - /*! clears the bin info */ - __forceinline void clear() - { - lower[0] = lower[1] = lower[2] = pos_inf; - upper[0] = upper[1] = upper[2] = neg_inf; - count[0] = count[1] = count[2] = 0; - } - - - static __forceinline vfloat16 prefix_area_rl(const vfloat16 min_x, - const vfloat16 min_y, - const vfloat16 min_z, - const vfloat16 max_x, - const vfloat16 max_y, - const vfloat16 max_z) - { - const vfloat16 r_min_x = reverse_prefix_min(min_x); - const vfloat16 r_min_y = reverse_prefix_min(min_y); - const vfloat16 r_min_z = reverse_prefix_min(min_z); - const vfloat16 r_max_x = reverse_prefix_max(max_x); - const vfloat16 r_max_y = reverse_prefix_max(max_y); - const vfloat16 r_max_z = reverse_prefix_max(max_z); - const vfloat16 dx = r_max_x - r_min_x; - const vfloat16 dy = r_max_y - r_min_y; - const vfloat16 dz = r_max_z - r_min_z; - const vfloat16 area_rl = madd(dx,dy,madd(dx,dz,dy*dz)); - return area_rl; - } - - static __forceinline vfloat16 prefix_area_lr(const vfloat16 min_x, - const vfloat16 min_y, - const vfloat16 min_z, - const vfloat16 max_x, - const vfloat16 max_y, - const vfloat16 max_z) - { - const vfloat16 r_min_x = prefix_min(min_x); - const vfloat16 r_min_y = prefix_min(min_y); - const vfloat16 r_min_z = prefix_min(min_z); - const vfloat16 r_max_x = prefix_max(max_x); - const vfloat16 r_max_y = prefix_max(max_y); - const vfloat16 r_max_z = prefix_max(max_z); - const vfloat16 dx = r_max_x - r_min_x; - const vfloat16 dy = r_max_y - r_min_y; - const vfloat16 dz = r_max_z - r_min_z; - const vfloat16 area_lr = madd(dx,dy,madd(dx,dz,dy*dz)); - return area_lr; - } - - - /*! bins an array of primitives */ - __forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<16>& mapping) - { - if (unlikely(N == 0)) return; - - const vfloat16 init_min(pos_inf); - const vfloat16 init_max(neg_inf); - - vfloat16 min_x0,min_x1,min_x2; - vfloat16 min_y0,min_y1,min_y2; - vfloat16 min_z0,min_z1,min_z2; - vfloat16 max_x0,max_x1,max_x2; - vfloat16 max_y0,max_y1,max_y2; - vfloat16 max_z0,max_z1,max_z2; - vuint16 count0,count1,count2; - - min_x0 = init_min; - min_x1 = init_min; - min_x2 = init_min; - min_y0 = init_min; - min_y1 = init_min; - min_y2 = init_min; - min_z0 = init_min; - min_z1 = init_min; - min_z2 = init_min; - - max_x0 = init_max; - max_x1 = init_max; - max_x2 = init_max; - max_y0 = init_max; - max_y1 = init_max; - max_y2 = init_max; - max_z0 = init_max; - max_z1 = init_max; - max_z2 = init_max; - - count0 = zero; - count1 = zero; - count2 = zero; - - const vint16 step16(step); - size_t i; - for (i=0; i<N-1; i+=2) - { - /*! map even and odd primitive to bin */ - const BBox3fa primA = prims[i+0].bounds(); - const vfloat16 centerA = vfloat16((vfloat4)primA.lower) + vfloat16((vfloat4)primA.upper); - const vint16 binA = mapping.bin16(centerA); - - const BBox3fa primB = prims[i+1].bounds(); - const vfloat16 centerB = vfloat16((vfloat4)primB.lower) + vfloat16((vfloat4)primB.upper); - const vint16 binB = mapping.bin16(centerB); - - /* A */ - { - const vfloat16 b_min_x = prims[i+0].lower.x; - const vfloat16 b_min_y = prims[i+0].lower.y; - const vfloat16 b_min_z = prims[i+0].lower.z; - const vfloat16 b_max_x = prims[i+0].upper.x; - const vfloat16 b_max_y = prims[i+0].upper.y; - const vfloat16 b_max_z = prims[i+0].upper.z; - - const vint16 bin0 = shuffle<0>(binA); - const vint16 bin1 = shuffle<1>(binA); - const vint16 bin2 = shuffle<2>(binA); - - const vbool16 m_update_x = step16 == bin0; - const vbool16 m_update_y = step16 == bin1; - const vbool16 m_update_z = step16 == bin2; - - assert(popcnt((size_t)m_update_x) == 1); - assert(popcnt((size_t)m_update_y) == 1); - assert(popcnt((size_t)m_update_z) == 1); - - min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x); - min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y); - min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z); - // ------------------------------------------------------------------------ - max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x); - max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y); - max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z); - // ------------------------------------------------------------------------ - min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x); - min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y); - min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z); - // ------------------------------------------------------------------------ - max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x); - max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y); - max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z); - // ------------------------------------------------------------------------ - min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x); - min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y); - min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z); - // ------------------------------------------------------------------------ - max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x); - max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y); - max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z); - // ------------------------------------------------------------------------ - count0 = mask_add(m_update_x,count0,count0,vuint16(1)); - count1 = mask_add(m_update_y,count1,count1,vuint16(1)); - count2 = mask_add(m_update_z,count2,count2,vuint16(1)); - } - - - /* B */ - { - const vfloat16 b_min_x = prims[i+1].lower.x; - const vfloat16 b_min_y = prims[i+1].lower.y; - const vfloat16 b_min_z = prims[i+1].lower.z; - const vfloat16 b_max_x = prims[i+1].upper.x; - const vfloat16 b_max_y = prims[i+1].upper.y; - const vfloat16 b_max_z = prims[i+1].upper.z; - - const vint16 bin0 = shuffle<0>(binB); - const vint16 bin1 = shuffle<1>(binB); - const vint16 bin2 = shuffle<2>(binB); - - const vbool16 m_update_x = step16 == bin0; - const vbool16 m_update_y = step16 == bin1; - const vbool16 m_update_z = step16 == bin2; - - assert(popcnt((size_t)m_update_x) == 1); - assert(popcnt((size_t)m_update_y) == 1); - assert(popcnt((size_t)m_update_z) == 1); - - min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x); - min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y); - min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z); - // ------------------------------------------------------------------------ - max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x); - max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y); - max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z); - // ------------------------------------------------------------------------ - min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x); - min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y); - min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z); - // ------------------------------------------------------------------------ - max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x); - max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y); - max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z); - // ------------------------------------------------------------------------ - min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x); - min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y); - min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z); - // ------------------------------------------------------------------------ - max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x); - max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y); - max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z); - // ------------------------------------------------------------------------ - count0 = mask_add(m_update_x,count0,count0,vuint16(1)); - count1 = mask_add(m_update_y,count1,count1,vuint16(1)); - count2 = mask_add(m_update_z,count2,count2,vuint16(1)); - } - - } - - if (i < N) - { - const BBox3fa prim0 = prims[i].bounds(); - const vfloat16 center0 = vfloat16((vfloat4)prim0.lower) + vfloat16((vfloat4)prim0.upper); - const vint16 bin = mapping.bin16(center0); - - const vfloat16 b_min_x = prims[i].lower.x; - const vfloat16 b_min_y = prims[i].lower.y; - const vfloat16 b_min_z = prims[i].lower.z; - const vfloat16 b_max_x = prims[i].upper.x; - const vfloat16 b_max_y = prims[i].upper.y; - const vfloat16 b_max_z = prims[i].upper.z; - - const vint16 bin0 = shuffle<0>(bin); - const vint16 bin1 = shuffle<1>(bin); - const vint16 bin2 = shuffle<2>(bin); - - const vbool16 m_update_x = step16 == bin0; - const vbool16 m_update_y = step16 == bin1; - const vbool16 m_update_z = step16 == bin2; - - assert(popcnt((size_t)m_update_x) == 1); - assert(popcnt((size_t)m_update_y) == 1); - assert(popcnt((size_t)m_update_z) == 1); - - min_x0 = mask_min(m_update_x,min_x0,min_x0,b_min_x); - min_y0 = mask_min(m_update_x,min_y0,min_y0,b_min_y); - min_z0 = mask_min(m_update_x,min_z0,min_z0,b_min_z); - // ------------------------------------------------------------------------ - max_x0 = mask_max(m_update_x,max_x0,max_x0,b_max_x); - max_y0 = mask_max(m_update_x,max_y0,max_y0,b_max_y); - max_z0 = mask_max(m_update_x,max_z0,max_z0,b_max_z); - // ------------------------------------------------------------------------ - min_x1 = mask_min(m_update_y,min_x1,min_x1,b_min_x); - min_y1 = mask_min(m_update_y,min_y1,min_y1,b_min_y); - min_z1 = mask_min(m_update_y,min_z1,min_z1,b_min_z); - // ------------------------------------------------------------------------ - max_x1 = mask_max(m_update_y,max_x1,max_x1,b_max_x); - max_y1 = mask_max(m_update_y,max_y1,max_y1,b_max_y); - max_z1 = mask_max(m_update_y,max_z1,max_z1,b_max_z); - // ------------------------------------------------------------------------ - min_x2 = mask_min(m_update_z,min_x2,min_x2,b_min_x); - min_y2 = mask_min(m_update_z,min_y2,min_y2,b_min_y); - min_z2 = mask_min(m_update_z,min_z2,min_z2,b_min_z); - // ------------------------------------------------------------------------ - max_x2 = mask_max(m_update_z,max_x2,max_x2,b_max_x); - max_y2 = mask_max(m_update_z,max_y2,max_y2,b_max_y); - max_z2 = mask_max(m_update_z,max_z2,max_z2,b_max_z); - // ------------------------------------------------------------------------ - count0 = mask_add(m_update_x,count0,count0,vuint16(1)); - count1 = mask_add(m_update_y,count1,count1,vuint16(1)); - count2 = mask_add(m_update_z,count2,count2,vuint16(1)); - } - - lower[0] = Vec3vf16( min_x0, min_y0, min_z0 ); - lower[1] = Vec3vf16( min_x1, min_y1, min_z1 ); - lower[2] = Vec3vf16( min_x2, min_y2, min_z2 ); - - upper[0] = Vec3vf16( max_x0, max_y0, max_z0 ); - upper[1] = Vec3vf16( max_x1, max_y1, max_z1 ); - upper[2] = Vec3vf16( max_x2, max_y2, max_z2 ); - - count[0] = count0; - count[1] = count1; - count[2] = count2; - } - - __forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<16>& mapping) { - bin(prims+begin,end-begin,mapping); - } - - /*! merges in other binning information */ - __forceinline void merge (const BinInfoT& other, size_t numBins) - { - for (size_t i=0; i<3; i++) - { - lower[i] = min(lower[i],other.lower[i]); - upper[i] = max(upper[i],other.upper[i]); - count[i] += other.count[i]; - } - } - - /*! reducesr binning information */ - static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b) - { - BinInfoT c; - for (size_t i=0; i<3; i++) - { - c.counts[i] = a.counts[i] + b.counts[i]; - c.lower[i] = min(a.lower[i],b.lower[i]); - c.upper[i] = max(a.upper[i],b.upper[i]); - } - return c; - } - - /*! finds the best split by scanning binning information */ - __forceinline Split best(const BinMapping<16>& mapping, const size_t blocks_shift) const - { - /* find best dimension */ - float bestSAH = inf; - int bestDim = -1; - int bestPos = 0; - const vuint16 blocks_add = (1 << blocks_shift)-1; - const vfloat16 inf(pos_inf); - for (size_t dim=0; dim<3; dim++) - { - /* ignore zero sized dimensions */ - if (unlikely(mapping.invalid(dim))) - continue; - - const vfloat16 rArea16 = prefix_area_rl(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z); - const vfloat16 lArea16 = prefix_area_lr(lower[dim].x,lower[dim].y,lower[dim].z, upper[dim].x,upper[dim].y,upper[dim].z); - const vuint16 lCount16 = prefix_sum(count[dim]); - const vuint16 rCount16 = reverse_prefix_sum(count[dim]); - - /* compute best split in this dimension */ - const vfloat16 leftArea = lArea16; - const vfloat16 rightArea = align_shift_right<1>(zero,rArea16); - const vuint16 lC = lCount16; - const vuint16 rC = align_shift_right<1>(zero,rCount16); - const vuint16 leftCount = ( lC + blocks_add) >> blocks_shift; - const vuint16 rightCount = ( rC + blocks_add) >> blocks_shift; - const vbool16 valid = (leftArea < inf) & (rightArea < inf) & vbool16(0x7fff); // handles inf entries - const vfloat16 sah = select(valid,madd(leftArea,vfloat16(leftCount),rightArea*vfloat16(rightCount)),vfloat16(pos_inf)); - /* test if this is a better dimension */ - if (any(sah < vfloat16(bestSAH))) - { - const size_t index = select_min(sah); - assert(index < 15); - assert(sah[index] < bestSAH); - bestDim = dim; - bestPos = index+1; - bestSAH = sah[index]; - } - } - - return Split(bestSAH,bestDim,bestPos,mapping); - - } - - /*! calculates extended split information */ - __forceinline void getSplitInfo(const BinMapping<16>& mapping, const Split& split, SplitInfo& info) const - { - if (split.dim == -1) { - new (&info) SplitInfo(0,empty,0,empty); - return; - } - // FIXME: horizontal reduction! - - size_t leftCount = 0; - BBox3fa leftBounds = empty; - for (size_t i=0; i<(size_t)split.pos; i++) { - leftCount += count[split.dim][i]; - Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]); - Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]); - leftBounds.extend(BBox3fa(bounds_lower,bounds_upper)); - } - size_t rightCount = 0; - BBox3fa rightBounds = empty; - for (size_t i=split.pos; i<mapping.size(); i++) { - rightCount += count[split.dim][i]; - Vec3fa bounds_lower(lower[split.dim].x[i],lower[split.dim].y[i],lower[split.dim].z[i]); - Vec3fa bounds_upper(upper[split.dim].x[i],upper[split.dim].y[i],upper[split.dim].z[i]); - rightBounds.extend(BBox3fa(bounds_lower,bounds_upper)); - } - new (&info) SplitInfo(leftCount,leftBounds,rightCount,rightBounds); - } - - /*! gets the number of primitives left of the split */ - __forceinline size_t getLeftCount(const BinMapping<16>& mapping, const Split& split) const - { - if (unlikely(split.dim == -1)) return -1; - - size_t leftCount = 0; - for (size_t i = 0; i < (size_t)split.pos; i++) { - leftCount += count[split.dim][i]; - } - return leftCount; - } - - /*! gets the number of primitives right of the split */ - __forceinline size_t getRightCount(const BinMapping<16>& mapping, const Split& split) const - { - if (unlikely(split.dim == -1)) return -1; - - size_t rightCount = 0; - for (size_t i = (size_t)split.pos; i<mapping.size(); i++) { - rightCount += count[split.dim][i]; - } - return rightCount; - } - - private: - Vec3vf16 lower[3]; - Vec3vf16 upper[3]; - vuint16 count[3]; - }; -#endif } template<typename BinInfoT, typename BinMapping, typename PrimRef> diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h b/thirdparty/embree/kernels/builders/heuristic_binning_array_aligned.h index a4c272f015..ab3b97efb9 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_aligned.h +++ b/thirdparty/embree/kernels/builders/heuristic_binning_array_aligned.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -40,15 +40,10 @@ namespace embree typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner; typedef range<size_t> Set; -#if defined(__AVX512ER__) // KNL - static const size_t PARALLEL_THRESHOLD = 4*768; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 768; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 768; -#else static const size_t PARALLEL_THRESHOLD = 3 * 1024; static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; -#endif + __forceinline HeuristicArrayBinningSAH () : prims(nullptr) {} diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h b/thirdparty/embree/kernels/builders/heuristic_binning_array_unaligned.h index 1370244586..34a7f121bb 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_binning_array_unaligned.h +++ b/thirdparty/embree/kernels/builders/heuristic_binning_array_unaligned.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h b/thirdparty/embree/kernels/builders/heuristic_openmerge_array.h index 21f18c0208..4249d16ea1 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_openmerge_array.h +++ b/thirdparty/embree/kernels/builders/heuristic_openmerge_array.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // TODO: diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h b/thirdparty/embree/kernels/builders/heuristic_spatial.h index d8ca6cb92c..a6939ba258 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial.h +++ b/thirdparty/embree/kernels/builders/heuristic_spatial.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h b/thirdparty/embree/kernels/builders/heuristic_spatial_array.h index 911dcf950c..60d235f48d 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_spatial_array.h +++ b/thirdparty/embree/kernels/builders/heuristic_spatial_array.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -107,15 +107,9 @@ namespace embree //typedef extended_range<size_t> Set; typedef Split2<ObjectSplit,SpatialSplit> Split; -#if defined(__AVX512ER__) // KNL - static const size_t PARALLEL_THRESHOLD = 3*1024; - static const size_t PARALLEL_FIND_BLOCK_SIZE = 768; - static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; -#else static const size_t PARALLEL_THRESHOLD = 3*1024; static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024; static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128; -#endif static const size_t MOVE_STEP_SIZE = 64; static const size_t CREATE_SPLITS_STEP_SIZE = 64; diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h b/thirdparty/embree/kernels/builders/heuristic_strand_array.h index ede0d04c78..19c7fcdaa8 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_strand_array.h +++ b/thirdparty/embree/kernels/builders/heuristic_strand_array.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h b/thirdparty/embree/kernels/builders/heuristic_timesplit_array.h index c999941a11..b968e01c90 100644 --- a/thirdparty/embree-aarch64/kernels/builders/heuristic_timesplit_array.h +++ b/thirdparty/embree/kernels/builders/heuristic_timesplit_array.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/priminfo.h b/thirdparty/embree/kernels/builders/priminfo.h index 06c1388742..fee515247a 100644 --- a/thirdparty/embree-aarch64/kernels/builders/priminfo.h +++ b/thirdparty/embree/kernels/builders/priminfo.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp b/thirdparty/embree/kernels/builders/primrefgen.cpp index e23de3df28..d279dc4993 100644 --- a/thirdparty/embree-aarch64/kernels/builders/primrefgen.cpp +++ b/thirdparty/embree/kernels/builders/primrefgen.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "primrefgen.h" @@ -11,7 +11,7 @@ namespace embree { namespace isa { - PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) + PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) { ParallelPrefixSumState<PrimInfo> pstate; @@ -22,7 +22,7 @@ namespace embree }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) + if (pinfo.size() != numPrimRefs) { progressMonitor(0); pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo { @@ -32,7 +32,7 @@ namespace embree return pinfo; } - PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) + PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) { ParallelForForPrefixSumState<PrimInfo> pstate; Scene::Iterator2 iter(scene,types,mblur); @@ -45,7 +45,7 @@ namespace embree }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) + if (pinfo.size() != numPrimRefs) { progressMonitor(0); pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { @@ -55,7 +55,7 @@ namespace embree return pinfo; } - PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime) + PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime) { ParallelForForPrefixSumState<PrimInfo> pstate; Scene::Iterator2 iter(scene,types,true); @@ -68,7 +68,7 @@ namespace embree }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) + if (pinfo.size() != numPrimRefs) { progressMonitor(0); pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { @@ -78,7 +78,7 @@ namespace embree return pinfo; } - PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1) + PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1) { ParallelForForPrefixSumState<PrimInfoMB> pstate; Scene::Iterator2 iter(scene,types,true); @@ -91,7 +91,7 @@ namespace embree }, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); }); /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) + if (pinfo.size() != numPrimRefs) { progressMonitor(0); pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB { @@ -182,56 +182,124 @@ namespace embree // ==================================================================================================== // ==================================================================================================== - // template for grid meshes + // special variants for grid meshes -#if 0 - template<> - PrimInfo createPrimRefArray<GridMesh,false>(Scene* scene, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor) +// -- GODOT start -- +#if defined(EMBREE_GEOMETRY_GRID) +// -- GODOT end -- + PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids) { - PING; + PrimInfo pinfo(empty); + size_t numPrimitives = 0; + + /* first run to get #primitives */ + ParallelForForPrefixSumState<PrimInfo> pstate; Scene::Iterator<GridMesh,false> iter(scene); - - /* first try */ - progressMonitor(0); + pstate.init(iter,size_t(1024)); - PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k) -> PrimInfo - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,mesh->geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - - /* if we need to filter out geometry, run again */ - if (pinfo.size() != prims.size()) - { - progressMonitor(0); - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, const PrimInfo& base) -> PrimInfo - { - k = base.size(); + + /* iterate over all meshes in the scene */ + pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { PrimInfo pinfo(empty); for (size_t j=r.begin(); j<r.end(); j++) { + if (!mesh->valid(j)) continue; BBox3fa bounds = empty; - if (!mesh->buildBounds(j,&bounds)) continue; - const PrimRef prim(bounds,mesh->geomID,unsigned(j)); - pinfo.add_center2(prim); - prims[k++] = prim; + const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j); + if (!mesh->valid(j)) continue; + pinfo.add_center2(prim,mesh->getNumSubGrids(j)); } return pinfo; }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } + numPrimitives = pinfo.size(); + + /* resize arrays */ + sgrids.resize(numPrimitives); + prims.resize(numPrimitives); + + /* second run to fill primrefs and SubGridBuildData arrays */ + pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { + k = base.size(); + size_t p_index = k; + PrimInfo pinfo(empty); + for (size_t j=r.begin(); j<r.end(); j++) + { + if (!mesh->valid(j)) continue; + const GridMesh::Grid &g = mesh->grid(j); + for (unsigned int y=0; y<g.resY-1u; y+=2) + for (unsigned int x=0; x<g.resX-1u; x+=2) + { + BBox3fa bounds = empty; + if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid + const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index); + pinfo.add_center2(prim); + sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); + prims[p_index++] = prim; + } + } + return pinfo; + }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); + assert(pinfo.size() == numPrimitives); return pinfo; } -#endif + PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids) + { + unsigned int geomID_ = std::numeric_limits<unsigned int>::max (); + + PrimInfo pinfo(empty); + size_t numPrimitives = 0; + + ParallelPrefixSumState<PrimInfo> pstate; + /* iterate over all grids in a single mesh */ + pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo + { + PrimInfo pinfo(empty); + for (size_t j=r.begin(); j<r.end(); j++) + { + if (!mesh->valid(j)) continue; + BBox3fa bounds = empty; + const PrimRef prim(bounds,geomID_,unsigned(j)); + pinfo.add_center2(prim,mesh->getNumSubGrids(j)); + } + return pinfo; + }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); + numPrimitives = pinfo.size(); + /* resize arrays */ + sgrids.resize(numPrimitives); + prims.resize(numPrimitives); + + /* second run to fill primrefs and SubGridBuildData arrays */ + pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo + { + + size_t p_index = base.size(); + PrimInfo pinfo(empty); + for (size_t j=r.begin(); j<r.end(); j++) + { + if (!mesh->valid(j)) continue; + const GridMesh::Grid &g = mesh->grid(j); + for (unsigned int y=0; y<g.resY-1u; y+=2) + for (unsigned int x=0; x<g.resX-1u; x+=2) + { + BBox3fa bounds = empty; + if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid + const PrimRef prim(bounds,geomID_,unsigned(p_index)); + pinfo.add_center2(prim); + sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); + prims[p_index++] = prim; + } + } + return pinfo; + }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); + + return pinfo; + } +// -- GODOT start -- +#endif +// -- GODOT end -- + // ==================================================================================================== // ==================================================================================================== // ==================================================================================================== diff --git a/thirdparty/embree/kernels/builders/primrefgen.h b/thirdparty/embree/kernels/builders/primrefgen.h new file mode 100644 index 0000000000..c09a848ba3 --- /dev/null +++ b/thirdparty/embree/kernels/builders/primrefgen.h @@ -0,0 +1,34 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "../common/scene.h" +#include "../common/primref.h" +#include "../common/primref_mb.h" +#include "priminfo.h" +#include "bvh_builder_morton.h" + +namespace embree +{ + namespace isa + { + PrimInfo createPrimRefArray(Geometry* geometry, unsigned int geomID, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor); + + PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor); + + PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0); + + PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f)); + + template<typename Mesh> + size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor); + + /* special variants for grids */ + PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); + + PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); + + } +} + diff --git a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h b/thirdparty/embree/kernels/builders/primrefgen_presplit.h index 8bdb38b955..8cd251ddd2 100644 --- a/thirdparty/embree-aarch64/kernels/builders/primrefgen_presplit.h +++ b/thirdparty/embree/kernels/builders/primrefgen_presplit.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/builders/splitter.h b/thirdparty/embree/kernels/builders/splitter.h index dbd6cf07c7..f7720bd284 100644 --- a/thirdparty/embree-aarch64/kernels/builders/splitter.h +++ b/thirdparty/embree/kernels/builders/splitter.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -164,6 +164,28 @@ namespace embree private: const Scene* scene; }; + + + struct DummySplitter + { + __forceinline DummySplitter(const Scene* scene, const PrimRef& prim) + { + } + }; + + struct DummySplitterFactory + { + __forceinline DummySplitterFactory(const Scene* scene) + : scene(scene) {} + + __forceinline DummySplitter operator() (const PrimRef& prim) const { + return DummySplitter(scene,prim); + } + + private: + const Scene* scene; + }; + } } diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp b/thirdparty/embree/kernels/bvh/bvh.cpp index bd102bd6ef..a84295f0da 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh.cpp +++ b/thirdparty/embree/kernels/bvh/bvh.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh.h" @@ -51,7 +51,7 @@ namespace embree template<int N> void BVHN<N>::layoutLargeNodes(size_t num) { -#if defined(__X86_64__) || defined(__aarch64__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues +#if defined(__64BIT__) // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues struct NodeArea { __forceinline NodeArea() {} @@ -183,7 +183,7 @@ namespace embree template class BVHN<8>; #endif -#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) || defined(__aarch64__) +#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) template class BVHN<4>; #endif } diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh.h b/thirdparty/embree/kernels/bvh/bvh.h index 8fdf912e52..565eec5a58 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh.h +++ b/thirdparty/embree/kernels/bvh/bvh.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -81,7 +81,7 @@ namespace embree struct CreateAlloc : public FastAllocator::Create { __forceinline CreateAlloc (BVHN* bvh) : FastAllocator::Create(&bvh->alloc) {} }; - + typedef BVHNodeRecord<NodeRef> NodeRecord; typedef BVHNodeRecordMB<NodeRef> NodeRecordMB; typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D; diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp b/thirdparty/embree/kernels/bvh/bvh4_factory.cpp index 23f4f63d45..890d5e7b7c 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.cpp +++ b/thirdparty/embree/kernels/bvh/bvh4_factory.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh4_factory.h" @@ -260,12 +260,12 @@ namespace embree void BVH4Factory::selectBuilders(int features) { - IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4MeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4iMeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelTriangle4vMeshSAH)); - IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelQuadMeshSAH)); - IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelVirtualSAH)); - IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4BuilderTwoLevelInstanceSAH)); + IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelTriangle4MeshSAH)); + IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelTriangle4iMeshSAH)); + IF_ENABLED_TRIS (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelTriangle4vMeshSAH)); + IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelQuadMeshSAH)); + IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelVirtualSAH)); + IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelInstanceSAH)); IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4vBuilder_OBB_New)); IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4iBuilder_OBB_New)); @@ -273,15 +273,15 @@ namespace embree IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4Curve8iBuilder_OBB_New)); IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH4OBBCurve8iMBBuilder_OBB)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4SceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4vSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Triangle4iSceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4SceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vSceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iSceneBuilderSAH)); IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4iMBSceneBuilderSAH)); IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Triangle4vMBSceneBuilderSAH)); IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedTriangle4iSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4vSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4Quad4iSceneBuilderSAH)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4vSceneBuilderSAH)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4iSceneBuilderSAH)); IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4iMBSceneBuilderSAH)); IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4QuantizedQuad4iSceneBuilderSAH)); @@ -291,207 +291,207 @@ namespace embree IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Quad4vSceneBuilderFastSpatialSAH)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4VirtualSceneBuilderSAH)); + IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4VirtualSceneBuilderSAH)); IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4VirtualMBSceneBuilderSAH)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4InstanceSceneBuilderSAH)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceSceneBuilderSAH)); IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceMBSceneBuilderSAH)); IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridSceneBuilderSAH)); IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridMBSceneBuilderSAH)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1BuilderSAH)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,BVH4SubdivPatch1MBBuilderSAH)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4SubdivPatch1BuilderSAH)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4SubdivPatch1MBBuilderSAH)); } void BVH4Factory::selectIntersectors(int features) { - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4i)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8i)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4v)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector4iMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,VirtualCurveIntersector4i)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,VirtualCurveIntersector8i)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,VirtualCurveIntersector4v)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,VirtualCurveIntersector8v)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,VirtualCurveIntersector4iMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,VirtualCurveIntersector8iMB)); /* select intersectors1 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector1MB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust1MB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersector1)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersector1MB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust1)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust1MB)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4vIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,BVH4Triangle4iIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4Triangle4Intersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,BVH4Triangle4iIntersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,BVH4Triangle4vIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,BVH4Triangle4iIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vMBIntersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iMBIntersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vMBIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iMBIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iMBIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Triangle4iIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,QBVH4Quad4iIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,QBVH4Triangle4iIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,QBVH4Quad4iIntersector1Pluecker)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector1)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector1)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4SubdivPatch1Intersector1)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4SubdivPatch1MBIntersector1)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector1)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector1)); + IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualIntersector1)); + IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualMBIntersector1)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector1)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector1)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersector1)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector1)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Moeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector1Moeller)) - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector1Pluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector1Moeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridMBIntersector1Moeller)) + IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector1Pluecker)); #if defined (EMBREE_RAY_PACKETS) /* select intersectors4 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector4HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust4HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector4HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector4HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector4HybridPluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector4)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector4)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersector4Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersector4HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust4Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust4HybridMB)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4Intersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4Intersector4HybridMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vIntersector4HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersector4HybridPluecker)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vMBIntersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iMBIntersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vMBIntersector4HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iMBIntersector4HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector4HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector4HybridMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersector4HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector4HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersector4HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iMBIntersector4HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iMBIntersector4HybridPluecker)); + + IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4SubdivPatch1Intersector4)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4SubdivPatch1MBIntersector4)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector4Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector4Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualIntersector4Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualMBIntersector4Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector4Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector4Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersector4Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector4Chunk)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector4HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector4HybridPluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector4HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridMBIntersector4HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector4HybridPluecker)); /* select intersectors8 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersector8HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust8HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4Intersector8HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iIntersector8HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4vMBIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Triangle4iMBIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4vIntersector8HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4Quad4iMBIntersector8HybridPluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1Intersector8)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4SubdivPatch1MBIntersector8)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersector8Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersector8HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust8Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust8HybridMB)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4Intersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4Intersector8HybridMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4vIntersector8HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersector8HybridPluecker)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4vMBIntersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4iMBIntersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4vMBIntersector8HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Triangle4iMBIntersector8HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector8HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector8HybridMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4iIntersector8HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector8HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4iIntersector8HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4iMBIntersector8HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4Quad4iMBIntersector8HybridPluecker)); + + IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4SubdivPatch1Intersector8)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4SubdivPatch1MBIntersector8)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualIntersector8Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4VirtualMBIntersector8Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4VirtualIntersector8Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4VirtualMBIntersector8Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceIntersector8Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4InstanceMBIntersector8Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceIntersector8Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector8Chunk)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridMBIntersector8HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH4GridIntersector8HybridPluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridIntersector8HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridMBIntersector8HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridIntersector8HybridPluecker)); /* select intersectors16 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersector16HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4OBBVirtualCurveIntersectorRobust16HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4Intersector16HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersector16HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4vMBIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Triangle4iMBIntersector16HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersector16HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersector16HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4Quad4iMBIntersector16HybridPluecker)); - - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1Intersector16)); - IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4SubdivPatch1MBIntersector16)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH4OBBVirtualCurveIntersector16Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH4OBBVirtualCurveIntersector16HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust16Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH4OBBVirtualCurveIntersectorRobust16HybridMB)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4Intersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4Intersector16HybridMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4iIntersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4vIntersector16HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4iIntersector16HybridPluecker)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4vMBIntersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4iMBIntersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4vMBIntersector16HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Triangle4iMBIntersector16HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4vIntersector16HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4vIntersector16HybridMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4iIntersector16HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4vIntersector16HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4iIntersector16HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4iMBIntersector16HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH4Quad4iMBIntersector16HybridPluecker)); + + IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512(features,BVH4SubdivPatch1Intersector16)); + IF_ENABLED_SUBDIV(SELECT_SYMBOL_INIT_AVX512(features,BVH4SubdivPatch1MBIntersector16)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersector16Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4VirtualMBIntersector16Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512(features,BVH4VirtualIntersector16Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512(features,BVH4VirtualMBIntersector16Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersector16Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4InstanceMBIntersector16Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceIntersector16Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceMBIntersector16Chunk)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridMBIntersector16HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH4GridIntersector16HybridPluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridIntersector16HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridMBIntersector16HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridIntersector16HybridPluecker)); /* select stream intersectors */ - SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4IntersectorStreamPacketFallback); + SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4IntersectorStreamPacketFallback); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4IntersectorStreamMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4vIntersectorStreamPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Triangle4iIntersectorStreamPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4IntersectorStreamMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4IntersectorStreamMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersectorStreamMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vIntersectorStreamPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersectorStreamPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4vIntersectorStreamPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4Quad4iIntersectorStreamPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersectorStreamMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersectorStreamPluecker)); - IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4VirtualIntersectorStream)); + IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualIntersectorStream)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH4InstanceIntersectorStream)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersectorStream)); #endif } diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h b/thirdparty/embree/kernels/bvh/bvh4_factory.h index a68227b41f..30973971a4 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh4_factory.h +++ b/thirdparty/embree/kernels/bvh/bvh4_factory.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp b/thirdparty/embree/kernels/bvh/bvh8_factory.cpp index 9fe057c392..d4521af241 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.cpp +++ b/thirdparty/embree/kernels/bvh/bvh8_factory.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "../common/isa.h" // to define EMBREE_TARGET_SIMD8 @@ -238,17 +238,17 @@ namespace embree IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8Curve8vBuilder_OBB_New)); IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX(features,BVH8OBBCurve8iMBBuilder_OBB)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4iMBSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vMBSceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4SceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4vSceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4iSceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4iMBSceneBuilderSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4vMBSceneBuilderSAH)); IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4iSceneBuilderSAH)); IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedTriangle4SceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iSceneBuilderSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4iMBSceneBuilderSAH)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8Quad4vSceneBuilderSAH)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8Quad4iSceneBuilderSAH)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8Quad4iMBSceneBuilderSAH)); IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8QuantizedQuad4iSceneBuilderSAH)); IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX(features,BVH8VirtualSceneBuilderSAH)); @@ -260,189 +260,189 @@ namespace embree IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridSceneBuilderSAH)); IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridMBSceneBuilderSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4SceneBuilderFastSpatialSAH)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Triangle4vSceneBuilderFastSpatialSAH)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8Quad4vSceneBuilderFastSpatialSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4SceneBuilderFastSpatialSAH)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX(features,BVH8Triangle4vSceneBuilderFastSpatialSAH)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX(features,BVH8Quad4vSceneBuilderFastSpatialSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4MeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4vMeshSAH)); - IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelTriangle4iMeshSAH)); - IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelQuadMeshSAH)); - IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelVirtualSAH)); - IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,BVH8BuilderTwoLevelInstanceSAH)); + IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelTriangle4MeshSAH)); + IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelTriangle4vMeshSAH)); + IF_ENABLED_TRIS (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelTriangle4iMeshSAH)); + IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelQuadMeshSAH)); + IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelVirtualSAH)); + IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelInstanceSAH)); } void BVH8Factory::selectIntersectors(int features) { - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8v)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,VirtualCurveIntersector8iMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,VirtualCurveIntersector8v)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,VirtualCurveIntersector8iMB)); /* select intersectors1 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector1MB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust1MB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersector1)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersector1MB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust1)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust1MB)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4Intersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector1Woop)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersector1Woop)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Moeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vMBIntersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iMBIntersector1Moeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vMBIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iMBIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersector1Pluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector1Pluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iMBIntersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iMBIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4iIntersector1Pluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Triangle4Intersector1Moeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,QBVH8Quad4iIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,QBVH8Triangle4iIntersector1Pluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,QBVH8Triangle4Intersector1Moeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,QBVH8Quad4iIntersector1Pluecker)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector1)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector1)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualIntersector1)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualMBIntersector1)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector1)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector1)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector1)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector1)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Moeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridMBIntersector1Moeller)) - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector1Pluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector1Moeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridMBIntersector1Moeller)) + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector1Pluecker)); #if defined (EMBREE_RAY_PACKETS) /* select intersectors4 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector4HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust4HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector4HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector4HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector4HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector4HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector4HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector4HybridPluecker)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersector4Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersector4HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust4Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust4HybridMB)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4Intersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4Intersector4HybridMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersector4HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersector4HybridPluecker)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vMBIntersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iMBIntersector4HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vMBIntersector4HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iMBIntersector4HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector4HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector4HybridMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersector4HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector4HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersector4HybridPluecker)); IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridMoeller)); IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector4HybridPluecker)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector4Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector4Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualIntersector4Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualMBIntersector4Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector4Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector4Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector4Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector4Chunk)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector4HybridPluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector4HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector4HybridPluecker)); /* select intersectors8 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersector8HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust8HybridMB)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4Intersector8HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iIntersector8HybridPluecker)); - - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4vMBIntersector8HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Triangle4iMBIntersector8HybridPluecker)); - - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4vIntersector8HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8Quad4iIntersector8HybridPluecker)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersector8Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersector8HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust8Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust8HybridMB)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4Intersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4Intersector8HybridMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersector8HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersector8HybridPluecker)); + + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vMBIntersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iMBIntersector8HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vMBIntersector8HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iMBIntersector8HybridPluecker)); + + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector8HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector8HybridMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersector8HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersector8HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersector8HybridPluecker)); IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridMoeller)); IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2(features,BVH8Quad4iMBIntersector8HybridPluecker)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualIntersector8Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8VirtualMBIntersector8Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualIntersector8Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualMBIntersector8Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceIntersector8Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8InstanceMBIntersector8Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector8Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector8Chunk)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,BVH8GridIntersector8HybridPluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector8HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector8HybridPluecker)); /* select intersectors16 */ - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersector16HybridMB)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16Hybrid)); - IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8OBBVirtualCurveIntersectorRobust16HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH8OBBVirtualCurveIntersector16Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH8OBBVirtualCurveIntersector16HybridMB)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust16Hybrid)); + IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_INIT_AVX512(features,BVH8OBBVirtualCurveIntersectorRobust16HybridMB)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4Intersector16HybridMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersector16HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4Intersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4Intersector16HybridMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4iIntersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4vIntersector16HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4iIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4vMBIntersector16HybridPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Triangle4iMBIntersector16HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4vMBIntersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4iMBIntersector16HybridMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4vMBIntersector16HybridPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Triangle4iMBIntersector16HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersector16HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersector16HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4vIntersector16HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4vIntersector16HybridMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4iIntersector16HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4vIntersector16HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4iIntersector16HybridPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8Quad4iMBIntersector16HybridPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4iMBIntersector16HybridMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX512(features,BVH8Quad4iMBIntersector16HybridPluecker)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersector16Chunk)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8VirtualMBIntersector16Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512(features,BVH8VirtualIntersector16Chunk)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX512(features,BVH8VirtualMBIntersector16Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersector16Chunk)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8InstanceMBIntersector16Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceIntersector16Chunk)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceMBIntersector16Chunk)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridMoeller)); - IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,BVH8GridIntersector16HybridPluecker)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH8GridIntersector16HybridMoeller)); + IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH8GridIntersector16HybridPluecker)); /* select stream intersectors */ - SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8IntersectorStreamPacketFallback); + SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8IntersectorStreamPacketFallback); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4IntersectorStreamMoellerNoFilter)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamMoeller)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4vIntersectorStreamPluecker)); - IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Triangle4iIntersectorStreamPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4IntersectorStreamMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4IntersectorStreamMoellerNoFilter)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersectorStreamMoeller)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersectorStreamPluecker)); + IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersectorStreamPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamMoellerNoFilter)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamMoeller)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4vIntersectorStreamPluecker)); - IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8Quad4iIntersectorStreamPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamMoellerNoFilter)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersectorStreamMoeller)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamPluecker)); + IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersectorStreamPluecker)); - IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8VirtualIntersectorStream)); + IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualIntersectorStream)); - IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,BVH8InstanceIntersectorStream)); + IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersectorStream)); #endif } diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h b/thirdparty/embree/kernels/bvh/bvh8_factory.h index b92188e7d3..198d6f1df0 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh8_factory.h +++ b/thirdparty/embree/kernels/bvh/bvh8_factory.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp b/thirdparty/embree/kernels/bvh/bvh_builder.cpp index e832537ec5..161d01bb5c 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_builder.h" diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h b/thirdparty/embree/kernels/bvh/bvh_builder.h index 1b86bb45ad..e35d052a62 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder.h +++ b/thirdparty/embree/kernels/bvh/bvh_builder.h @@ -1,8 +1,9 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh.h" #include "../builders/bvh_builder_sah.h" +#include "../builders/bvh_builder_msmblur.h" namespace embree { diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp b/thirdparty/embree/kernels/bvh/bvh_builder_morton.cpp index 64759c1294..4a4d8d71df 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_morton.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder_morton.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh.h" @@ -18,7 +18,7 @@ #include "../geometry/object.h" #include "../geometry/instance.h" -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) # define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform #else # define ROTATE_TREE 0 // do not use tree rotations on 32 bit platforms, barrier bit in NodeRef will cause issues diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp b/thirdparty/embree/kernels/bvh/bvh_builder_sah.cpp index cf5b2eb47f..fad02fcc04 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder_sah.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh.h" @@ -153,8 +153,8 @@ namespace embree prims.resize(numPrimitives); PrimInfo pinfo = mesh ? - createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) : - createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface); + createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) : + createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface); /* pinfo might has zero size due to invalid geometry */ if (unlikely(pinfo.size() == 0)) @@ -242,8 +242,8 @@ namespace embree /* create primref array */ prims.resize(numPrimitives); PrimInfo pinfo = mesh ? - createPrimRefArray(mesh,geomID_,prims,bvh->scene->progressInterface) : - createPrimRefArray(scene,gtype_,false,prims,bvh->scene->progressInterface); + createPrimRefArray(mesh,geomID_,numPrimitives,prims,bvh->scene->progressInterface) : + createPrimRefArray(scene,gtype_,false,numPrimitives,prims,bvh->scene->progressInterface); /* enable os_malloc for two level build */ if (mesh) @@ -356,7 +356,7 @@ namespace embree mvector<PrimRef> prims; mvector<SubGridBuildData> sgrids; GeneralBVHBuilder::Settings settings; - unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); + const unsigned int geomID_ = std::numeric_limits<unsigned int>::max(); unsigned int numPreviousPrimitives = 0; BVHNBuilderSAHGrid (BVH* bvh, Scene* scene, const size_t sahBlockSize, const float intCost, const size_t minLeafSize, const size_t maxLeafSize, const size_t mode) @@ -378,109 +378,10 @@ namespace embree const size_t numGridPrimitives = mesh ? mesh->size() : scene->getNumPrimitives(GridMesh::geom_type,false); numPreviousPrimitives = numGridPrimitives; - - PrimInfo pinfo(empty); - size_t numPrimitives = 0; - - if (!mesh) - { - /* first run to get #primitives */ - - ParallelForForPrefixSumState<PrimInfo> pstate; - Scene::Iterator<GridMesh,false> iter(scene); - - pstate.init(iter,size_t(1024)); - - /* iterate over all meshes in the scene */ - pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - BBox3fa bounds = empty; - const PrimRef prim(bounds,(unsigned)geomID,(unsigned)j); - if (!mesh->valid(j)) continue; - pinfo.add_center2(prim,mesh->getNumSubGrids(j)); - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - numPrimitives = pinfo.size(); - - /* resize arrays */ - sgrids.resize(numPrimitives); - prims.resize(numPrimitives); - - /* second run to fill primrefs and SubGridBuildData arrays */ - pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo { - k = base.size(); - size_t p_index = k; - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - const GridMesh::Grid &g = mesh->grid(j); - for (unsigned int y=0; y<g.resY-1u; y+=2) - for (unsigned int x=0; x<g.resX-1u; x+=2) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid - const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index); - pinfo.add_center2(prim); - sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); - prims[p_index++] = prim; - } - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - assert(pinfo.size() == numPrimitives); - } - else - { - ParallelPrefixSumState<PrimInfo> pstate; - /* iterate over all grids in a single mesh */ - pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo - { - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - BBox3fa bounds = empty; - const PrimRef prim(bounds,geomID_,unsigned(j)); - pinfo.add_center2(prim,mesh->getNumSubGrids(j)); - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - numPrimitives = pinfo.size(); - /* resize arrays */ - sgrids.resize(numPrimitives); - prims.resize(numPrimitives); - - /* second run to fill primrefs and SubGridBuildData arrays */ - pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo - { - - size_t p_index = base.size(); - PrimInfo pinfo(empty); - for (size_t j=r.begin(); j<r.end(); j++) - { - if (!mesh->valid(j)) continue; - const GridMesh::Grid &g = mesh->grid(j); - for (unsigned int y=0; y<g.resY-1u; y+=2) - for (unsigned int x=0; x<g.resX-1u; x+=2) - { - BBox3fa bounds = empty; - if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid - const PrimRef prim(bounds,geomID_,unsigned(p_index)); - pinfo.add_center2(prim); - sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j)); - prims[p_index++] = prim; - } - } - return pinfo; - }, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); }); - } + PrimInfo pinfo = mesh ? createPrimRefArrayGrids(mesh,prims,sgrids) : createPrimRefArrayGrids(scene,prims,sgrids); + const size_t numPrimitives = pinfo.size(); /* no primitives */ if (numPrimitives == 0) { bvh->clear(); @@ -546,6 +447,7 @@ namespace embree /************************************************************************************/ /************************************************************************************/ + #if defined(EMBREE_GEOMETRY_TRIANGLE) Builder* BVH4Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } Builder* BVH4Triangle4vMeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } @@ -555,7 +457,6 @@ namespace embree Builder* BVH4Triangle4vSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4v>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } Builder* BVH4Triangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAH<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type,true); } - Builder* BVH4QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<4,Triangle4i>((BVH4*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } #if defined(__AVX__) Builder* BVH8Triangle4MeshBuilderSAH (void* bvh, TriangleMesh* mesh, unsigned int geomID, size_t mode) { return new BVHNBuilderSAH<8,Triangle4>((BVH8*)bvh,mesh,geomID,4,1.0f,4,inf,TriangleMesh::geom_type); } @@ -568,6 +469,8 @@ namespace embree Builder* BVH8QuantizedTriangle4iSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4i>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } Builder* BVH8QuantizedTriangle4SceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderSAHQuantized<8,Triangle4>((BVH8*)bvh,scene,4,1.0f,4,inf,TriangleMesh::geom_type); } + + #endif #endif diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp b/thirdparty/embree/kernels/bvh/bvh_builder_sah_mb.cpp index 9c01553ec6..d163a80ab1 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_mb.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder_sah_mb.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh.h" @@ -142,7 +142,7 @@ namespace embree { /* create primref array */ mvector<PrimRef> prims(scene->device,numPrimitives); - const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,prims,bvh->scene->progressInterface,0); + const PrimInfo pinfo = createPrimRefArrayMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface,0); /* early out if no valid primitives */ if (pinfo.size() == 0) { bvh->clear(); return; } /* estimate acceleration structure size */ @@ -175,7 +175,7 @@ namespace embree { /* create primref array */ mvector<PrimRefMB> prims(scene->device,numPrimitives); - PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,prims,bvh->scene->progressInterface); + PrimInfoMB pinfo = createPrimRefArrayMSMBlur(scene,gtype_,numPrimitives,prims,bvh->scene->progressInterface); /* early out if no valid primitives */ if (pinfo.size() == 0) { bvh->clear(); return; } diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp b/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp index 285b38c39d..a4e55d7484 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_sah_spatial.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh.h" @@ -127,8 +127,8 @@ namespace embree { /* standard spatial split SAH BVH builder */ pinfo = mesh ? - createPrimRefArray(mesh,geomID_,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface) : - createPrimRefArray(scene,Mesh::geom_type,false,/*numSplitPrimitives,*/prims0,bvh->scene->progressInterface); + createPrimRefArray(mesh,geomID_,numSplitPrimitives,prims0,bvh->scene->progressInterface) : + createPrimRefArray(scene,Mesh::geom_type,false,numSplitPrimitives,prims0,bvh->scene->progressInterface); Splitter splitter(scene); diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp b/thirdparty/embree/kernels/bvh/bvh_builder_twolevel.cpp index 1a78f347ac..5d45ed3748 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder_twolevel.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_builder_twolevel.h" @@ -129,10 +129,6 @@ namespace embree prims.resize(refs.size()); #endif -#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL - tbb::task_arena limited(min(32,(int)TaskScheduler::threadCount())); - limited.execute([&] -#endif { #if ENABLE_DIRECT_SAH_MERGE_BUILDER @@ -211,10 +207,6 @@ namespace embree bvh->set(root,LBBox3fa(pinfo.geomBounds),numPrimitives); } } -#if defined(TASKING_TBB) && defined(__AVX512ER__) && USE_TASK_ARENA // KNL - ); -#endif - } bvh->alloc.cleanup(); diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h b/thirdparty/embree/kernels/bvh/bvh_builder_twolevel.h index 8f57c3b406..dc7ec7d278 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel.h +++ b/thirdparty/embree/kernels/bvh/bvh_builder_twolevel.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -137,7 +137,7 @@ namespace embree assert(isSmallGeometry(mesh)); mvector<PrimRef> prefs(topBuilder->scene->device, meshSize); - auto pinfo = createPrimRefArray(mesh,objectID_,prefs,topBuilder->bvh->scene->progressInterface); + auto pinfo = createPrimRefArray(mesh,objectID_,meshSize,prefs,topBuilder->bvh->scene->progressInterface); size_t begin=0; while (begin < pinfo.size()) diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h b/thirdparty/embree/kernels/bvh/bvh_builder_twolevel_internal.h index 1c1ae8d6a7..023b52b780 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_builder_twolevel_internal.h +++ b/thirdparty/embree/kernels/bvh/bvh_builder_twolevel_internal.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp b/thirdparty/embree/kernels/bvh/bvh_collider.cpp index a27be8bae8..9428c0b88e 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_collider.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_collider.h" diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h b/thirdparty/embree/kernels/bvh/bvh_collider.h index ac4f99c96a..3c42f211c1 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_collider.h +++ b/thirdparty/embree/kernels/bvh/bvh_collider.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h b/thirdparty/embree/kernels/bvh/bvh_factory.h index 54021ca6eb..453d455bd9 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_factory.h +++ b/thirdparty/embree/kernels/bvh/bvh_factory.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp b/thirdparty/embree/kernels/bvh/bvh_intersector1.cpp index ea6adc2717..9594f402c3 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_intersector1.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_intersector1.h" @@ -61,10 +61,10 @@ namespace embree assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f)); /* load the ray into SIMD registers */ - TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f)); + TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f)); /* initialize the node traverser */ - BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser; + BVHNNodeTraverser1Hit<N, types> nodeTraverser; /* pop loop */ while (true) pop: @@ -75,22 +75,16 @@ namespace embree NodeRef cur = NodeRef(stackPtr->ptr); /* if popped node is too far, pop next one */ -#if defined(__AVX512ER__) - /* much faster on KNL */ - if (unlikely(any(vfloat<Nx>(*(float*)&stackPtr->dist) > tray.tfar))) - continue; -#else if (unlikely(*(float*)&stackPtr->dist > ray.tfar)) continue; -#endif /* downtraversal loop */ while (true) { /* intersect node */ - size_t mask; vfloat<Nx> tNear; + size_t mask; vfloat<N> tNear; STAT3(normal.trav_nodes,1,1,1); - bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask); + bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask); if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; } /* if no child is hit, pop next node */ @@ -153,10 +147,10 @@ namespace embree assert(!(types & BVH_MB) || (ray.time() >= 0.0f && ray.time() <= 1.0f)); /* load the ray into SIMD registers */ - TravRay<N,Nx,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f)); + TravRay<N,robust> tray(ray.org, ray.dir, max(ray.tnear(), 0.0f), max(ray.tfar, 0.0f)); /* initialize the node traverser */ - BVHNNodeTraverser1Hit<N, Nx, types> nodeTraverser; + BVHNNodeTraverser1Hit<N, types> nodeTraverser; /* pop loop */ while (true) pop: @@ -170,9 +164,9 @@ namespace embree while (true) { /* intersect node */ - size_t mask; vfloat<Nx> tNear; + size_t mask; vfloat<N> tNear; STAT3(shadow.trav_nodes,1,1,1); - bool nodeIntersected = BVHNNodeIntersector1<N, Nx, types, robust>::intersect(cur, tray, ray.time(), tNear, mask); + bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray, ray.time(), tNear, mask); if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; } /* if no child is hit, pop next node */ @@ -213,9 +207,6 @@ namespace embree static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store - /* right now AVX512KNL SIMD extension only for standard node types */ - static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N; - static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context) { const BVH* __restrict__ bvh = (const BVH*)This->ptr; @@ -238,7 +229,7 @@ namespace embree TravPointQuery<N> tquery(query->p, context->query_radius); /* initialize the node traverser */ - BVHNNodeTraverser1Hit<N, N, types> nodeTraverser; + BVHNNodeTraverser1Hit<N,types> nodeTraverser; bool changed = false; float cull_radius = context->query_type == POINT_QUERY_TYPE_SPHERE diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h b/thirdparty/embree/kernels/bvh/bvh_intersector1.h index 1a269c319a..2df3d6eddb 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1.h +++ b/thirdparty/embree/kernels/bvh/bvh_intersector1.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -25,9 +25,6 @@ namespace embree static const size_t stackSize = 1+(N-1)*BVH::maxDepth+3; // +3 due to 16-wide store - /* right now AVX512KNL SIMD extension only for standard node types */ - static const size_t Nx = (types == BVH_AN1 || types == BVH_QN1) ? vextend<N>::size : N; - public: static void intersect (const Accel::Intersectors* This, RayHit& ray, IntersectContext* context); static void occluded (const Accel::Intersectors* This, Ray& ray, IntersectContext* context); diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp b/thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp index 989f7354fd..831d613367 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector1_bvh4.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_intersector1_bvh4.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_intersector1.cpp" @@ -21,15 +21,15 @@ namespace embree IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1,BVHNIntersector1<4 COMMA BVH_AN1_UN1 COMMA true COMMA VirtualCurveIntersector1 >)); IF_ENABLED_CURVES_OR_POINTS(DEFINE_INTERSECTOR1(BVH4OBBVirtualCurveIntersectorRobust1MB,BVHNIntersector1<4 COMMA BVH_AN2_AN4D_UN2 COMMA true COMMA VirtualCurveIntersector1 >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4Intersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMIntersector1Moeller <4 COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Moeller <4 COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMvIntersector1Pluecker<4 COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN1 COMMA true COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMvMBIntersector1Moeller <4 COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<TriangleMiMBIntersector1Moeller <4 COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4vMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMvMBIntersector1Pluecker<4 COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(BVH4Triangle4iMBIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA true COMMA ArrayIntersector1<TriangleMiMBIntersector1Pluecker<4 COMMA true> > >)); IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4vIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMvIntersector1Moeller <4 COMMA true> > >)); IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(BVH4Quad4iIntersector1Moeller, BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Moeller <4 COMMA true> > >)); @@ -48,7 +48,7 @@ namespace embree IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceIntersector1,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA ArrayIntersector1<InstanceIntersector1> >)); IF_ENABLED_INSTANCE(DEFINE_INTERSECTOR1(BVH4InstanceMBIntersector1,BVHNIntersector1<4 COMMA BVH_AN2_AN4D COMMA false COMMA ArrayIntersector1<InstanceIntersector1MB> >)); - IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<SIMD_MODE(4) COMMA true> > >)); + IF_ENABLED_TRIS(DEFINE_INTERSECTOR1(QBVH4Triangle4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<TriangleMiIntersector1Pluecker<4 COMMA true> > >)); IF_ENABLED_QUADS(DEFINE_INTERSECTOR1(QBVH4Quad4iIntersector1Pluecker,BVHNIntersector1<4 COMMA BVH_QN1 COMMA false COMMA ArrayIntersector1<QuadMiIntersector1Pluecker<4 COMMA true> > >)); IF_ENABLED_GRIDS(DEFINE_INTERSECTOR1(BVH4GridIntersector1Moeller,BVHNIntersector1<4 COMMA BVH_AN1 COMMA false COMMA SubGridIntersector1Moeller<4 COMMA true> >)); diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h b/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.h index d764cc928d..50ebf375c4 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_hybrid.h +++ b/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -19,9 +19,6 @@ namespace embree template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single = true> class BVHNIntersectorKHybrid { - /* right now AVX512KNL SIMD extension only for standard node types */ - static const size_t Nx = types == BVH_AN1 ? vextend<N>::size : N; - /* shortcuts for frequently used types */ typedef typename PrimitiveIntersectorK::Precalculations Precalculations; typedef typename PrimitiveIntersectorK::Primitive Primitive; diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h b/thirdparty/embree/kernels/bvh/bvh_intersector_stream.h index 83d1fb4d3d..717f559677 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream.h +++ b/thirdparty/embree/kernels/bvh/bvh_intersector_stream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -12,11 +12,9 @@ namespace embree namespace isa { /*! BVH ray stream intersector. */ - template<int N, int Nx, int types, bool robust, typename PrimitiveIntersector> + template<int N, int types, bool robust, typename PrimitiveIntersector> class BVHNIntersectorStream { - static const int Nxd = (Nx == N) ? N : Nx/2; - /* shortcuts for frequently used types */ template<int K> using PrimitiveIntersectorK = typename PrimitiveIntersector::template Type<K>; template<int K> using PrimitiveK = typename PrimitiveIntersectorK<K>::PrimitiveK; @@ -128,13 +126,13 @@ namespace embree const AABBNode* __restrict__ node, const Frustum<robust>& frustum, size_t* maskK, - vfloat<Nx>& dist) + vfloat<N>& dist) { - size_t m_node_hit = intersectNodeFrustum<N,Nx>(node, frustum, dist); + size_t m_node_hit = intersectNodeFrustum<N>(node, frustum, dist); const size_t first_index = bsf(m_active); const size_t first_packetID = first_index / K; const size_t first_rayID = first_index % K; - size_t m_first_hit = intersectNode1<N,Nx>(node, packets[first_packetID], first_rayID, frustum.nf); + size_t m_first_hit = intersectNode1<N>(node, packets[first_packetID], first_rayID, frustum.nf); /* this make traversal independent of the ordering of rays */ size_t m_node = m_node_hit ^ m_first_hit; @@ -150,20 +148,20 @@ namespace embree // TODO: explicit 16-wide path for KNL template<int K> - __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active, + __forceinline static vint<N> traverseIncoherentStream(size_t m_active, TravRayKStreamFast<K>* __restrict__ packets, const AABBNode* __restrict__ node, const NearFarPrecalculations& nf, const int shiftTable[32]) { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); + const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); + const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); + const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); + const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); + const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); + const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); assert(m_active); - vint<Nx> vmask(zero); + vint<N> vmask(zero); do { STAT3(shadow.trav_nodes,1,1,1); @@ -171,58 +169,41 @@ namespace embree assert(rayID < MAX_INTERNAL_STREAM_SIZE); TravRayKStream<K,robust> &p = packets[rayID / K]; const size_t i = rayID % K; - const vint<Nx> bitmask(shiftTable[rayID]); - -#if defined (__aarch64__) - const vfloat<Nx> tNearX = madd(bminX, p.rdir.x[i], p.neg_org_rdir.x[i]); - const vfloat<Nx> tNearY = madd(bminY, p.rdir.y[i], p.neg_org_rdir.y[i]); - const vfloat<Nx> tNearZ = madd(bminZ, p.rdir.z[i], p.neg_org_rdir.z[i]); - const vfloat<Nx> tFarX = madd(bmaxX, p.rdir.x[i], p.neg_org_rdir.x[i]); - const vfloat<Nx> tFarY = madd(bmaxY, p.rdir.y[i], p.neg_org_rdir.y[i]); - const vfloat<Nx> tFarZ = madd(bmaxZ, p.rdir.z[i], p.neg_org_rdir.z[i]); -#else - const vfloat<Nx> tNearX = msub(bminX, p.rdir.x[i], p.org_rdir.x[i]); - const vfloat<Nx> tNearY = msub(bminY, p.rdir.y[i], p.org_rdir.y[i]); - const vfloat<Nx> tNearZ = msub(bminZ, p.rdir.z[i], p.org_rdir.z[i]); - const vfloat<Nx> tFarX = msub(bmaxX, p.rdir.x[i], p.org_rdir.x[i]); - const vfloat<Nx> tFarY = msub(bmaxY, p.rdir.y[i], p.org_rdir.y[i]); - const vfloat<Nx> tFarZ = msub(bmaxZ, p.rdir.z[i], p.org_rdir.z[i]); -#endif - - const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i])); - const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i])); - -#if defined(__AVX512ER__) - const vboolx m_node((1 << N)-1); - const vbool<Nx> hit_mask = le(m_node, tNear, tFar); - vmask = mask_or(hit_mask, vmask, vmask, bitmask); -#else - const vbool<Nx> hit_mask = tNear <= tFar; + const vint<N> bitmask(shiftTable[rayID]); + const vfloat<N> tNearX = msub(bminX, p.rdir.x[i], p.org_rdir.x[i]); + const vfloat<N> tNearY = msub(bminY, p.rdir.y[i], p.org_rdir.y[i]); + const vfloat<N> tNearZ = msub(bminZ, p.rdir.z[i], p.org_rdir.z[i]); + const vfloat<N> tFarX = msub(bmaxX, p.rdir.x[i], p.org_rdir.x[i]); + const vfloat<N> tFarY = msub(bmaxY, p.rdir.y[i], p.org_rdir.y[i]); + const vfloat<N> tFarZ = msub(bmaxZ, p.rdir.z[i], p.org_rdir.z[i]); + const vfloat<N> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<N>(p.tnear[i])); + const vfloat<N> tFar = mini(tFarX , tFarY , tFarZ, vfloat<N>(p.tfar[i])); + + const vbool<N> hit_mask = tNear <= tFar; #if defined(__AVX2__) - vmask = vmask | (bitmask & vint<Nx>(hit_mask)); + vmask = vmask | (bitmask & vint<N>(hit_mask)); #else vmask = select(hit_mask, vmask | bitmask, vmask); #endif -#endif } while(m_active); return vmask; } template<int K> - __forceinline static vint<Nx> traverseIncoherentStream(size_t m_active, + __forceinline static vint<N> traverseIncoherentStream(size_t m_active, TravRayKStreamRobust<K>* __restrict__ packets, const AABBNode* __restrict__ node, const NearFarPrecalculations& nf, const int shiftTable[32]) { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); + const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); + const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); + const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); + const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); + const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); + const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); assert(m_active); - vint<Nx> vmask(zero); + vint<N> vmask(zero); do { STAT3(shadow.trav_nodes,1,1,1); @@ -230,29 +211,23 @@ namespace embree assert(rayID < MAX_INTERNAL_STREAM_SIZE); TravRayKStream<K,robust> &p = packets[rayID / K]; const size_t i = rayID % K; - const vint<Nx> bitmask(shiftTable[rayID]); - const vfloat<Nx> tNearX = (bminX - p.org.x[i]) * p.rdir.x[i]; - const vfloat<Nx> tNearY = (bminY - p.org.y[i]) * p.rdir.y[i]; - const vfloat<Nx> tNearZ = (bminZ - p.org.z[i]) * p.rdir.z[i]; - const vfloat<Nx> tFarX = (bmaxX - p.org.x[i]) * p.rdir.x[i]; - const vfloat<Nx> tFarY = (bmaxY - p.org.y[i]) * p.rdir.y[i]; - const vfloat<Nx> tFarZ = (bmaxZ - p.org.z[i]) * p.rdir.z[i]; - const vfloat<Nx> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<Nx>(p.tnear[i])); - const vfloat<Nx> tFar = mini(tFarX , tFarY , tFarZ, vfloat<Nx>(p.tfar[i])); + const vint<N> bitmask(shiftTable[rayID]); + const vfloat<N> tNearX = (bminX - p.org.x[i]) * p.rdir.x[i]; + const vfloat<N> tNearY = (bminY - p.org.y[i]) * p.rdir.y[i]; + const vfloat<N> tNearZ = (bminZ - p.org.z[i]) * p.rdir.z[i]; + const vfloat<N> tFarX = (bmaxX - p.org.x[i]) * p.rdir.x[i]; + const vfloat<N> tFarY = (bmaxY - p.org.y[i]) * p.rdir.y[i]; + const vfloat<N> tFarZ = (bmaxZ - p.org.z[i]) * p.rdir.z[i]; + const vfloat<N> tNear = maxi(tNearX, tNearY, tNearZ, vfloat<N>(p.tnear[i])); + const vfloat<N> tFar = mini(tFarX , tFarY , tFarZ, vfloat<N>(p.tfar[i])); const float round_down = 1.0f-2.0f*float(ulp); const float round_up = 1.0f+2.0f*float(ulp); -#if defined(__AVX512ER__) - const vboolx m_node((1 << N)-1); - const vbool<Nx> hit_mask = le(m_node, round_down*tNear, round_up*tFar); - vmask = mask_or(hit_mask, vmask, vmask, bitmask); -#else - const vbool<Nx> hit_mask = round_down*tNear <= round_up*tFar; + const vbool<N> hit_mask = round_down*tNear <= round_up*tFar; #if defined(__AVX2__) - vmask = vmask | (bitmask & vint<Nx>(hit_mask)); + vmask = vmask | (bitmask & vint<N>(hit_mask)); #else vmask = select(hit_mask, vmask | bitmask, vmask); #endif -#endif } while(m_active); return vmask; } @@ -277,7 +252,7 @@ namespace embree /*! BVH ray stream intersector with direct fallback to packets. */ - template<int N, int Nx> + template<int N> class BVHNIntersectorStreamPacketFallback { public: diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h b/thirdparty/embree/kernels/bvh/bvh_intersector_stream_filters.h index cdeb923637..e7df7c2ae2 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_intersector_stream_filters.h +++ b/thirdparty/embree/kernels/bvh/bvh_intersector_stream_filters.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h b/thirdparty/embree/kernels/bvh/bvh_node_aabb.h index baa4a8d805..57530692bc 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_aabb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h b/thirdparty/embree/kernels/bvh/bvh_node_aabb_mb.h index 501f4bce5b..c4cea7d8ba 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_aabb_mb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h b/thirdparty/embree/kernels/bvh/bvh_node_aabb_mb4d.h index e968bbbc39..46a81d7581 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_aabb_mb4d.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_aabb_mb4d.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h b/thirdparty/embree/kernels/bvh/bvh_node_base.h index 8268f3b932..a5570a7b9e 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_base.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_base.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h b/thirdparty/embree/kernels/bvh/bvh_node_obb.h index fa7cc08211..e6b500691e 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_obb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h b/thirdparty/embree/kernels/bvh/bvh_node_obb_mb.h index 834cf5ec28..c06b1aea5e 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_obb_mb.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_obb_mb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h b/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h index 5212821f3f..2afc8c98e7 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_qaabb.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h b/thirdparty/embree/kernels/bvh/bvh_node_ref.h index 0f6d4dac7e..6f6da758de 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_node_ref.h +++ b/thirdparty/embree/kernels/bvh/bvh_node_ref.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -102,7 +102,7 @@ namespace embree /*! Sets the barrier bit. */ __forceinline void setBarrier() { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) assert(!isBarrier()); ptr |= barrier_mask; #else @@ -112,7 +112,7 @@ namespace embree /*! Clears the barrier bit. */ __forceinline void clearBarrier() { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) ptr &= ~barrier_mask; #else assert(false); diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp b/thirdparty/embree/kernels/bvh/bvh_refit.cpp index a273c21e8b..bf5c8538ba 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_refit.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_refit.h" diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h b/thirdparty/embree/kernels/bvh/bvh_refit.h index 4aa9bdd7cc..09bb3d8da5 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_refit.h +++ b/thirdparty/embree/kernels/bvh/bvh_refit.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp b/thirdparty/embree/kernels/bvh/bvh_rotate.cpp index 2bb431bf0e..460bd60c62 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_rotate.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_rotate.h" diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h b/thirdparty/embree/kernels/bvh/bvh_rotate.h index 009bef339e..61ef64a679 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_rotate.h +++ b/thirdparty/embree/kernels/bvh/bvh_rotate.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp index aa56035026..d857ff7d95 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "bvh_statistics.h" @@ -162,7 +162,7 @@ namespace embree template class BVHNStatistics<8>; #endif -#if !defined(__AVX__) || (!defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42)) || defined(__aarch64__) +#if !defined(__AVX__) || !defined(EMBREE_TARGET_SSE2) && !defined(EMBREE_TARGET_SSE42) template class BVHNStatistics<4>; #endif } diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h b/thirdparty/embree/kernels/bvh/bvh_statistics.h index 73dfc6fbcc..a28e115f1c 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.h +++ b/thirdparty/embree/kernels/bvh/bvh_statistics.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h b/thirdparty/embree/kernels/bvh/bvh_traverser1.h index 7f17084b81..8ce01b57f5 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser1.h +++ b/thirdparty/embree/kernels/bvh/bvh_traverser1.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -14,213 +14,9 @@ namespace embree namespace isa { /*! BVH regular node traversal for single rays. */ - template<int N, int Nx, int types> + template<int N, int types> class BVHNNodeTraverser1Hit; - /*! Helper functions for fast sorting using AVX512 instructions. */ -#if defined(__AVX512ER__) - - /* KNL code path */ - __forceinline void isort_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p) - { - const vfloat16 dist_shift = align_shift_right<15>(dist,dist); - const vllong8 ptr_shift = align_shift_right<7>(ptr,ptr); - const vbool16 m_geq = d >= dist; - const vbool16 m_geq_shift = m_geq << 1; - dist = select(m_geq,d,dist); - ptr = select(vboold8(m_geq),p,ptr); - dist = select(m_geq_shift,dist_shift,dist); - ptr = select(vboold8(m_geq_shift),ptr_shift,ptr); - } - - __forceinline void isort_quick_update(vfloat16 &dist, vllong8 &ptr, const vfloat16 &d, const vllong8 &p) - { - //dist = align_shift_right<15>(dist,d); - //ptr = align_shift_right<7>(ptr,p); - dist = align_shift_right<15>(dist,permute(d,vint16(zero))); - ptr = align_shift_right<7>(ptr,permute(p,vllong8(zero))); - } - - template<int N, int Nx, int types, class NodeRef, class BaseNode> - __forceinline void traverseClosestHitAVX512(NodeRef& cur, - size_t mask, - const vfloat<Nx>& tNear, - StackItemT<NodeRef>*& stackPtr, - StackItemT<NodeRef>* stackEnd) - { - assert(mask != 0); - const BaseNode* node = cur.baseNode(); - - vllong8 children( vllong<N>::loadu((void*)node->children) ); - children = vllong8::compact((int)mask,children); - vfloat16 distance = tNear; - distance = vfloat16::compact((int)mask,distance,tNear); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - mask &= mask-1; - if (likely(mask == 0)) return; - - /* 2 hits: order A0 B0 */ - const vllong8 c0(children); - const vfloat16 d0(distance); - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - const vllong8 c1(children); - const vfloat16 d1(distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - /* a '<' keeps the order for equal distances, scenes like powerplant largely benefit from it */ - const vboolf16 m_dist = d0 < d1; - const vfloat16 dist_A0 = select(m_dist, d0, d1); - const vfloat16 dist_B0 = select(m_dist, d1, d0); - const vllong8 ptr_A0 = select(vboold8(m_dist), c0, c1); - const vllong8 ptr_B0 = select(vboold8(m_dist), c1, c0); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = toScalar(ptr_A0); - stackPtr[0].ptr = toScalar(ptr_B0); - *(float*)&stackPtr[0].dist = toScalar(dist_B0); - stackPtr++; - return; - } - - /* 3 hits: order A1 B1 C1 */ - - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - - const vllong8 c2(children); - const vfloat16 d2(distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - const vboolf16 m_dist1 = dist_A0 <= d2; - const vfloat16 dist_tmp_B1 = select(m_dist1, d2, dist_A0); - const vllong8 ptr_A1 = select(vboold8(m_dist1), ptr_A0, c2); - const vllong8 ptr_tmp_B1 = select(vboold8(m_dist1), c2, ptr_A0); - - const vboolf16 m_dist2 = dist_B0 <= dist_tmp_B1; - const vfloat16 dist_B1 = select(m_dist2, dist_B0 , dist_tmp_B1); - const vfloat16 dist_C1 = select(m_dist2, dist_tmp_B1, dist_B0); - const vllong8 ptr_B1 = select(vboold8(m_dist2), ptr_B0, ptr_tmp_B1); - const vllong8 ptr_C1 = select(vboold8(m_dist2), ptr_tmp_B1, ptr_B0); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = toScalar(ptr_A1); - stackPtr[0].ptr = toScalar(ptr_C1); - *(float*)&stackPtr[0].dist = toScalar(dist_C1); - stackPtr[1].ptr = toScalar(ptr_B1); - *(float*)&stackPtr[1].dist = toScalar(dist_B1); - stackPtr+=2; - return; - } - - /* 4 hits: order A2 B2 C2 D2 */ - - const vfloat16 dist_A1 = select(m_dist1, dist_A0, d2); - - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - - const vllong8 c3(children); - const vfloat16 d3(distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - const vboolf16 m_dist3 = dist_A1 <= d3; - const vfloat16 dist_tmp_B2 = select(m_dist3, d3, dist_A1); - const vllong8 ptr_A2 = select(vboold8(m_dist3), ptr_A1, c3); - const vllong8 ptr_tmp_B2 = select(vboold8(m_dist3), c3, ptr_A1); - - const vboolf16 m_dist4 = dist_B1 <= dist_tmp_B2; - const vfloat16 dist_B2 = select(m_dist4, dist_B1 , dist_tmp_B2); - const vfloat16 dist_tmp_C2 = select(m_dist4, dist_tmp_B2, dist_B1); - const vllong8 ptr_B2 = select(vboold8(m_dist4), ptr_B1, ptr_tmp_B2); - const vllong8 ptr_tmp_C2 = select(vboold8(m_dist4), ptr_tmp_B2, ptr_B1); - - const vboolf16 m_dist5 = dist_C1 <= dist_tmp_C2; - const vfloat16 dist_C2 = select(m_dist5, dist_C1 , dist_tmp_C2); - const vfloat16 dist_D2 = select(m_dist5, dist_tmp_C2, dist_C1); - const vllong8 ptr_C2 = select(vboold8(m_dist5), ptr_C1, ptr_tmp_C2); - const vllong8 ptr_D2 = select(vboold8(m_dist5), ptr_tmp_C2, ptr_C1); - - mask &= mask-1; - if (likely(mask == 0)) { - cur = toScalar(ptr_A2); - stackPtr[0].ptr = toScalar(ptr_D2); - *(float*)&stackPtr[0].dist = toScalar(dist_D2); - stackPtr[1].ptr = toScalar(ptr_C2); - *(float*)&stackPtr[1].dist = toScalar(dist_C2); - stackPtr[2].ptr = toScalar(ptr_B2); - *(float*)&stackPtr[2].dist = toScalar(dist_B2); - stackPtr+=3; - return; - } - - /* >=5 hits: reverse to descending order for writing to stack */ - - const size_t hits = 4 + popcnt(mask); - const vfloat16 dist_A2 = select(m_dist3, dist_A1, d3); - vfloat16 dist(neg_inf); - vllong8 ptr(zero); - - - isort_quick_update(dist,ptr,dist_A2,ptr_A2); - isort_quick_update(dist,ptr,dist_B2,ptr_B2); - isort_quick_update(dist,ptr,dist_C2,ptr_C2); - isort_quick_update(dist,ptr,dist_D2,ptr_D2); - - do { - - children = align_shift_right<1>(children,children); - distance = align_shift_right<1>(distance,distance); - - cur = toScalar(children); - BVHN<N>::prefetch(cur,types); - - const vfloat16 new_dist(permute(distance,vint16(zero))); - const vllong8 new_ptr(permute(children,vllong8(zero))); - - mask &= mask-1; - isort_update(dist,ptr,new_dist,new_ptr); - - } while(mask); - - const vboold8 m_stack_ptr(0x55); // 10101010 (lsb -> msb) - const vboolf16 m_stack_dist(0x4444); // 0010001000100010 (lsb -> msb) - - /* extract current noderef */ - cur = toScalar(permute(ptr,vllong8(hits-1))); - /* rearrange pointers to beginning of 16 bytes block */ - vllong8 stackElementA0; - stackElementA0 = vllong8::expand(m_stack_ptr,ptr,stackElementA0); - /* put distances in between */ - vuint16 stackElementA1((__m512i)stackElementA0); - stackElementA1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementA1); - /* write out first 4 x 16 bytes block to stack */ - vuint16::storeu(stackPtr,stackElementA1); - /* get upper half of dist and ptr */ - dist = align_shift_right<4>(dist,dist); - ptr = align_shift_right<4>(ptr,ptr); - /* assemble and write out second block */ - vllong8 stackElementB0; - stackElementB0 = vllong8::expand(m_stack_ptr,ptr,stackElementB0); - vuint16 stackElementB1((__m512i)stackElementB0); - stackElementB1 = vuint16::expand(m_stack_dist,asUInt(dist),stackElementB1); - vuint16::storeu(stackPtr + 4,stackElementB1); - /* increase stack pointer */ - stackPtr += hits-1; - } -#endif - #if defined(__AVX512VL__) // SKX template<int N> @@ -249,8 +45,8 @@ namespace embree #endif /* Specialization for BVH4. */ - template<int Nx, int types> - class BVHNNodeTraverser1Hit<4, Nx, types> + template<int types> + class BVHNNodeTraverser1Hit<4, types> { typedef BVH4 BVH; typedef BVH4::NodeRef NodeRef; @@ -261,14 +57,11 @@ namespace embree /* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */ static __forceinline void traverseClosestHit(NodeRef& cur, size_t mask, - const vfloat<Nx>& tNear, + const vfloat4& tNear, StackItemT<NodeRef>*& stackPtr, StackItemT<NodeRef>* stackEnd) { assert(mask != 0); -#if defined(__AVX512ER__) - traverseClosestHitAVX512<4,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); -#else const BaseNode* node = cur.baseNode(); /*! one child is hit, continue with that child */ @@ -344,13 +137,12 @@ namespace embree sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]); cur = (NodeRef) stackPtr[-1].ptr; stackPtr--; #endif -#endif } /* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */ static __forceinline void traverseAnyHit(NodeRef& cur, size_t mask, - const vfloat<Nx>& tNear, + const vfloat4& tNear, NodeRef*& stackPtr, NodeRef* stackEnd) { @@ -380,8 +172,8 @@ namespace embree }; /* Specialization for BVH8. */ - template<int Nx, int types> - class BVHNNodeTraverser1Hit<8, Nx, types> + template<int types> + class BVHNNodeTraverser1Hit<8, types> { typedef BVH8 BVH; typedef BVH8::NodeRef NodeRef; @@ -485,10 +277,10 @@ namespace embree const size_t hits = 4 + popcnt(mask); vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert - isort_quick_update(dist,dist_A2); - isort_quick_update(dist,dist_B2); - isort_quick_update(dist,dist_C2); - isort_quick_update(dist,dist_D2); + isort_quick_update<8>(dist,dist_A2); + isort_quick_update<8>(dist,dist_B2); + isort_quick_update<8>(dist,dist_C2); + isort_quick_update<8>(dist,dist_D2); do { @@ -497,7 +289,7 @@ namespace embree BVH::prefetch(cur,types); const vint8 new_dist(permute(distance_i,vint8(zero))); mask &= mask-1; - isort_update(dist,new_dist); + isort_update<8>(dist,new_dist); } while(mask); @@ -518,14 +310,12 @@ namespace embree public: static __forceinline void traverseClosestHit(NodeRef& cur, size_t mask, - const vfloat<Nx>& tNear, + const vfloat8& tNear, StackItemT<NodeRef>*& stackPtr, StackItemT<NodeRef>* stackEnd) { assert(mask != 0); -#if defined(__AVX512ER__) - traverseClosestHitAVX512<8,Nx,types,NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); -#elif defined(__AVX512VL__) +#if defined(__AVX512VL__) traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd); #else @@ -644,7 +434,7 @@ namespace embree static __forceinline void traverseAnyHit(NodeRef& cur, size_t mask, - const vfloat<Nx>& tNear, + const vfloat8& tNear, NodeRef*& stackPtr, NodeRef* stackEnd) { diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h b/thirdparty/embree/kernels/bvh/bvh_traverser_stream.h index 9c603babf0..852981e69d 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/bvh_traverser_stream.h +++ b/thirdparty/embree/kernels/bvh/bvh_traverser_stream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,7 +11,7 @@ namespace embree { namespace isa { - template<int N, int Nx, int types> + template<int N, int types> class BVHNNodeTraverserStreamHitCoherent { typedef BVHN<N> BVH; @@ -22,8 +22,8 @@ namespace embree template<class T> static __forceinline void traverseClosestHit(NodeRef& cur, size_t& m_trav_active, - const vbool<Nx>& vmask, - const vfloat<Nx>& tNear, + const vbool<N>& vmask, + const vfloat<N>& tNear, const T* const tMask, StackItemMaskCoherent*& stackPtr) { @@ -79,14 +79,9 @@ namespace embree /*! slow path for more than two hits */ size_t hits = movemask(vmask); - const vint<Nx> dist_i = select(vmask, (asInt(tNear) & 0xfffffff8) | vint<Nx>(step), 0); - #if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - const vint<N> tmp = extractN<N,0>(dist_i); - const vint<Nx> dist_i_sorted = usort_descending(tmp); - #else - const vint<Nx> dist_i_sorted = usort_descending(dist_i); - #endif - const vint<Nx> sorted_index = dist_i_sorted & 7; + const vint<N> dist_i = select(vmask, (asInt(tNear) & 0xfffffff8) | vint<N>(step), 0); + const vint<N> dist_i_sorted = usort_descending(dist_i); + const vint<N> sorted_index = dist_i_sorted & 7; size_t i = 0; for (;;) @@ -112,7 +107,7 @@ namespace embree template<class T> static __forceinline void traverseAnyHit(NodeRef& cur, size_t& m_trav_active, - const vbool<Nx>& vmask, + const vbool<N>& vmask, const T* const tMask, StackItemMaskCoherent*& stackPtr) { diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h b/thirdparty/embree/kernels/bvh/node_intersector.h index a978c0c459..25edaf295d 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector.h +++ b/thirdparty/embree/kernels/bvh/node_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h b/thirdparty/embree/kernels/bvh/node_intersector1.h index aa0d4ba4d7..1ec4fc63fc 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector1.h +++ b/thirdparty/embree/kernels/bvh/node_intersector1.h @@ -1,19 +1,10 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "node_intersector.h" -#if defined(__AVX2__) -#define __FMA_X4__ -#endif - -#if defined(__aarch64__) -#define __FMA_X4__ -#endif - - namespace embree { namespace isa @@ -22,12 +13,12 @@ namespace embree // Ray structure used in single-ray traversal ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx, bool robust> + template<int N, bool robust> struct TravRayBase; /* Base (without tnear and tfar) */ - template<int N, int Nx> - struct TravRayBase<N,Nx,false> + template<int N> + struct TravRayBase<N,false> { __forceinline TravRayBase() {} @@ -38,15 +29,9 @@ namespace embree org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z); dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z); -#if defined(__FMA_X4__) +#if defined(__AVX2__) || defined(__ARM_NEON) const Vec3fa ray_org_rdir = ray_org*ray_rdir; -#if !defined(__aarch64__) org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z); -#else - //for aarch64, we do not have msub equal instruction, so we negeate orig and use madd - //x86 will use msub - neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z); -#endif #endif nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); @@ -54,32 +39,18 @@ namespace embree farX = nearX ^ sizeof(vfloat<N>); farY = nearY ^ sizeof(vfloat<N>); farZ = nearZ ^ sizeof(vfloat<N>); - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif - } template<int K> - __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, - const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, - size_t flip = sizeof(vfloat<N>)) + __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, + const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, + size_t flip = sizeof(vfloat<N>)) { - org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); - dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); - rdir = Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); -#if defined(__FMA_X4__) -#if !defined(__aarch64__) - org_rdir = org*rdir; -#else - neg_org_rdir = -(org*rdir); -#endif + org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); + dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); + rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); +#if defined(__AVX2__) || defined(__ARM_NEON) + org_rdir = org*rdir; #endif nearX = nearXYZ.x[k]; nearY = nearXYZ.y[k]; @@ -87,39 +58,20 @@ namespace embree farX = nearX ^ flip; farY = nearY ^ flip; farZ = nearZ ^ flip; - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif } Vec3fa org_xyz, dir_xyz; - Vec3vf<Nx> org, dir, rdir; -#if defined(__FMA_X4__) -#if !defined(__aarch64__) - Vec3vf<Nx> org_rdir; -#else - //aarch64 version are keeping negation of the org_rdir and use madd - //x86 uses msub - Vec3vf<Nx> neg_org_rdir; -#endif + Vec3vf<N> org, dir, rdir; +#if defined(__AVX2__) || defined(__ARM_NEON) + Vec3vf<N> org_rdir; #endif -#if defined(__AVX512ER__) // KNL+ - vint16 permX, permY, permZ; -#endif - size_t nearX, nearY, nearZ; size_t farX, farY, farZ; }; /* Base (without tnear and tfar) */ - template<int N, int Nx> - struct TravRayBase<N,Nx,true> + template<int N> + struct TravRayBase<N,true> { __forceinline TravRayBase() {} @@ -135,34 +87,26 @@ namespace embree dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z); rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z); rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z); + nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>); nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>); nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>); farX = nearX ^ sizeof(vfloat<N>); farY = nearY ^ sizeof(vfloat<N>); farZ = nearZ ^ sizeof(vfloat<N>); - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif } template<int K> - __forceinline TravRayBase(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, - const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, - size_t flip = sizeof(vfloat<N>)) + __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, + const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, + size_t flip = sizeof(vfloat<N>)) { - const vfloat<Nx> round_down = 1.0f-3.0f*float(ulp); - const vfloat<Nx> round_up = 1.0f+3.0f*float(ulp); - org = Vec3vf<Nx>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); - dir = Vec3vf<Nx>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); - rdir_near = round_down*Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); - rdir_far = round_up *Vec3vf<Nx>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); + const vfloat<N> round_down = 1.0f-3.0f*float(ulp); + const vfloat<N> round_up = 1.0f+3.0f*float(ulp); + org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]); + dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]); + rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); + rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]); nearX = nearXYZ.x[k]; nearY = nearXYZ.y[k]; @@ -170,47 +114,36 @@ namespace embree farX = nearX ^ flip; farY = nearY ^ flip; farZ = nearZ ^ flip; - -#if defined(__AVX512ER__) // KNL+ - /* optimization works only for 8-wide BVHs with 16-wide SIMD */ - const vint<16> id(step); - const vint<16> id2 = align_shift_right<16/2>(id, id); - permX = select(vfloat<16>(dir.x) >= 0.0f, id, id2); - permY = select(vfloat<16>(dir.y) >= 0.0f, id, id2); - permZ = select(vfloat<16>(dir.z) >= 0.0f, id, id2); -#endif } Vec3fa org_xyz, dir_xyz; - Vec3vf<Nx> org, dir, rdir_near, rdir_far; -#if defined(__AVX512ER__) // KNL+ - vint16 permX, permY, permZ; -#endif - + Vec3vf<N> org, dir, rdir_near, rdir_far; size_t nearX, nearY, nearZ; size_t farX, farY, farZ; }; /* Full (with tnear and tfar) */ - template<int N, int Nx, bool robust> - struct TravRay : TravRayBase<N,Nx,robust> + template<int N, bool robust> + struct TravRay : TravRayBase<N,robust> { __forceinline TravRay() {} __forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar) - : TravRayBase<N,Nx,robust>(ray_org, ray_dir), + : TravRayBase<N,robust>(ray_org, ray_dir), tnear(ray_tnear), tfar(ray_tfar) {} template<int K> - __forceinline TravRay(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, - const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, - float ray_tnear, float ray_tfar, - size_t flip = sizeof(vfloat<N>)) - : TravRayBase<N,Nx,robust>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip), - tnear(ray_tnear), tfar(ray_tfar) {} + __forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, + const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ, + float ray_tnear, float ray_tfar, + size_t flip = sizeof(vfloat<N>)) + { + TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip); + tnear = ray_tnear; tfar = ray_tfar; + } - vfloat<Nx> tnear; - vfloat<Nx> tfar; + vfloat<N> tnear; + vfloat<N> tfar; }; ////////////////////////////////////////////////////////////////////////////////////// @@ -465,28 +398,19 @@ namespace embree // Fast AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); + template<int N, bool robust> + __forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist); template<> - __forceinline size_t intersectNode<4,4>(const typename BVH4::AABBNode* node, const TravRay<4,4,false>& ray, vfloat4& dist) + __forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist) { -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); -#endif #else const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; @@ -506,7 +430,7 @@ namespace embree const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool4 vmask = asInt(tNear) > asInt(tFar); const size_t mask = movemask(vmask) ^ ((1<<4)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#elif defined(__AVX512F__) // SKX const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool4 vmask = asInt(tNear) <= asInt(tFar); @@ -524,25 +448,15 @@ namespace embree #if defined(__AVX__) template<> - __forceinline size_t intersectNode<8,8>(const typename BVH8::AABBNode* node, const TravRay<8,8,false>& ray, vfloat8& dist) + __forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist) { -#if defined(__AVX2__) -#if defined(__aarch64__) - const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); -#endif - #else const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x; const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y; @@ -557,7 +471,7 @@ namespace embree const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool8 vmask = asInt(tNear) > asInt(tFar); const size_t mask = movemask(vmask) ^ ((1<<8)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#elif defined(__AVX512F__) // SKX const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool8 vmask = asInt(tNear) <= asInt(tFar); @@ -574,52 +488,12 @@ namespace embree #endif -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - - template<> - __forceinline size_t intersectNode<4,16>(const typename BVH4::AABBNode* node, const TravRay<4,16,false>& ray, vfloat16& dist) - { - const vfloat16 tNearX = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearY = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearZ = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z); - const vfloat16 tFarX = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x); - const vfloat16 tFarY = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y); - const vfloat16 tFarZ = msub(vfloat16(*(vfloat4*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le(vbool16(0xf),tNear,tFar); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<8,16>(const typename BVH8::AABBNode* node, const TravRay<8,16,false>& ray, vfloat16& dist) - { - const vllong8 invalid((size_t)BVH8::emptyNode); - const vboold8 m_valid(invalid != vllong8::loadu(node->children)); - const vfloat16 bminmaxX = permute(vfloat16::load((const float*)&node->lower_x), ray.permX); - const vfloat16 bminmaxY = permute(vfloat16::load((const float*)&node->lower_y), ray.permY); - const vfloat16 bminmaxZ = permute(vfloat16::load((const float*)&node->lower_z), ray.permZ); - const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); - const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar); - const vbool16 vmask = le(vboolf16(m_valid),tNear,align_shift_right<8>(tFar, tFar)); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - -#endif - ////////////////////////////////////////////////////////////////////////////////////// // Robust AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx> - __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist) + template<int N> + __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist) { const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; @@ -635,50 +509,12 @@ namespace embree return mask; } -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - - template<> - __forceinline size_t intersectNodeRobust<4,16>(const typename BVHN<4>::AABBNode* node, const TravRay<4,16,true>& ray, vfloat<16>& dist) - { - const vfloat16 tNearX = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (vfloat16(*(vfloat<4>*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le((1 << 4)-1,tNear,tFar); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNodeRobust<8,16>(const typename BVHN<8>::AABBNode* node, const TravRay<8,16,true>& ray, vfloat<16>& dist) - { - const vfloat16 tNearX = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (vfloat16(*(vfloat<8>*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z; - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le((1 << 8)-1,tNear,tFar); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - -#endif - ////////////////////////////////////////////////////////////////////////////////////// // Fast AABBNodeMB intersection ////////////////////////////////////////////////////////////////////////////////////// template<int N> - __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist) + __forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) { const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); @@ -686,22 +522,13 @@ namespace embree const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); -#endif #else const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; @@ -710,12 +537,12 @@ namespace embree const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; #endif -#if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW +#if defined(__AVX2__) && !defined(__AVX512F__) // HSW const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool<N> vmask = asInt(tNear) > asInt(tFar); const size_t mask = movemask(vmask) ^ ((1<<N)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#elif defined(__AVX512F__) // SKX const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool<N> vmask = asInt(tNear) <= asInt(tFar); @@ -735,7 +562,7 @@ namespace embree ////////////////////////////////////////////////////////////////////////////////////// template<int N> - __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist) + __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) { const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX); const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY); @@ -761,7 +588,7 @@ namespace embree ////////////////////////////////////////////////////////////////////////////////////// template<int N> - __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,false>& ray, const float time, vfloat<N>& dist) + __forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) { const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); @@ -771,22 +598,13 @@ namespace embree const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX); const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY); const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ); -#if defined (__FMA_X4__) -#if defined(__aarch64__) - const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined (__AVX2__) || defined(__ARM_NEON) const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x); const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y); const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z); const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x); const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y); const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z); -#endif #else const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x; const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y; @@ -795,7 +613,7 @@ namespace embree const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y; const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z; #endif -#if defined(__FMA_X4__) && !defined(__AVX512F__) +#if defined(__AVX2__) && !defined(__AVX512F__) const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear)); const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar )); #else @@ -817,7 +635,7 @@ namespace embree ////////////////////////////////////////////////////////////////////////////////////// template<int N> - __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,N,true>& ray, const float time, vfloat<N>& dist) + __forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) { const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB(); @@ -849,11 +667,11 @@ namespace embree // Fast QuantizedBaseNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,robust>& ray, vfloat<Nx>& dist); + template<int N, bool robust> + __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist); template<> - __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,false>& ray, vfloat4& dist) + __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist) { const size_t mvalid = movemask(node->validMask()); const vfloat4 start_x(node->start.x); @@ -869,22 +687,13 @@ namespace embree const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z); const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z); -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#endif #else const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x; const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y; @@ -894,12 +703,12 @@ namespace embree const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z; #endif -#if (defined(__aarch64__) && defined(BUILD_IOS)) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW +#if defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool4 vmask = asInt(tNear) > asInt(tFar); const size_t mask = movemask(vmask) ^ ((1<<4)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#elif defined(__AVX512F__) // SKX const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool4 vmask = asInt(tNear) <= asInt(tFar); @@ -915,7 +724,7 @@ namespace embree } template<> - __forceinline size_t intersectNode<4,4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,4,true>& ray, vfloat4& dist) + __forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist) { const size_t mvalid = movemask(node->validMask()); const vfloat4 start_x(node->start.x); @@ -950,7 +759,7 @@ namespace embree #if defined(__AVX__) template<> - __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,false>& ray, vfloat8& dist) + __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist) { const size_t mvalid = movemask(node->validMask()); const vfloat8 start_x(node->start.x); @@ -966,22 +775,13 @@ namespace embree const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z); const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z); -#if defined(__AVX2__) -#if defined(__aarch64__) - const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#endif #else const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x; const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y; @@ -996,7 +796,7 @@ namespace embree const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool8 vmask = asInt(tNear) > asInt(tFar); const size_t mask = movemask(vmask) ^ ((1<<8)-1); -#elif defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#elif defined(__AVX512F__) // SKX const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear); const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar); const vbool8 vmask = asInt(tNear) <= asInt(tFar); @@ -1012,7 +812,7 @@ namespace embree } template<> - __forceinline size_t intersectNode<8,8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,8,true>& ray, vfloat8& dist) + __forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist) { const size_t mvalid = movemask(node->validMask()); const vfloat8 start_x(node->start.x); @@ -1047,113 +847,8 @@ namespace embree #endif -#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL - - template<> - __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,false>& ray, vfloat16& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 start_x(node->start.x); - const vfloat16 scale_x(node->scale.x); - const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x); - const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x); - const vfloat16 start_y(node->start.y); - const vfloat16 scale_y(node->scale.y); - const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y); - const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y); - const vfloat16 start_z(node->start.z); - const vfloat16 scale_z(node->scale.z); - const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z); - const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z); - - const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le(vbool16(0xf),tNear,tFar); - const size_t mask = movemask(vmask) & mvalid; - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<4,16>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,16,true>& ray, vfloat16& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 start_x(node->start.x); - const vfloat16 scale_x(node->scale.x); - const vfloat16 lower_x = madd(vfloat16(node->dequantize<4>(ray.nearX >> 2)),scale_x,start_x); - const vfloat16 upper_x = madd(vfloat16(node->dequantize<4>(ray.farX >> 2)),scale_x,start_x); - const vfloat16 start_y(node->start.y); - const vfloat16 scale_y(node->scale.y); - const vfloat16 lower_y = madd(vfloat16(node->dequantize<4>(ray.nearY >> 2)),scale_y,start_y); - const vfloat16 upper_y = madd(vfloat16(node->dequantize<4>(ray.farY >> 2)),scale_y,start_y); - const vfloat16 start_z(node->start.z); - const vfloat16 scale_z(node->scale.z); - const vfloat16 lower_z = madd(vfloat16(node->dequantize<4>(ray.nearZ >> 2)),scale_z,start_z); - const vfloat16 upper_z = madd(vfloat16(node->dequantize<4>(ray.farZ >> 2)),scale_z,start_z); - - const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat16 tNear = max(tNearX,tNearY,tNearZ,ray.tnear); - const vfloat16 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar); - const vbool16 vmask = le(vbool16(0xf),tNear,tFar); - const size_t mask = movemask(vmask) & mvalid; - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,false>& ray, vfloat16& dist) - { - const vbool16 m_valid(node->validMask16()); - const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX); - const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY); - const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ); - const vfloat16 tNearFarX = msub(bminmaxX, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearFarY = msub(bminmaxY, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearFarZ = msub(bminmaxZ, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); - const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar); - const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar)); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - template<> - __forceinline size_t intersectNode<8,16>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,16,true>& ray, vfloat16& dist) - { - const vbool16 m_valid(node->validMask16()); - const vfloat16 bminmaxX = node->dequantizeLowerUpperX(ray.permX); - const vfloat16 bminmaxY = node->dequantizeLowerUpperY(ray.permY); - const vfloat16 bminmaxZ = node->dequantizeLowerUpperZ(ray.permZ); - const vfloat16 tNearFarX = (bminmaxX - ray.org.x) * ray.rdir_far.x; // FIXME: this is not conservative !!!!!!!!! - const vfloat16 tNearFarY = (bminmaxY - ray.org.y) * ray.rdir_far.y; - const vfloat16 tNearFarZ = (bminmaxZ - ray.org.z) * ray.rdir_far.z; - const vfloat16 tNear = max(tNearFarX, tNearFarY, tNearFarZ, ray.tnear); - const vfloat16 tFar = min(tNearFarX, tNearFarY, tNearFarZ, ray.tfar); - const vbool16 vmask = le(m_valid,tNear,align_shift_right<8>(tFar, tFar)); - const size_t mask = movemask(vmask); - dist = tNear; - return mask; - } - - -#endif - - - template<int N, int Nx> - __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist) + template<int N> + __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) { const vboolf<N> mvalid = node->validMask(); const vfloat<N> lower_x = node->dequantizeLowerX(time); @@ -1162,22 +857,13 @@ namespace embree const vfloat<N> upper_y = node->dequantizeUpperY(time); const vfloat<N> lower_z = node->dequantizeLowerZ(time); const vfloat<N> upper_z = node->dequantizeUpperZ(time); -#if defined(__FMA_X4__) -#if defined(__aarch64__) - const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#else +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); -#endif #else const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x; const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y; @@ -1195,7 +881,7 @@ namespace embree const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#if defined(__AVX512F__) // SKX const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); #else const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; @@ -1205,8 +891,8 @@ namespace embree return mask; } - template<int N, int Nx> - __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist) + template<int N> + __forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) { const vboolf<N> mvalid = node->validMask(); const vfloat<N> lower_x = node->dequantizeLowerX(time); @@ -1230,7 +916,7 @@ namespace embree const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ); const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear); const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#if defined(__AVX512F__) // SKX const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar)); #else const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid; @@ -1240,83 +926,12 @@ namespace embree return mask; } - -#if defined(__AVX512ER__) - // for KNL - template<> - __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,false>& ray, const float time, vfloat<4>& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 lower_x = node->dequantizeLowerX(time); - const vfloat16 upper_x = node->dequantizeUpperX(time); - const vfloat16 lower_y = node->dequantizeLowerY(time); - const vfloat16 upper_y = node->dequantizeUpperY(time); - const vfloat16 lower_z = node->dequantizeLowerZ(time); - const vfloat16 upper_z = node->dequantizeUpperZ(time); - - const vfloat16 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); - const vfloat16 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x); - const vfloat16 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y); - const vfloat16 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z); - - const vfloat16 tminX = min(tNearX,tFarX); - const vfloat16 tmaxX = max(tNearX,tFarX); - const vfloat16 tminY = min(tNearY,tFarY); - const vfloat16 tmaxY = max(tNearY,tFarY); - const vfloat16 tminZ = min(tNearZ,tFarZ); - const vfloat16 tmaxZ = max(tNearZ,tFarZ); - const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear); - const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar ); - const vbool16 vmask = tNear <= tFar; - const size_t mask = movemask(vmask) & mvalid; - dist = extractN<4,0>(tNear); - return mask; - } - - - // for KNL - template<> - __forceinline size_t intersectNode<4,16>(const typename BVHN<4>::QuantizedBaseNodeMB* node, const TravRay<4,16,true>& ray, const float time, vfloat<4>& dist) - { - const size_t mvalid = movemask(node->validMask()); - const vfloat16 lower_x = node->dequantizeLowerX(time); - const vfloat16 upper_x = node->dequantizeUpperX(time); - const vfloat16 lower_y = node->dequantizeLowerY(time); - const vfloat16 upper_y = node->dequantizeUpperY(time); - const vfloat16 lower_z = node->dequantizeLowerZ(time); - const vfloat16 upper_z = node->dequantizeUpperZ(time); - - const vfloat16 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x; - const vfloat16 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y; - const vfloat16 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z; - const vfloat16 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x; - const vfloat16 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y; - const vfloat16 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z; - - const vfloat16 tminX = min(tNearX,tFarX); - const vfloat16 tmaxX = max(tNearX,tFarX); - const vfloat16 tminY = min(tNearY,tFarY); - const vfloat16 tmaxY = max(tNearY,tFarY); - const vfloat16 tminZ = min(tNearZ,tFarZ); - const vfloat16 tmaxZ = max(tNearZ,tFarZ); - const vfloat16 tNear = max(tminX,tminY,tminZ,ray.tnear); - const vfloat16 tFar = min(tmaxX,tmaxY,tmaxZ,ray.tfar ); - const vbool16 vmask = tNear <= tFar; - const size_t mask = movemask(vmask) & mvalid; - dist = extractN<4,0>(tNear); - return mask; - } - -#endif - ////////////////////////////////////////////////////////////////////////////////////// // Fast OBBNode intersection ////////////////////////////////////////////////////////////////////////////////////// template<int N, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,N,robust>& ray, vfloat<N>& dist) + __forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist) { const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir); //const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir; @@ -1347,7 +962,7 @@ namespace embree ////////////////////////////////////////////////////////////////////////////////////// template<int N, bool robust> - __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,N,robust>& ray, const float time, vfloat<N>& dist) + __forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist) { const AffineSpace3vf<N> xfm = node->space0; const Vec3vf<N> b0_lower = zero; @@ -1586,13 +1201,13 @@ namespace embree ////////////////////////////////////////////////////////////////////////////////////// /*! Intersects N nodes with 1 ray */ - template<int N, int Nx, int types, bool robust> + template<int N, int types, bool robust> struct BVHNNodeIntersector1; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN1, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNode(node.getAABBNode(), ray, dist); @@ -1600,10 +1215,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN1, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNodeRobust(node.getAABBNode(), ray, dist); @@ -1611,10 +1226,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); @@ -1622,10 +1237,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); @@ -1633,10 +1248,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNodeMB4D<N>(node, ray, time, dist); @@ -1644,10 +1259,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNodeMB4DRobust<N>(node, ray, time, dist); @@ -1655,10 +1270,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist); else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); @@ -1667,10 +1282,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN1_UN1, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist); else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist); @@ -1679,10 +1294,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist); else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); @@ -1691,10 +1306,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_UN2, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist); else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); @@ -1703,10 +1318,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); @@ -1715,10 +1330,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_AN2_AN4D_UN2, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist); @@ -1727,10 +1342,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_QN1, false> + template<int N> + struct BVHNNodeIntersector1<N, BVH_QN1, false> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,false>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); @@ -1738,10 +1353,10 @@ namespace embree } }; - template<int N, int Nx> - struct BVHNNodeIntersector1<N, Nx, BVH_QN1, true> + template<int N> + struct BVHNNodeIntersector1<N, BVH_QN1, true> { - static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,Nx,true>& ray, float time, vfloat<Nx>& dist, size_t& mask) + static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask) { if (unlikely(node.isLeaf())) return false; mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist); @@ -1750,33 +1365,33 @@ namespace embree }; /*! Intersects N nodes with K rays */ - template<int N, int Nx, bool robust> + template<int N, bool robust> struct BVHNQuantizedBaseNodeIntersector1; - template<int N, int Nx> - struct BVHNQuantizedBaseNodeIntersector1<N, Nx, false> + template<int N> + struct BVHNQuantizedBaseNodeIntersector1<N, false> { - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,false>& ray, vfloat<Nx>& dist) + static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist) { return intersectNode(node,ray,dist); } - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,false>& ray, const float time, vfloat<N>& dist) + static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist) { return intersectNode(node,ray,time,dist); } }; - template<int N, int Nx> - struct BVHNQuantizedBaseNodeIntersector1<N, Nx, true> + template<int N> + struct BVHNQuantizedBaseNodeIntersector1<N, true> { - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,Nx,true>& ray, vfloat<Nx>& dist) + static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist) { return intersectNode(node,ray,dist); } - static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,Nx,true>& ray, const float time, vfloat<N>& dist) + static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist) { return intersectNode(node,ray,time,dist); } diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h b/thirdparty/embree/kernels/bvh/node_intersector_frustum.h index 800ac8b478..1f7215e5df 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_frustum.h +++ b/thirdparty/embree/kernels/bvh/node_intersector_frustum.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -38,12 +38,6 @@ namespace embree __forceinline Frustum() {} template<int K> - __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - init(valid, org, rdir, ray_tnear, ray_tfar, N); - } - - template<int K> __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) { const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), @@ -81,13 +75,9 @@ namespace embree min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir); max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir); -#if defined (__aarch64__) - neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org)); - neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org)); -#else min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org); max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org); -#endif + min_dist = reduced_min_dist; max_dist = reduced_max_dist; @@ -105,13 +95,9 @@ namespace embree Vec3fa min_rdir; Vec3fa max_rdir; -#if defined (__aarch64__) - Vec3fa neg_min_org_rdir; - Vec3fa neg_max_org_rdir; -#else Vec3fa min_org_rdir; Vec3fa max_org_rdir; -#endif + float min_dist; float max_dist; }; @@ -125,12 +111,6 @@ namespace embree __forceinline Frustum() {} template<int K> - __forceinline Frustum(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) - { - init(valid, org, rdir, ray_tnear, ray_tfar, N); - } - - template<int K> __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N) { const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)), @@ -200,36 +180,28 @@ namespace embree // Fast AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx> + template<int N> __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node, - const FrustumFast& frustum, vfloat<Nx>& dist) + const FrustumFast& frustum, vfloat<N>& dist) { - const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); - const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); - const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); - const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); - const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); - const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); - -#if defined (__aarch64__) - const vfloat<Nx> fminX = madd(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.neg_min_org_rdir.x)); - const vfloat<Nx> fminY = madd(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.neg_min_org_rdir.y)); - const vfloat<Nx> fminZ = madd(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.neg_min_org_rdir.z)); - const vfloat<Nx> fmaxX = madd(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.neg_max_org_rdir.x)); - const vfloat<Nx> fmaxY = madd(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.neg_max_org_rdir.y)); - const vfloat<Nx> fmaxZ = madd(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.neg_max_org_rdir.z)); -#else - const vfloat<Nx> fminX = msub(bminX, vfloat<Nx>(frustum.min_rdir.x), vfloat<Nx>(frustum.min_org_rdir.x)); - const vfloat<Nx> fminY = msub(bminY, vfloat<Nx>(frustum.min_rdir.y), vfloat<Nx>(frustum.min_org_rdir.y)); - const vfloat<Nx> fminZ = msub(bminZ, vfloat<Nx>(frustum.min_rdir.z), vfloat<Nx>(frustum.min_org_rdir.z)); - const vfloat<Nx> fmaxX = msub(bmaxX, vfloat<Nx>(frustum.max_rdir.x), vfloat<Nx>(frustum.max_org_rdir.x)); - const vfloat<Nx> fmaxY = msub(bmaxY, vfloat<Nx>(frustum.max_rdir.y), vfloat<Nx>(frustum.max_org_rdir.y)); - const vfloat<Nx> fmaxZ = msub(bmaxZ, vfloat<Nx>(frustum.max_rdir.z), vfloat<Nx>(frustum.max_org_rdir.z)); -#endif - const vfloat<Nx> fmin = maxi(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist)); + const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); + const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); + const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); + const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); + const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); + const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); + + const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x)); + const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y)); + const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z)); + const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x)); + const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y)); + const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z)); + + const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist)); dist = fmin; - const vfloat<Nx> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist)); - const vbool<Nx> vmask_node_hit = fmin <= fmax; + const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist)); + const vbool<N> vmask_node_hit = fmin <= fmax; size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); return m_node; } @@ -238,30 +210,30 @@ namespace embree // Robust AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx> + template<int N> __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node, - const FrustumRobust& frustum, vfloat<Nx>& dist) + const FrustumRobust& frustum, vfloat<N>& dist) { - const vfloat<Nx> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); - const vfloat<Nx> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); - const vfloat<Nx> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); - const vfloat<Nx> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); - const vfloat<Nx> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); - const vfloat<Nx> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); - - const vfloat<Nx> fminX = (bminX - vfloat<Nx>(frustum.min_org.x)) * vfloat<Nx>(frustum.min_rdir.x); - const vfloat<Nx> fminY = (bminY - vfloat<Nx>(frustum.min_org.y)) * vfloat<Nx>(frustum.min_rdir.y); - const vfloat<Nx> fminZ = (bminZ - vfloat<Nx>(frustum.min_org.z)) * vfloat<Nx>(frustum.min_rdir.z); - const vfloat<Nx> fmaxX = (bmaxX - vfloat<Nx>(frustum.max_org.x)) * vfloat<Nx>(frustum.max_rdir.x); - const vfloat<Nx> fmaxY = (bmaxY - vfloat<Nx>(frustum.max_org.y)) * vfloat<Nx>(frustum.max_rdir.y); - const vfloat<Nx> fmaxZ = (bmaxZ - vfloat<Nx>(frustum.max_org.z)) * vfloat<Nx>(frustum.max_rdir.z); + const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX); + const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY); + const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ); + const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX); + const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY); + const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ); + + const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x); + const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y); + const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z); + const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x); + const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y); + const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z); const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 const float round_up = 1.0f+2.0f*float(ulp); - const vfloat<Nx> fmin = max(fminX, fminY, fminZ, vfloat<Nx>(frustum.min_dist)); + const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist)); dist = fmin; - const vfloat<Nx> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<Nx>(frustum.max_dist)); - const vbool<Nx> vmask_node_hit = (round_down*fmin <= round_up*fmax); + const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist)); + const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax); size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1); return m_node; } diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h b/thirdparty/embree/kernels/bvh/node_intersector_packet.h index 0543e56f8e..d5498fc5db 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet.h +++ b/thirdparty/embree/kernels/bvh/node_intersector_packet.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -39,11 +39,10 @@ namespace embree org = ray_org; dir = ray_dir; rdir = rcp_safe(ray_dir); -#if defined(__aarch64__) - neg_org_rdir = -(org * rdir); -#elif defined(__AVX2__) +#if defined(__AVX2__) || defined(__ARM_NEON) org_rdir = org * rdir; #endif + if (N) { const int size = sizeof(float)*N; @@ -56,9 +55,7 @@ namespace embree Vec3vf<K> org; Vec3vf<K> dir; Vec3vf<K> rdir; -#if defined(__aarch64__) - Vec3vf<K> neg_org_rdir; -#elif defined(__AVX2__) +#if defined(__AVX2__) || defined(__ARM_NEON) Vec3vf<K> org_rdir; #endif Vec3vi<K> nearXYZ; @@ -122,14 +119,7 @@ namespace embree const TravRayKFast<K>& ray, vfloat<K>& dist) { -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) + #if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x); const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y); const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z); @@ -145,7 +135,7 @@ namespace embree const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z; #endif - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX + #if defined(__AVX512F__) // SKX if (K == 16) { /* use mixed float/int min/max */ @@ -160,7 +150,7 @@ namespace embree { const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX + #if defined(__AVX512F__) // SKX const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); #else const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); @@ -209,14 +199,7 @@ namespace embree const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); @@ -232,7 +215,7 @@ namespace embree const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z; #endif -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#if defined(__AVX512F__) // SKX if (K == 16) { /* use mixed float/int min/max */ @@ -247,7 +230,7 @@ namespace embree { const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#if defined(__AVX512F__) // SKX const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); #else const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); @@ -282,7 +265,7 @@ namespace embree const float round_up = 1.0f+3.0f*float(ulp); const float round_down = 1.0f-3.0f*float(ulp); -#if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX +#if defined(__AVX512F__) // SKX if (K == 16) { const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ)); @@ -319,14 +302,7 @@ namespace embree const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i])); const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i])); -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x); const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y); const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z); @@ -488,14 +464,7 @@ namespace embree const vfloat<N> lower_z = node->dequantizeLowerZ(); const vfloat<N> upper_z = node->dequantizeUpperZ(); - #if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z); - #elif defined(__AVX2__) + #if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x); const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y); const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z); @@ -511,7 +480,7 @@ namespace embree const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z; #endif - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX + #if defined(__AVX512F__) // SKX if (K == 16) { /* use mixed float/int min/max */ @@ -526,7 +495,7 @@ namespace embree { const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ)); const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ)); - #if defined(__AVX512F__) && !defined(__AVX512ER__) // SKX + #if defined(__AVX512F__) // SKX const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar)); #else const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar); @@ -573,21 +542,14 @@ namespace embree { assert(movemask(node->validMask()) & ((size_t)1 << i)); - const vfloat<K> lower_x = node->dequantizeLowerX(i,time); - const vfloat<K> upper_x = node->dequantizeUpperX(i,time); - const vfloat<K> lower_y = node->dequantizeLowerY(i,time); - const vfloat<K> upper_y = node->dequantizeUpperY(i,time); - const vfloat<K> lower_z = node->dequantizeLowerZ(i,time); - const vfloat<K> upper_z = node->dequantizeUpperZ(i,time); + const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); + const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); + const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); + const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); + const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); + const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); -#if defined(__aarch64__) - const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z); -#elif defined(__AVX2__) +#if defined(__AVX2__) || defined(__ARM_NEON) const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x); const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y); const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z); @@ -617,12 +579,12 @@ namespace embree { assert(movemask(node->validMask()) & ((size_t)1 << i)); - const vfloat<K> lower_x = node->dequantizeLowerX(i,time); - const vfloat<K> upper_x = node->dequantizeUpperX(i,time); - const vfloat<K> lower_y = node->dequantizeLowerY(i,time); - const vfloat<K> upper_y = node->dequantizeUpperY(i,time); - const vfloat<K> lower_z = node->dequantizeLowerZ(i,time); - const vfloat<K> upper_z = node->dequantizeUpperZ(i,time); + const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time); + const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time); + const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time); + const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time); + const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time); + const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time); const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x; const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y; diff --git a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h b/thirdparty/embree/kernels/bvh/node_intersector_packet_stream.h index f379b57aea..55b2c27231 100644 --- a/thirdparty/embree-aarch64/kernels/bvh/node_intersector_packet_stream.h +++ b/thirdparty/embree/kernels/bvh/node_intersector_packet_stream.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -32,19 +32,11 @@ namespace embree __forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir) { rdir = rcp_safe(ray_dir); -#if defined(__aarch64__) - neg_org_rdir = -(ray_org * rdir); -#else org_rdir = ray_org * rdir; -#endif } Vec3vf<K> rdir; -#if defined(__aarch64__) - Vec3vf<K> neg_org_rdir; -#else Vec3vf<K> org_rdir; -#endif vfloat<K> tnear; vfloat<K> tfar; }; @@ -84,36 +76,27 @@ namespace embree // Fast AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx, int K> + template<int N, int K> __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node, const TravRayKStreamFast<K>& ray, size_t k, const NearFarPrecalculations& nf) { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); - -#if defined (__aarch64__) - const vfloat<Nx> rminX = madd(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k])); - const vfloat<Nx> rminY = madd(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k])); - const vfloat<Nx> rminZ = madd(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k])); - const vfloat<Nx> rmaxX = madd(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.neg_org_rdir.x[k])); - const vfloat<Nx> rmaxY = madd(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.neg_org_rdir.y[k])); - const vfloat<Nx> rmaxZ = madd(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.neg_org_rdir.z[k])); -#else - const vfloat<Nx> rminX = msub(bminX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k])); - const vfloat<Nx> rminY = msub(bminY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k])); - const vfloat<Nx> rminZ = msub(bminZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k])); - const vfloat<Nx> rmaxX = msub(bmaxX, vfloat<Nx>(ray.rdir.x[k]), vfloat<Nx>(ray.org_rdir.x[k])); - const vfloat<Nx> rmaxY = msub(bmaxY, vfloat<Nx>(ray.rdir.y[k]), vfloat<Nx>(ray.org_rdir.y[k])); - const vfloat<Nx> rmaxZ = msub(bmaxZ, vfloat<Nx>(ray.rdir.z[k]), vfloat<Nx>(ray.org_rdir.z[k])); -#endif - const vfloat<Nx> rmin = maxi(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k])); - const vfloat<Nx> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k])); - - const vbool<Nx> vmask_first_hit = rmin <= rmax; + const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); + const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); + const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); + const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); + const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); + const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); + + const vfloat<N> rminX = msub(bminX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k])); + const vfloat<N> rminY = msub(bminY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k])); + const vfloat<N> rminZ = msub(bminZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k])); + const vfloat<N> rmaxX = msub(bmaxX, vfloat<N>(ray.rdir.x[k]), vfloat<N>(ray.org_rdir.x[k])); + const vfloat<N> rmaxY = msub(bmaxY, vfloat<N>(ray.rdir.y[k]), vfloat<N>(ray.org_rdir.y[k])); + const vfloat<N> rmaxZ = msub(bmaxZ, vfloat<N>(ray.rdir.z[k]), vfloat<N>(ray.org_rdir.z[k])); + const vfloat<N> rmin = maxi(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k])); + const vfloat<N> rmax = mini(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k])); + + const vbool<N> vmask_first_hit = rmin <= rmax; return movemask(vmask_first_hit) & (((size_t)1 << N)-1); } @@ -130,21 +113,12 @@ namespace embree const vfloat<K> bmaxY = *(const float*)(ptr + nf.farY); const vfloat<K> bmaxZ = *(const float*)(ptr + nf.farZ); -#if defined (__aarch64__) - const vfloat<K> rminX = madd(bminX, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> rminY = madd(bminY, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> rminZ = madd(bminZ, ray.rdir.z, ray.neg_org_rdir.z); - const vfloat<K> rmaxX = madd(bmaxX, ray.rdir.x, ray.neg_org_rdir.x); - const vfloat<K> rmaxY = madd(bmaxY, ray.rdir.y, ray.neg_org_rdir.y); - const vfloat<K> rmaxZ = madd(bmaxZ, ray.rdir.z, ray.neg_org_rdir.z); -#else const vfloat<K> rminX = msub(bminX, ray.rdir.x, ray.org_rdir.x); const vfloat<K> rminY = msub(bminY, ray.rdir.y, ray.org_rdir.y); const vfloat<K> rminZ = msub(bminZ, ray.rdir.z, ray.org_rdir.z); const vfloat<K> rmaxX = msub(bmaxX, ray.rdir.x, ray.org_rdir.x); const vfloat<K> rmaxY = msub(bmaxY, ray.rdir.y, ray.org_rdir.y); const vfloat<K> rmaxZ = msub(bmaxZ, ray.rdir.z, ray.org_rdir.z); -#endif const vfloat<K> rmin = maxi(rminX, rminY, rminZ, ray.tnear); const vfloat<K> rmax = mini(rmaxX, rmaxY, rmaxZ, ray.tfar); @@ -158,28 +132,28 @@ namespace embree // Robust AABBNode intersection ////////////////////////////////////////////////////////////////////////////////////// - template<int N, int Nx, int K> + template<int N, int K> __forceinline size_t intersectNode1(const typename BVHN<N>::AABBNode* __restrict__ node, const TravRayKStreamRobust<K>& ray, size_t k, const NearFarPrecalculations& nf) { - const vfloat<Nx> bminX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); - const vfloat<Nx> bminY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); - const vfloat<Nx> bminZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); - const vfloat<Nx> bmaxX = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); - const vfloat<Nx> bmaxY = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); - const vfloat<Nx> bmaxZ = vfloat<Nx>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); - - const vfloat<Nx> rminX = (bminX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]); - const vfloat<Nx> rminY = (bminY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]); - const vfloat<Nx> rminZ = (bminZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]); - const vfloat<Nx> rmaxX = (bmaxX - vfloat<Nx>(ray.org.x[k])) * vfloat<Nx>(ray.rdir.x[k]); - const vfloat<Nx> rmaxY = (bmaxY - vfloat<Nx>(ray.org.y[k])) * vfloat<Nx>(ray.rdir.y[k]); - const vfloat<Nx> rmaxZ = (bmaxZ - vfloat<Nx>(ray.org.z[k])) * vfloat<Nx>(ray.rdir.z[k]); + const vfloat<N> bminX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearX)); + const vfloat<N> bminY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearY)); + const vfloat<N> bminZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.nearZ)); + const vfloat<N> bmaxX = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farX)); + const vfloat<N> bmaxY = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farY)); + const vfloat<N> bmaxZ = vfloat<N>(*(const vfloat<N>*)((const char*)&node->lower_x + nf.farZ)); + + const vfloat<N> rminX = (bminX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]); + const vfloat<N> rminY = (bminY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]); + const vfloat<N> rminZ = (bminZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]); + const vfloat<N> rmaxX = (bmaxX - vfloat<N>(ray.org.x[k])) * vfloat<N>(ray.rdir.x[k]); + const vfloat<N> rmaxY = (bmaxY - vfloat<N>(ray.org.y[k])) * vfloat<N>(ray.rdir.y[k]); + const vfloat<N> rmaxZ = (bmaxZ - vfloat<N>(ray.org.z[k])) * vfloat<N>(ray.rdir.z[k]); const float round_up = 1.0f+3.0f*float(ulp); // FIXME: use per instruction rounding for AVX512 - const vfloat<Nx> rmin = max(rminX, rminY, rminZ, vfloat<Nx>(ray.tnear[k])); - const vfloat<Nx> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<Nx>(ray.tfar[k])); + const vfloat<N> rmin = max(rminX, rminY, rminZ, vfloat<N>(ray.tnear[k])); + const vfloat<N> rmax = round_up *min(rmaxX, rmaxY, rmaxZ, vfloat<N>(ray.tfar[k])); - const vbool<Nx> vmask_first_hit = rmin <= rmax; + const vbool<N> vmask_first_hit = rmin <= rmax; return movemask(vmask_first_hit) & (((size_t)1 << N)-1); } diff --git a/thirdparty/embree-aarch64/kernels/common/accel.h b/thirdparty/embree/kernels/common/accel.h index c038d3cf21..cc4ea1805b 100644 --- a/thirdparty/embree-aarch64/kernels/common/accel.h +++ b/thirdparty/embree/kernels/common/accel.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -332,7 +332,7 @@ namespace embree intersectorN.intersect(this,rayN,N,context); } -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined(__SSE__) __forceinline void intersect(const vbool4& valid, RayHitK<4>& ray, IntersectContext* context) { const vint<4> mask = valid.mask32(); intersect4(&mask,(RTCRayHit4&)ray,context); @@ -388,7 +388,7 @@ namespace embree intersectorN.occluded(this,rayN,N,context); } -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined(__SSE__) __forceinline void occluded(const vbool4& valid, RayK<4>& ray, IntersectContext* context) { const vint<4> mask = valid.mask32(); occluded4(&mask,(RTCRay4&)ray,context); diff --git a/thirdparty/embree-aarch64/kernels/common/accelinstance.h b/thirdparty/embree/kernels/common/accelinstance.h index d74b96df3f..c63ef998bd 100644 --- a/thirdparty/embree-aarch64/kernels/common/accelinstance.h +++ b/thirdparty/embree/kernels/common/accelinstance.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.cpp b/thirdparty/embree/kernels/common/acceln.cpp index aadb4a64ef..32a27c560a 100644 --- a/thirdparty/embree-aarch64/kernels/common/acceln.cpp +++ b/thirdparty/embree/kernels/common/acceln.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "acceln.h" @@ -97,7 +97,7 @@ namespace embree for (size_t i=0; i<This->accels.size(); i++) { if (This->accels[i]->isEmpty()) continue; This->accels[i]->intersectors.occluded4(valid,ray,context); -#if defined(__SSE2__) || defined(__ARM_NEON) +#if defined(__SSE2__) vbool4 valid0 = asBool(((vint4*)valid)[0]); vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); if (unlikely(none(valid0 & hit0))) break; @@ -111,7 +111,7 @@ namespace embree for (size_t i=0; i<This->accels.size(); i++) { if (This->accels[i]->isEmpty()) continue; This->accels[i]->intersectors.occluded8(valid,ray,context); -#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA +#if defined(__SSE2__) // FIXME: use higher ISA vbool4 valid0 = asBool(((vint4*)valid)[0]); vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); vbool4 valid1 = asBool(((vint4*)valid)[1]); @@ -127,7 +127,7 @@ namespace embree for (size_t i=0; i<This->accels.size(); i++) { if (This->accels[i]->isEmpty()) continue; This->accels[i]->intersectors.occluded16(valid,ray,context); -#if defined(__SSE2__) || defined(__ARM_NEON) // FIXME: use higher ISA +#if defined(__SSE2__) // FIXME: use higher ISA vbool4 valid0 = asBool(((vint4*)valid)[0]); vbool4 hit0 = ((vfloat4*)ray.tfar)[0] >= vfloat4(zero); vbool4 valid1 = asBool(((vint4*)valid)[1]); diff --git a/thirdparty/embree-aarch64/kernels/common/acceln.h b/thirdparty/embree/kernels/common/acceln.h index 2edd98f647..0445b2e811 100644 --- a/thirdparty/embree-aarch64/kernels/common/acceln.h +++ b/thirdparty/embree/kernels/common/acceln.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.cpp b/thirdparty/embree/kernels/common/accelset.cpp index 79be1c4301..8c18f31776 100644 --- a/thirdparty/embree-aarch64/kernels/common/accelset.cpp +++ b/thirdparty/embree/kernels/common/accelset.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "accelset.h" diff --git a/thirdparty/embree-aarch64/kernels/common/accelset.h b/thirdparty/embree/kernels/common/accelset.h index 3774b2accb..90b184a07b 100644 --- a/thirdparty/embree-aarch64/kernels/common/accelset.h +++ b/thirdparty/embree/kernels/common/accelset.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.cpp b/thirdparty/embree/kernels/common/alloc.cpp index 6fa406f03a..1a0e1aeed3 100644 --- a/thirdparty/embree-aarch64/kernels/common/alloc.cpp +++ b/thirdparty/embree/kernels/common/alloc.cpp @@ -1,11 +1,8 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "alloc.h" #include "../../common/sys/thread.h" -#if defined(__aarch64__) && defined(BUILD_IOS) -#include "../../common/sys/barrier.h" -#endif namespace embree { diff --git a/thirdparty/embree-aarch64/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h index 488fa707ef..4458e35c24 100644 --- a/thirdparty/embree-aarch64/kernels/common/alloc.h +++ b/thirdparty/embree/kernels/common/alloc.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -8,10 +8,6 @@ #include "scene.h" #include "primref.h" -#if defined(__aarch64__) && defined(BUILD_IOS) -#include <mutex> -#endif - namespace embree { class FastAllocator @@ -30,7 +26,7 @@ namespace embree public: struct ThreadLocal2; - enum AllocationType { ALIGNED_MALLOC, EMBREE_OS_MALLOC, SHARED, ANY_TYPE }; + enum AllocationType { ALIGNED_MALLOC, OS_MALLOC, SHARED, ANY_TYPE }; /*! Per thread structure holding the current memory block. */ struct __aligned(64) ThreadLocal @@ -136,11 +132,7 @@ namespace embree { assert(alloc_i); if (alloc.load() == alloc_i) return; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else Lock<SpinLock> lock(mutex); -#endif //if (alloc.load() == alloc_i) return; // not required as only one thread calls bind if (alloc.load()) { alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes(); @@ -158,11 +150,7 @@ namespace embree { assert(alloc_i); if (alloc.load() != alloc_i) return; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else Lock<SpinLock> lock(mutex); -#endif if (alloc.load() != alloc_i) return; // required as a different thread calls unbind alloc.load()->bytesUsed += alloc0.getUsedBytes() + alloc1.getUsedBytes(); alloc.load()->bytesFree += alloc0.getFreeBytes() + alloc1.getFreeBytes(); @@ -173,11 +161,7 @@ namespace embree } public: -#if defined(__aarch64__) && defined(BUILD_IOS) - std::mutex mutex; -#else SpinLock mutex; //!< required as unbind is called from other threads -#endif std::atomic<FastAllocator*> alloc; //!< parent allocator ThreadLocal alloc0; ThreadLocal alloc1; @@ -185,7 +169,7 @@ namespace embree FastAllocator (Device* device, bool osAllocation) : device(device), slotMask(0), usedBlocks(nullptr), freeBlocks(nullptr), use_single_mode(false), defaultBlockSize(PAGE_SIZE), estimatedSize(0), - growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC), + growSize(PAGE_SIZE), maxGrowSize(maxAllocationSize), log2_grow_size_scale(0), bytesUsed(0), bytesFree(0), bytesWasted(0), atype(osAllocation ? OS_MALLOC : ALIGNED_MALLOC), primrefarray(device,0) { for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++) @@ -222,7 +206,7 @@ namespace embree void setOSallocation(bool flag) { - atype = flag ? EMBREE_OS_MALLOC : ALIGNED_MALLOC; + atype = flag ? OS_MALLOC : ALIGNED_MALLOC; } private: @@ -233,11 +217,7 @@ namespace embree ThreadLocal2* alloc = thread_local_allocator2; if (alloc == nullptr) { thread_local_allocator2 = alloc = new ThreadLocal2; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(s_thread_local_allocators_lock); -#else Lock<SpinLock> lock(s_thread_local_allocators_lock); -#endif s_thread_local_allocators.push_back(make_unique(alloc)); } return alloc; @@ -247,11 +227,7 @@ namespace embree __forceinline void join(ThreadLocal2* alloc) { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(s_thread_local_allocators_lock); -#else Lock<SpinLock> lock(thread_local_allocators_lock); -#endif thread_local_allocators.push_back(alloc); } @@ -321,11 +297,7 @@ namespace embree } static const size_t threadLocalAllocOverhead = 20; //! 20 means 5% parallel allocation overhead through unfilled thread local blocks -#if defined(__AVX512ER__) // KNL - static const size_t mainAllocOverheadStatic = 15; //! 15 means 7.5% allocation overhead through unfilled main alloc blocks -#else static const size_t mainAllocOverheadStatic = 20; //! 20 means 5% allocation overhead through unfilled main alloc blocks -#endif static const size_t mainAllocOverheadDynamic = 8; //! 20 means 12.5% allocation overhead through unfilled main alloc blocks /* calculates a single threaded threshold for the builders such @@ -520,11 +492,7 @@ namespace embree /* parallel block creation in case of no freeBlocks, avoids single global mutex */ if (likely(freeBlocks.load() == nullptr)) { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(slotMutex[slot]); -#else Lock<SpinLock> lock(slotMutex[slot]); -#endif if (myUsedBlocks == threadUsedBlocks[slot]) { const size_t alignedBytes = (bytes+(align-1)) & ~(align-1); const size_t allocSize = max(min(growSize,maxGrowSize),alignedBytes); @@ -537,11 +505,7 @@ namespace embree /* if this fails allocate new block */ { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else - Lock<SpinLock> lock(mutex); -#endif + Lock<SpinLock> lock(mutex); if (myUsedBlocks == threadUsedBlocks[slot]) { if (freeBlocks.load() != nullptr) { @@ -563,11 +527,7 @@ namespace embree /*! add new block */ void addBlock(void* ptr, ssize_t bytes) { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(mutex); -#else Lock<SpinLock> lock(mutex); -#endif const size_t sizeof_Header = offsetof(Block,data[0]); void* aptr = (void*) ((((size_t)ptr)+maxAlignment-1) & ~(maxAlignment-1)); size_t ofs = (size_t) aptr - (size_t) ptr; @@ -653,8 +613,8 @@ namespace embree bytesWasted(alloc->bytesWasted), stat_all(alloc,ANY_TYPE), stat_malloc(alloc,ALIGNED_MALLOC), - stat_4K(alloc,EMBREE_OS_MALLOC,false), - stat_2M(alloc,EMBREE_OS_MALLOC,true), + stat_4K(alloc,OS_MALLOC,false), + stat_2M(alloc,OS_MALLOC,true), stat_shared(alloc,SHARED) {} AllStatistics (size_t bytesUsed, @@ -747,7 +707,7 @@ namespace embree /* We avoid using os_malloc for small blocks as this could * cause a risk of fragmenting the virtual address space and * reach the limit of vm.max_map_count = 65k under Linux. */ - if (atype == EMBREE_OS_MALLOC && bytesAllocate < maxAllocationSize) + if (atype == OS_MALLOC && bytesAllocate < maxAllocationSize) atype = ALIGNED_MALLOC; /* we need to additionally allocate some header */ @@ -756,7 +716,7 @@ namespace embree bytesReserve = sizeof_Header+bytesReserve; /* consume full 4k pages with using os_malloc */ - if (atype == EMBREE_OS_MALLOC) { + if (atype == OS_MALLOC) { bytesAllocate = ((bytesAllocate+PAGE_SIZE-1) & ~(PAGE_SIZE-1)); bytesReserve = ((bytesReserve +PAGE_SIZE-1) & ~(PAGE_SIZE-1)); } @@ -788,11 +748,11 @@ namespace embree return new (ptr) Block(ALIGNED_MALLOC,bytesAllocate-sizeof_Header,bytesAllocate-sizeof_Header,next,alignment); } } - else if (atype == EMBREE_OS_MALLOC) + else if (atype == OS_MALLOC) { if (device) device->memoryMonitor(bytesAllocate,false); bool huge_pages; ptr = os_malloc(bytesReserve,huge_pages); - return new (ptr) Block(EMBREE_OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages); + return new (ptr) Block(OS_MALLOC,bytesAllocate-sizeof_Header,bytesReserve-sizeof_Header,next,0,huge_pages); } else assert(false); @@ -836,7 +796,7 @@ namespace embree if (device) device->memoryMonitor(-sizeof_Alloced,true); } - else if (atype == EMBREE_OS_MALLOC) { + else if (atype == OS_MALLOC) { size_t sizeof_This = sizeof_Header+reserveEnd; os_free(this,sizeof_This,huge_pages); if (device) device->memoryMonitor(-sizeof_Alloced,true); @@ -897,7 +857,7 @@ namespace embree bool hasType(AllocationType atype_i, bool huge_pages_i) const { if (atype_i == ANY_TYPE ) return true; - else if (atype == EMBREE_OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages; + else if (atype == OS_MALLOC) return atype_i == atype && huge_pages_i == huge_pages; else return atype_i == atype; } @@ -946,7 +906,7 @@ namespace embree void print_block() const { if (atype == ALIGNED_MALLOC) std::cout << "A"; - else if (atype == EMBREE_OS_MALLOC) std::cout << "O"; + else if (atype == OS_MALLOC) std::cout << "O"; else if (atype == SHARED) std::cout << "S"; if (huge_pages) std::cout << "H"; size_t bytesUsed = getBlockUsedBytes(); @@ -976,11 +936,7 @@ namespace embree std::atomic<Block*> freeBlocks; std::atomic<Block*> threadBlocks[MAX_THREAD_USED_BLOCK_SLOTS]; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::mutex slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; -#else SpinLock slotMutex[MAX_THREAD_USED_BLOCK_SLOTS]; -#endif bool use_single_mode; size_t defaultBlockSize; @@ -994,11 +950,7 @@ namespace embree static __thread ThreadLocal2* thread_local_allocator2; static SpinLock s_thread_local_allocators_lock; static std::vector<std::unique_ptr<ThreadLocal2>> s_thread_local_allocators; -#if defined(__aarch64__) && defined(BUILD_IOS) - std::mutex thread_local_allocators_lock; -#else SpinLock thread_local_allocators_lock; -#endif std::vector<ThreadLocal2*> thread_local_allocators; AllocationType atype; mvector<PrimRef> primrefarray; //!< primrefarray used to allocate nodes diff --git a/thirdparty/embree-aarch64/kernels/common/buffer.h b/thirdparty/embree/kernels/common/buffer.h index 02d319c59d..793012c04d 100644 --- a/thirdparty/embree-aarch64/kernels/common/buffer.h +++ b/thirdparty/embree/kernels/common/buffer.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/builder.h b/thirdparty/embree/kernels/common/builder.h index d2a1cfe3ce..07fe7b069b 100644 --- a/thirdparty/embree-aarch64/kernels/common/builder.h +++ b/thirdparty/embree/kernels/common/builder.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/context.h b/thirdparty/embree/kernels/common/context.h index d0185a74f2..ccd88bdeac 100644 --- a/thirdparty/embree-aarch64/kernels/common/context.h +++ b/thirdparty/embree/kernels/common/context.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/default.h b/thirdparty/embree/kernels/common/default.h index 709119163b..f15d61b768 100644 --- a/thirdparty/embree-aarch64/kernels/common/default.h +++ b/thirdparty/embree/kernels/common/default.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -55,11 +55,6 @@ #include <utility> #include <sstream> -#if !defined(_DEBUG) && defined(BUILD_IOS) -#undef assert -#define assert(_EXPR) -#endif - namespace embree { //////////////////////////////////////////////////////////////////////////////// diff --git a/thirdparty/embree-aarch64/kernels/common/device.cpp b/thirdparty/embree/kernels/common/device.cpp index 16ec11b892..068e0c2983 100644 --- a/thirdparty/embree-aarch64/kernels/common/device.cpp +++ b/thirdparty/embree/kernels/common/device.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "device.h" @@ -64,6 +64,9 @@ namespace embree case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break; case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break; case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break; + case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break; + case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break; + case CPU::ARM: frequency_level = FREQUENCY_SIMD128; break; } /* initialize global state */ @@ -71,10 +74,6 @@ namespace embree State::parseString(EMBREE_CONFIG); #endif State::parseString(cfg); - if (!ignore_config_files && FileName::executableFolder() != FileName("")) - State::parseFile(FileName::executableFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR))); - if (!ignore_config_files && FileName::homeFolder() != FileName("")) - State::parseFile(FileName::homeFolder()+FileName(".embree" TOSTRING(RTC_VERSION_MAJOR))); State::verify(); /* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */ @@ -127,7 +126,7 @@ namespace embree /* ray stream SOA to AOS conversion */ #if defined(EMBREE_RAY_PACKETS) RayStreamFilterFuncsType rayStreamFilterFuncs; - SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(enabled_cpu_features,rayStreamFilterFuncs); + SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(enabled_cpu_features,rayStreamFilterFuncs); rayStreamFilters = rayStreamFilterFuncs(); #endif } @@ -153,11 +152,8 @@ namespace embree #if defined(EMBREE_TARGET_AVX2) v += "AVX2 "; #endif -#if defined(EMBREE_TARGET_AVX512KNL) - v += "AVX512KNL "; -#endif -#if defined(EMBREE_TARGET_AVX512SKX) - v += "AVX512SKX "; +#if defined(EMBREE_TARGET_AVX512) + v += "AVX512 "; #endif return v; } @@ -221,9 +217,6 @@ namespace embree #if defined(TASKING_INTERNAL) std::cout << "internal_tasking_system "; #endif -#if defined(TASKING_GCD) && defined(BUILD_IOS) - std::cout << "GCD tasking system "; -#endif #if defined(TASKING_PPL) std::cout << "PPL "; #endif @@ -448,7 +441,7 @@ namespace embree #endif #if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS) - case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512KNL) | hasISA(AVX512SKX); + case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512); #else case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0; #endif @@ -506,10 +499,6 @@ namespace embree #if defined(TASKING_PPL) case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2; #endif - -#if defined(TASKING_GCD) && defined(BUILD_IOS) - case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 3; -#endif #if defined(EMBREE_GEOMETRY_TRIANGLE) case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1; diff --git a/thirdparty/embree-aarch64/kernels/common/device.h b/thirdparty/embree/kernels/common/device.h index e9a81bb109..21c42c654d 100644 --- a/thirdparty/embree-aarch64/kernels/common/device.h +++ b/thirdparty/embree/kernels/common/device.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.cpp b/thirdparty/embree/kernels/common/geometry.cpp index b3aa8e3396..d8d3f65a5c 100644 --- a/thirdparty/embree-aarch64/kernels/common/geometry.cpp +++ b/thirdparty/embree/kernels/common/geometry.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "geometry.h" diff --git a/thirdparty/embree-aarch64/kernels/common/geometry.h b/thirdparty/embree/kernels/common/geometry.h index 953974bfd2..2f9f2e7c94 100644 --- a/thirdparty/embree-aarch64/kernels/common/geometry.h +++ b/thirdparty/embree/kernels/common/geometry.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -301,7 +301,7 @@ namespace embree template<int N> __forceinline vint<N> timeSegment(const vfloat<N>& time, vfloat<N>& ftime) const { - return getTimeSegment(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime); + return getTimeSegment<N>(time,vfloat<N>(time_range.lower),vfloat<N>(time_range.upper),vfloat<N>(fnumTimeSegments),ftime); } /* calculate overlapping time segment range */ diff --git a/thirdparty/embree-aarch64/kernels/common/hit.h b/thirdparty/embree/kernels/common/hit.h index 32a198cdfe..fd1a9d6391 100644 --- a/thirdparty/embree-aarch64/kernels/common/hit.h +++ b/thirdparty/embree/kernels/common/hit.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -22,7 +22,7 @@ namespace embree { for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) instID[l] = RTC_INVALID_GEOMETRY_ID; - instance_id_stack::copy(context->instID, instID); + instance_id_stack::copy_UV<K>(context->instID, instID); } /* Returns the size of the hit */ @@ -48,7 +48,7 @@ namespace embree __forceinline HitK(const RTCIntersectContext* context, unsigned int geomID, unsigned int primID, float u, float v, const Vec3fa& Ng) : Ng(Ng.x,Ng.y,Ng.z), u(u), v(v), primID(primID), geomID(geomID) { - instance_id_stack::copy(context->instID, instID); + instance_id_stack::copy_UU(context->instID, instID); } /* Returns the size of the hit */ @@ -96,7 +96,7 @@ namespace embree ray.v = hit.v; ray.primID = hit.primID; ray.geomID = hit.geomID; - instance_id_stack::copy(hit.instID, ray.instID); + instance_id_stack::copy_UU(hit.instID, ray.instID); } template<int K> @@ -109,6 +109,6 @@ namespace embree vfloat<K>::storeu(mask,&ray.v, hit.v); vuint<K>::storeu(mask,&ray.primID, hit.primID); vuint<K>::storeu(mask,&ray.geomID, hit.geomID); - instance_id_stack::copy(hit.instID, ray.instID, mask); + instance_id_stack::copy_VV<K>(hit.instID, ray.instID, mask); } } diff --git a/thirdparty/embree/kernels/common/instance_stack.h b/thirdparty/embree/kernels/common/instance_stack.h new file mode 100644 index 0000000000..d3c0a643f1 --- /dev/null +++ b/thirdparty/embree/kernels/common/instance_stack.h @@ -0,0 +1,179 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "rtcore.h" + +namespace embree { +namespace instance_id_stack { + +static_assert(RTC_MAX_INSTANCE_LEVEL_COUNT > 0, + "RTC_MAX_INSTANCE_LEVEL_COUNT must be greater than 0."); + +/******************************************************************************* + * Instance ID stack manipulation. + * This is used from the instance intersector. + ******************************************************************************/ + +/* + * Push an instance to the stack. + */ +RTC_FORCEINLINE bool push(RTCIntersectContext* context, + unsigned instanceId) +{ +#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 + const bool spaceAvailable = context->instStackSize < RTC_MAX_INSTANCE_LEVEL_COUNT; + /* We assert here because instances are silently dropped when the stack is full. + This might be quite hard to find in production. */ + assert(spaceAvailable); + if (likely(spaceAvailable)) + context->instID[context->instStackSize++] = instanceId; + return spaceAvailable; +#else + const bool spaceAvailable = (context->instID[0] == RTC_INVALID_GEOMETRY_ID); + assert(spaceAvailable); + if (likely(spaceAvailable)) + context->instID[0] = instanceId; + return spaceAvailable; +#endif +} + + +/* + * Pop the last instance pushed to the stack. + * Do not call on an empty stack. + */ +RTC_FORCEINLINE void pop(RTCIntersectContext* context) +{ + assert(context); +#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 + assert(context->instStackSize > 0); + context->instID[--context->instStackSize] = RTC_INVALID_GEOMETRY_ID; +#else + assert(context->instID[0] != RTC_INVALID_GEOMETRY_ID); + context->instID[0] = RTC_INVALID_GEOMETRY_ID; +#endif +} + +/* + * Optimized instance id stack copy. + * The copy() functions will either copy full + * stacks or copy only until the last valid element has been copied, depending + * on RTC_MAX_INSTANCE_LEVEL_COUNT. + */ +RTC_FORCEINLINE void copy_UU(const unsigned* src, unsigned* tgt) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + tgt[0] = src[0]; + +#else + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + tgt[l] = src[l]; + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) + if (src[l] == RTC_INVALID_GEOMETRY_ID) + break; + } +#endif +} + +template <int K> +RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + tgt[0] = src[0]; + +#else + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + tgt[l] = src[l]; + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) + if (src[l] == RTC_INVALID_GEOMETRY_ID) + break; + } +#endif +} + +template <int K> +RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, size_t j) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + tgt[0][j] = src[0]; + +#else + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + tgt[l][j] = src[l]; + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) + if (src[l] == RTC_INVALID_GEOMETRY_ID) + break; + } +#endif +} + +template <int K> +RTC_FORCEINLINE void copy_UV(const unsigned* src, vuint<K>* tgt, const vbool<K>& mask) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + vuint<K>::store(mask, tgt, src[0]); + +#else + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + vuint<K>::store(mask, tgt + l, src[l]); + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) + if (src[l] == RTC_INVALID_GEOMETRY_ID) + break; + } +#endif +} + +template <int K> +RTC_FORCEINLINE void copy_VU(const vuint<K>* src, unsigned* tgt, size_t i) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + tgt[0] = src[0][i]; + +#else + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + tgt[l] = src[l][i]; + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) + if (src[l][i] == RTC_INVALID_GEOMETRY_ID) + break; + } +#endif +} + +template <int K> +RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, size_t i, size_t j) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + tgt[0][j] = src[0][i]; + +#else + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + tgt[l][j] = src[l][i]; + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) + if (src[l][i] == RTC_INVALID_GEOMETRY_ID) + break; + } +#endif +} + +template <int K> +RTC_FORCEINLINE void copy_VV(const vuint<K>* src, vuint<K>* tgt, const vbool<K>& mask) +{ +#if (RTC_MAX_INSTANCE_LEVEL_COUNT == 1) + vuint<K>::store(mask, tgt, src[0]); + +#else + vbool<K> done = !mask; + for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) { + vuint<K>::store(mask, tgt + l, src[l]); + if (RTC_MAX_INSTANCE_LEVEL_COUNT > 4) { + done |= src[l] == RTC_INVALID_GEOMETRY_ID; + if (all(done)) break; + } + } +#endif +} + +} // namespace instance_id_stack +} // namespace embree diff --git a/thirdparty/embree-aarch64/kernels/common/isa.h b/thirdparty/embree/kernels/common/isa.h index 63fb8d3351..ae6556336c 100644 --- a/thirdparty/embree-aarch64/kernels/common/isa.h +++ b/thirdparty/embree/kernels/common/isa.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -13,23 +13,21 @@ namespace embree name##Func name; #define DECLARE_SYMBOL2(type,name) \ - namespace sse2 { extern type name(); } \ - namespace sse42 { extern type name(); } \ - namespace avx { extern type name(); } \ - namespace avx2 { extern type name(); } \ - namespace avx512knl { extern type name(); } \ - namespace avx512skx { extern type name(); } \ + namespace sse2 { extern type name(); } \ + namespace sse42 { extern type name(); } \ + namespace avx { extern type name(); } \ + namespace avx2 { extern type name(); } \ + namespace avx512 { extern type name(); } \ void name##_error2() { throw_RTCError(RTC_ERROR_UNKNOWN,"internal error in ISA selection for " TOSTRING(name)); } \ type name##_error() { return type(name##_error2); } \ type name##_zero() { return type(nullptr); } #define DECLARE_ISA_FUNCTION(type,symbol,args) \ - namespace sse2 { extern type symbol(args); } \ - namespace sse42 { extern type symbol(args); } \ - namespace avx { extern type symbol(args); } \ - namespace avx2 { extern type symbol(args); } \ - namespace avx512knl { extern type symbol(args); } \ - namespace avx512skx { extern type symbol(args); } \ + namespace sse2 { extern type symbol(args); } \ + namespace sse42 { extern type symbol(args); } \ + namespace avx { extern type symbol(args); } \ + namespace avx2 { extern type symbol(args); } \ + namespace avx512 { extern type symbol(args); } \ inline type symbol##_error(args) { throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"function " TOSTRING(symbol) " not supported by your CPU"); } \ typedef type (*symbol##Ty)(args); \ @@ -46,7 +44,7 @@ namespace embree #define SELECT_SYMBOL_DEFAULT(features,intersector) \ intersector = isa::intersector; -#if defined(__SSE__) || defined(__ARM_NEON) +#if defined(__SSE__) #if !defined(EMBREE_TARGET_SIMD4) #define EMBREE_TARGET_SIMD4 #endif @@ -84,24 +82,14 @@ namespace embree #define SELECT_SYMBOL_AVX2(features,intersector) #endif -#if defined(EMBREE_TARGET_AVX512KNL) +#if defined(EMBREE_TARGET_AVX512) #if !defined(EMBREE_TARGET_SIMD16) #define EMBREE_TARGET_SIMD16 #endif -#define SELECT_SYMBOL_AVX512KNL(features,intersector) \ - if ((features & AVX512KNL) == AVX512KNL) intersector = avx512knl::intersector; +#define SELECT_SYMBOL_AVX512(features,intersector) \ + if ((features & AVX512) == AVX512) intersector = avx512::intersector; #else -#define SELECT_SYMBOL_AVX512KNL(features,intersector) -#endif - -#if defined(EMBREE_TARGET_AVX512SKX) -#if !defined(EMBREE_TARGET_SIMD16) -#define EMBREE_TARGET_SIMD16 -#endif -#define SELECT_SYMBOL_AVX512SKX(features,intersector) \ - if ((features & AVX512SKX) == AVX512SKX) intersector = avx512skx::intersector; -#else -#define SELECT_SYMBOL_AVX512SKX(features,intersector) +#define SELECT_SYMBOL_AVX512(features,intersector) #endif #define SELECT_SYMBOL_DEFAULT_SSE42(features,intersector) \ @@ -119,39 +107,37 @@ namespace embree SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_SSE42(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_SSE42(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_SSE42(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); #define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ @@ -162,21 +148,19 @@ namespace embree SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); -#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_AVX(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); - -#define SELECT_SYMBOL_DEFAULT_AVX_AVX512KNL_AVX512SKX(features,intersector) \ + SELECT_SYMBOL_AVX(features,intersector); + +#define SELECT_SYMBOL_DEFAULT_AVX_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_DEFAULT_AVX_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_AVX_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); #define SELECT_SYMBOL_INIT_AVX(features,intersector) \ INIT_SYMBOL(features,intersector); \ @@ -187,11 +171,11 @@ namespace embree SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); -#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,intersector) \ INIT_SYMBOL(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); #define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2(features,intersector) \ INIT_SYMBOL(features,intersector); \ @@ -199,57 +183,49 @@ namespace embree SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); -#define SELECT_SYMBOL_INIT_AVX_AVX512KNL(features,intersector) \ +#define SELECT_SYMBOL_INIT_AVX(features,intersector) \ INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); + SELECT_SYMBOL_AVX(features,intersector); -#define SELECT_SYMBOL_INIT_AVX_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_INIT_AVX_AVX512(features,intersector) \ INIT_SYMBOL(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL(features,intersector) \ +#define SELECT_SYMBOL_INIT_AVX_AVX2(features,intersector) \ INIT_SYMBOL(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ - SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); + SELECT_SYMBOL_AVX2(features,intersector); -#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,intersector) \ INIT_SYMBOL(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_INIT_SSE42_AVX_AVX2_AVX512(features,intersector) \ INIT_SYMBOL(features,intersector); \ SELECT_SYMBOL_SSE42(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_ZERO_SSE42_AVX_AVX2_AVX512(features,intersector) \ ZERO_SYMBOL(features,intersector); \ SELECT_SYMBOL_SSE42(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(features,intersector) \ SELECT_SYMBOL_DEFAULT(features,intersector); \ SELECT_SYMBOL_AVX(features,intersector); \ SELECT_SYMBOL_AVX2(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); -#define SELECT_SYMBOL_INIT_AVX512KNL_AVX512SKX(features,intersector) \ +#define SELECT_SYMBOL_INIT_AVX512(features,intersector) \ INIT_SYMBOL(features,intersector); \ - SELECT_SYMBOL_AVX512KNL(features,intersector); \ - SELECT_SYMBOL_AVX512SKX(features,intersector); + SELECT_SYMBOL_AVX512(features,intersector); #define SELECT_SYMBOL_SSE42_AVX_AVX2(features,intersector) \ SELECT_SYMBOL_SSE42(features,intersector); \ @@ -262,10 +238,9 @@ namespace embree else return getISA(depth-1); } }; - namespace sse2 { int getISA(); }; - namespace sse42 { int getISA(); }; - namespace avx { int getISA(); }; - namespace avx2 { int getISA(); }; - namespace avx512knl { int getISA(); }; - namespace avx512skx { int getISA(); }; + namespace sse2 { int getISA(); }; + namespace sse42 { int getISA(); }; + namespace avx { int getISA(); }; + namespace avx2 { int getISA(); }; + namespace avx512 { int getISA(); }; } diff --git a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h b/thirdparty/embree/kernels/common/motion_derivative.h index 82953f0e89..c619d6a675 100644 --- a/thirdparty/embree-aarch64/kernels/common/motion_derivative.h +++ b/thirdparty/embree/kernels/common/motion_derivative.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/point_query.h b/thirdparty/embree/kernels/common/point_query.h index 27d158ca3a..7d55c91fff 100644 --- a/thirdparty/embree-aarch64/kernels/common/point_query.h +++ b/thirdparty/embree/kernels/common/point_query.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/primref.h b/thirdparty/embree/kernels/common/primref.h index ce75c982bb..d61763487b 100644 --- a/thirdparty/embree-aarch64/kernels/common/primref.h +++ b/thirdparty/embree/kernels/common/primref.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -29,7 +29,7 @@ namespace embree __forceinline PrimRef (const BBox3fa& bounds, size_t id) { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) lower = Vec3fx(bounds.lower, (unsigned)(id & 0xFFFFFFFF)); upper = Vec3fx(bounds.upper, (unsigned)((id >> 32) & 0xFFFFFFFF)); #else @@ -79,7 +79,7 @@ namespace embree /*! returns an size_t sized ID */ __forceinline size_t ID() const { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) return size_t(lower.u) + (size_t(upper.u) << 32); #else return size_t(lower.u); diff --git a/thirdparty/embree-aarch64/kernels/common/primref_mb.h b/thirdparty/embree/kernels/common/primref_mb.h index b6c1ad5712..fb08a05003 100644 --- a/thirdparty/embree-aarch64/kernels/common/primref_mb.h +++ b/thirdparty/embree/kernels/common/primref_mb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -32,7 +32,7 @@ namespace embree : lbounds((LBBox3fx)lbounds_i), time_range(time_range) { assert(activeTimeSegments > 0); -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) lbounds.bounds0.lower.a = id & 0xFFFFFFFF; lbounds.bounds0.upper.a = (id >> 32) & 0xFFFFFFFF; #else @@ -47,7 +47,7 @@ namespace embree : lbounds((LBBox3fx)lbounds_i), time_range(time_range) { assert(activeTimeSegments > 0); -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) lbounds.bounds0.lower.u = id & 0xFFFFFFFF; lbounds.bounds0.upper.u = (id >> 32) & 0xFFFFFFFF; #else @@ -115,7 +115,7 @@ namespace embree /*! returns an size_t sized ID */ __forceinline size_t ID() const { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) return size_t(lbounds.bounds0.lower.u) + (size_t(lbounds.bounds0.upper.u) << 32); #else return size_t(lbounds.bounds0.lower.u); @@ -163,7 +163,7 @@ namespace embree : bbox(bounds.interpolate(0.5f)), _activeTimeSegments(activeTimeSegments), _totalTimeSegments(totalTimeSegments), time_range(time_range) { assert(activeTimeSegments > 0); -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) bbox.lower.u = id & 0xFFFFFFFF; bbox.upper.u = (id >> 32) & 0xFFFFFFFF; #else @@ -229,7 +229,7 @@ namespace embree /*! returns an size_t sized ID */ __forceinline size_t ID() const { -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) return size_t(bbox.lower.u) + (size_t(bbox.upper.u) << 32); #else return size_t(bbox.lower.u); diff --git a/thirdparty/embree-aarch64/kernels/common/profile.h b/thirdparty/embree/kernels/common/profile.h index a7de36414d..5ef7f6ec0f 100644 --- a/thirdparty/embree-aarch64/kernels/common/profile.h +++ b/thirdparty/embree/kernels/common/profile.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/ray.h b/thirdparty/embree/kernels/common/ray.h index 336d48942c..7b951cc1e8 100644 --- a/thirdparty/embree-aarch64/kernels/common/ray.h +++ b/thirdparty/embree/kernels/common/ray.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -292,7 +292,7 @@ namespace embree ray.u = u[i]; ray.v = v[i]; ray.primID = primID[i]; ray.geomID = geomID[i]; - instance_id_stack::copy(instID, ray.instID, i); + instance_id_stack::copy_VU<K>(instID, ray.instID, i); } /* Converts single rays to ray packet */ @@ -331,7 +331,7 @@ namespace embree u[i] = ray.u; v[i] = ray.v; primID[i] = ray.primID; geomID[i] = ray.geomID; - instance_id_stack::copy(ray.instID, instID, i); + instance_id_stack::copy_UV<K>(ray.instID, instID, i); } /* copies a ray packet element into another element*/ @@ -353,7 +353,7 @@ namespace embree u[dest] = u[source]; v[dest] = v[source]; primID[dest] = primID[source]; geomID[dest] = geomID[source]; - instance_id_stack::copy(instID, instID, source, dest); + instance_id_stack::copy_VV<K>(instID, instID, source, dest); } /* Shortcuts */ @@ -1112,7 +1112,7 @@ namespace embree __forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset) { const vint<K> valid_offset = select(valid, offset, vintx(zero)); - return getRayByOffset(valid_offset); + return getRayByOffset<K>(valid_offset); } template<int K> @@ -1153,7 +1153,7 @@ namespace embree ray_k->primID = ray.primID[k]; ray_k->geomID = ray.geomID[k]; - instance_id_stack::copy(ray.instID, ray_k->instID, k); + instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k); } #endif } @@ -1185,7 +1185,7 @@ namespace embree }; template<> - __forceinline Ray4 RayStreamAOS::getRayByOffset(const vint4& offset) + __forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset) { Ray4 ray; @@ -1222,7 +1222,7 @@ namespace embree #if defined(__AVX__) template<> - __forceinline Ray8 RayStreamAOS::getRayByOffset(const vint8& offset) + __forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset) { Ray8 ray; @@ -1260,7 +1260,7 @@ namespace embree #if defined(__AVX512F__) template<> - __forceinline Ray16 RayStreamAOS::getRayByOffset(const vint16& offset) + __forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset) { Ray16 ray; @@ -1332,7 +1332,7 @@ namespace embree __forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index) { const vint<K> valid_index = select(valid, index, vintx(zero)); - return getRayByIndex(valid_index); + return getRayByIndex<K>(valid_index); } template<int K> @@ -1357,7 +1357,7 @@ namespace embree ray_k->v = ray.v[k]; ray_k->primID = ray.primID[k]; ray_k->geomID = ray.geomID[k]; - instance_id_stack::copy(ray.instID, ray_k->instID, k); + instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k); } } } @@ -1385,7 +1385,7 @@ namespace embree }; template<> - __forceinline Ray4 RayStreamAOP::getRayByIndex(const vint4& index) + __forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index) { Ray4 ray; @@ -1422,7 +1422,7 @@ namespace embree #if defined(__AVX__) template<> - __forceinline Ray8 RayStreamAOP::getRayByIndex(const vint8& index) + __forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index) { Ray8 ray; @@ -1460,7 +1460,7 @@ namespace embree #if defined(__AVX512F__) template<> - __forceinline Ray16 RayStreamAOP::getRayByIndex(const vint16& index) + __forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index) { Ray16 ray; diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp index 625fbf6d4f..94b3819e42 100644 --- a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp +++ b/thirdparty/embree/kernels/common/rtcore.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #define RTC_EXPORT_API @@ -8,31 +8,18 @@ #include "scene.h" #include "context.h" #include "../../include/embree3/rtcore_ray.h" - -#if defined(__aarch64__) && defined(BUILD_IOS) -#include <mutex> -#endif - using namespace embree; RTC_NAMESPACE_BEGIN; /* mutex to make API thread safe */ -#if defined(__aarch64__) && defined(BUILD_IOS) - static std::mutex g_mutex; -#else - static MutexSys g_mutex; -#endif + static MutexSys g_mutex; RTC_API RTCDevice rtcNewDevice(const char* config) { RTC_CATCH_BEGIN; RTC_TRACE(rtcNewDevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else Lock<MutexSys> lock(g_mutex); -#endif Device* device = new Device(config); return (RTCDevice) device->refInc(); RTC_CATCH_END(nullptr); @@ -45,11 +32,7 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_BEGIN; RTC_TRACE(rtcRetainDevice); RTC_VERIFY_HANDLE(hdevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else Lock<MutexSys> lock(g_mutex); -#endif device->refInc(); RTC_CATCH_END(nullptr); } @@ -60,11 +43,7 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_BEGIN; RTC_TRACE(rtcReleaseDevice); RTC_VERIFY_HANDLE(hdevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else Lock<MutexSys> lock(g_mutex); -#endif device->refDec(); RTC_CATCH_END(nullptr); } @@ -75,11 +54,7 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_BEGIN; RTC_TRACE(rtcGetDeviceProperty); RTC_VERIFY_HANDLE(hdevice); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else Lock<MutexSys> lock(g_mutex); -#endif return device->getProperty(prop); RTC_CATCH_END(device); return 0; @@ -92,11 +67,7 @@ RTC_NAMESPACE_BEGIN; RTC_TRACE(rtcSetDeviceProperty); const bool internal_prop = (size_t)prop >= 1000000 && (size_t)prop < 1000004; if (!internal_prop) RTC_VERIFY_HANDLE(hdevice); // allow NULL device for special internal settings -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else Lock<MutexSys> lock(g_mutex); -#endif device->setProperty(prop,val); RTC_CATCH_END(device); } @@ -212,11 +183,7 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_BEGIN; RTC_TRACE(rtcSetSceneProgressMonitorFunction); RTC_VERIFY_HANDLE(hscene); -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(g_mutex); -#else Lock<MutexSys> lock(g_mutex); -#endif scene->setProgressMonitorFunction(progress,ptr); RTC_CATCH_END2(scene); } @@ -515,12 +482,12 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - RayHit4* rayhit4 = (RayHit4*)rayhit; + Ray4* ray4 = (Ray4*) rayhit; for (size_t i=0; i<4; i++) { if (!valid[i]) continue; - RayHit ray1; rayhit4->get(i,ray1); + RayHit ray1; ray4->get(i,ray1); scene->intersectors.intersect((RTCRayHit&)ray1,&context); - rayhit4->set(i,ray1); + ray4->set(i,ray1); } #else scene->intersectors.intersect4(valid,*rayhit,&context); @@ -546,12 +513,12 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - RayHit8* rayhit8 = (RayHit8*) rayhit; + Ray8* ray8 = (Ray8*) rayhit; for (size_t i=0; i<8; i++) { if (!valid[i]) continue; - RayHit ray1; rayhit8->get(i,ray1); + RayHit ray1; ray8->get(i,ray1); scene->intersectors.intersect((RTCRayHit&)ray1,&context); - rayhit8->set(i,ray1); + ray8->set(i,ray1); } #else if (likely(scene->intersectors.intersector8)) @@ -579,12 +546,12 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - RayHit16* rayhit16 = (RayHit16*) rayhit; + Ray16* ray16 = (Ray16*) rayhit; for (size_t i=0; i<16; i++) { if (!valid[i]) continue; - RayHit ray1; rayhit16->get(i,ray1); + RayHit ray1; ray16->get(i,ray1); scene->intersectors.intersect((RTCRayHit&)ray1,&context); - rayhit16->set(i,ray1); + ray16->set(i,ray1); } #else if (likely(scene->intersectors.intersector16)) @@ -766,12 +733,12 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - Ray4* ray4 = (Ray4*) ray; + RayHit4* ray4 = (RayHit4*) ray; for (size_t i=0; i<4; i++) { if (!valid[i]) continue; - Ray ray1; ray4->get(i,ray1); + RayHit ray1; ray4->get(i,ray1); scene->intersectors.occluded((RTCRay&)ray1,&context); - ray4->set(i,ray1); + ray4->geomID[i] = ray1.geomID; } #else scene->intersectors.occluded4(valid,*ray,&context); @@ -797,10 +764,10 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - Ray8* ray8 = (Ray8*) ray; + RayHit8* ray8 = (RayHit8*) ray; for (size_t i=0; i<8; i++) { if (!valid[i]) continue; - Ray ray1; ray8->get(i,ray1); + RayHit ray1; ray8->get(i,ray1); scene->intersectors.occluded((RTCRay&)ray1,&context); ray8->set(i,ray1); } @@ -831,10 +798,10 @@ RTC_NAMESPACE_BEGIN; IntersectContext context(scene,user_context); #if !defined(EMBREE_RAY_PACKETS) - Ray16* ray16 = (Ray16*) ray; + RayHit16* ray16 = (RayHit16*) ray; for (size_t i=0; i<16; i++) { if (!valid[i]) continue; - Ray ray1; ray16->get(i,ray1); + RayHit ray1; ray16->get(i,ray1); scene->intersectors.occluded((RTCRay&)ray1,&context); ray16->set(i,ray1); } @@ -1152,7 +1119,7 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_TRIANGLE) createTriangleMeshTy createTriangleMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createTriangleMesh); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createTriangleMesh); Geometry* geom = createTriangleMesh(device); return (RTCGeometry) geom->refInc(); #else @@ -1164,7 +1131,7 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_QUAD) createQuadMeshTy createQuadMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createQuadMesh); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createQuadMesh); Geometry* geom = createQuadMesh(device); return (RTCGeometry) geom->refInc(); #else @@ -1178,7 +1145,7 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_POINT) createPointsTy createPoints = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_builder_cpu_features, createPoints); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_builder_cpu_features, createPoints); Geometry *geom; switch(type) { @@ -1223,9 +1190,9 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_CURVE) createLineSegmentsTy createLineSegments = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createLineSegments); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createLineSegments); createCurvesTy createCurves = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createCurves); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createCurves); Geometry* geom; switch (type) { @@ -1262,7 +1229,7 @@ RTC_NAMESPACE_BEGIN; #if defined(EMBREE_GEOMETRY_SUBDIVISION) createSubdivMeshTy createSubdivMesh = nullptr; SELECT_SYMBOL_DEFAULT_AVX(device->enabled_cpu_features,createSubdivMesh); - //SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createSubdivMesh); // FIXME: this does not work for some reason? + //SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createSubdivMesh); // FIXME: this does not work for some reason? Geometry* geom = createSubdivMesh(device); return (RTCGeometry) geom->refInc(); #else @@ -1274,7 +1241,7 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_USER) createUserGeometryTy createUserGeometry = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createUserGeometry); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createUserGeometry); Geometry* geom = createUserGeometry(device); return (RTCGeometry) geom->refInc(); #else @@ -1286,7 +1253,7 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_INSTANCE) createInstanceTy createInstance = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createInstance); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createInstance); Geometry* geom = createInstance(device); return (RTCGeometry) geom->refInc(); #else @@ -1298,7 +1265,7 @@ RTC_NAMESPACE_BEGIN; { #if defined(EMBREE_GEOMETRY_GRID) createGridMeshTy createGridMesh = nullptr; - SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512KNL_AVX512SKX(device->enabled_cpu_features,createGridMesh); + SELECT_SYMBOL_DEFAULT_AVX_AVX2_AVX512(device->enabled_cpu_features,createGridMesh); Geometry* geom = createGridMesh(device); return (RTCGeometry) geom->refInc(); #else diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.h b/thirdparty/embree/kernels/common/rtcore.h index 4b070e122b..373e49a689 100644 --- a/thirdparty/embree-aarch64/kernels/common/rtcore.h +++ b/thirdparty/embree/kernels/common/rtcore.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp b/thirdparty/embree/kernels/common/rtcore_builder.cpp index 6bb96bba07..1f1b6f6ddf 100644 --- a/thirdparty/embree-aarch64/kernels/common/rtcore_builder.cpp +++ b/thirdparty/embree/kernels/common/rtcore_builder.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #define RTC_EXPORT_API diff --git a/thirdparty/embree-aarch64/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp index 1e23aeb415..408d7eae6f 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene.cpp +++ b/thirdparty/embree/kernels/common/scene.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "scene.h" @@ -6,7 +6,7 @@ #include "../bvh/bvh4_factory.h" #include "../bvh/bvh8_factory.h" #include "../../common/algorithms/parallel_reduce.h" - + namespace embree { /* error raising rtcIntersect and rtcOccluded functions */ @@ -40,7 +40,7 @@ namespace embree { device->refDec(); } - + void Scene::printStatistics() { /* calculate maximum number of time segments */ @@ -56,12 +56,12 @@ namespace embree statistics[i].resize(max_time_steps); /* gather statistics */ - for (size_t i=0; i<size(); i++) + for (size_t i=0; i<size(); i++) { if (!get(i)) continue; - int ty = get(i)->getType(); + int ty = get(i)->getType(); assert(ty<Geometry::GTY_END); - int timesegments = get(i)->numTimeSegments(); + int timesegments = get(i)->numTimeSegments(); assert((unsigned int)timesegments < max_time_steps); statistics[ty][timesegments] += get(i)->size(); } @@ -76,7 +76,7 @@ namespace embree for (size_t t=0; t<max_time_steps; t++) std::cout << "----------"; std::cout << std::endl; - + for (size_t p=0; p<Geometry::GTY_END; p++) { if (std::string(Geometry::gtype_names[p]) == "") continue; @@ -90,34 +90,34 @@ namespace embree void Scene::createTriangleAccel() { #if defined(EMBREE_GEOMETRY_TRIANGLE) - if (device->tri_accel == "default") + if (device->tri_accel == "default") { if (quality_flags != RTC_BUILD_QUALITY_LOW) { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); + int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); switch (mode) { - case /*0b00*/ 0: + case /*0b00*/ 0: #if defined (EMBREE_TARGET_SIMD8) if (device->canUseAVX()) { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) + if (quality_flags == RTC_BUILD_QUALITY_HIGH) accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); else accels_add(device->bvh8_factory->BVH8Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); } - else + else #endif - { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) + { + if (quality_flags == RTC_BUILD_QUALITY_HIGH) accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); - else + else accels_add(device->bvh4_factory->BVH4Triangle4(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); } break; - case /*0b01*/ 1: + case /*0b01*/ 1: #if defined (EMBREE_TARGET_SIMD8) - if (device->canUseAVX()) + if (device->canUseAVX()) accels_add(device->bvh8_factory->BVH8Triangle4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::ROBUST)); else #endif @@ -175,8 +175,8 @@ namespace embree #if defined(EMBREE_GEOMETRY_TRIANGLE) if (device->tri_accel_mb == "default") { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); - + int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); + #if defined (EMBREE_TARGET_SIMD8) if (device->canUseAVX2()) // BVH8 reduces performance on AVX only-machines { @@ -211,18 +211,18 @@ namespace embree void Scene::createQuadAccel() { #if defined(EMBREE_GEOMETRY_QUAD) - if (device->quad_accel == "default") + if (device->quad_accel == "default") { if (quality_flags != RTC_BUILD_QUALITY_LOW) { /* static */ - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); + int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); switch (mode) { case /*0b00*/ 0: #if defined (EMBREE_TARGET_SIMD8) if (device->canUseAVX()) { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) + if (quality_flags == RTC_BUILD_QUALITY_HIGH) accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); else accels_add(device->bvh8_factory->BVH8Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); @@ -230,7 +230,7 @@ namespace embree else #endif { - if (quality_flags == RTC_BUILD_QUALITY_HIGH) + if (quality_flags == RTC_BUILD_QUALITY_HIGH) accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::HIGH_QUALITY,BVHFactory::IntersectVariant::FAST)); else accels_add(device->bvh4_factory->BVH4Quad4v(this,BVHFactory::BuildVariant::STATIC,BVHFactory::IntersectVariant::FAST)); @@ -292,9 +292,9 @@ namespace embree void Scene::createQuadMBAccel() { #if defined(EMBREE_GEOMETRY_QUAD) - if (device->quad_accel_mb == "default") + if (device->quad_accel_mb == "default") { - int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); + int mode = 2*(int)isCompactAccel() + 1*(int)isRobustAccel(); switch (mode) { case /*0b00*/ 0: #if defined (EMBREE_TARGET_SIMD8) @@ -416,7 +416,7 @@ namespace embree void Scene::createUserGeometryAccel() { #if defined(EMBREE_GEOMETRY_USER) - if (device->object_accel == "default") + if (device->object_accel == "default") { #if defined (EMBREE_TARGET_SIMD8) if (device->canUseAVX() && !isCompactAccel()) @@ -554,7 +554,7 @@ namespace embree { BVHFactory::IntersectVariant ivariant = isRobustAccel() ? BVHFactory::IntersectVariant::ROBUST : BVHFactory::IntersectVariant::FAST; #if defined(EMBREE_GEOMETRY_GRID) - if (device->grid_accel == "default") + if (device->grid_accel == "default") { #if defined (EMBREE_TARGET_SIMD8) if (device->canUseAVX() && !isCompactAccel()) @@ -579,7 +579,7 @@ namespace embree void Scene::createGridMBAccel() { #if defined(EMBREE_GEOMETRY_GRID) - if (device->grid_accel_mb == "default") + if (device->grid_accel_mb == "default") { accels_add(device->bvh4_factory->BVH4GridMB(this,BVHFactory::BuildVariant::STATIC)); } @@ -588,17 +588,13 @@ namespace embree #endif } - + void Scene::clear() { } - unsigned Scene::bind(unsigned geomID, Ref<Geometry> geometry) + unsigned Scene::bind(unsigned geomID, Ref<Geometry> geometry) { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(geometriesMutex); -#else Lock<SpinLock> lock(geometriesMutex); -#endif if (geomID == RTC_INVALID_GEOMETRY_ID) { geomID = id_pool.allocate(); if (geomID == RTC_INVALID_GEOMETRY_ID) @@ -624,19 +620,15 @@ namespace embree void Scene::detachGeometry(size_t geomID) { -#if defined(__aarch64__) && defined(BUILD_IOS) - std::scoped_lock lock(geometriesMutex); -#else Lock<SpinLock> lock(geometriesMutex); -#endif - + if (geomID >= geometries.size()) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry ID"); Ref<Geometry>& geometry = geometries[geomID]; if (geometry == null) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invalid geometry"); - + if (geometry->isEnabled()) { setModified (); } @@ -658,21 +650,21 @@ namespace embree if (!isModified()) { return; } - + /* print scene statistics */ if (device->verbosity(2)) printStatistics(); progress_monitor_counter = 0; - + /* gather scene stats and call preCommit function of each geometry */ - this->world = parallel_reduce (size_t(0), geometries.size(), GeometryCounts (), + this->world = parallel_reduce (size_t(0), geometries.size(), GeometryCounts (), [this](const range<size_t>& r)->GeometryCounts { GeometryCounts c; - for (auto i=r.begin(); i<r.end(); ++i) + for (auto i=r.begin(); i<r.end(); ++i) { - if (geometries[i] && geometries[i]->isEnabled()) + if (geometries[i] && geometries[i]->isEnabled()) { geometries[i]->preCommit(); geometries[i]->addElementsToCount (c); @@ -683,19 +675,19 @@ namespace embree }, std::plus<GeometryCounts>() ); - + /* select acceleration structures to build */ unsigned int new_enabled_geometry_types = world.enabledGeometryTypesMask(); if (flags_modified || new_enabled_geometry_types != enabled_geometry_types) { accels_init(); - /* we need to make all geometries modified, otherwise two level builder will + /* we need to make all geometries modified, otherwise two level builder will not rebuild currently not modified geometries */ parallel_for(geometryModCounters_.size(), [&] ( const size_t i ) { geometryModCounters_[i] = 0; }); - + if (getNumPrimitives(TriangleMesh::geom_type,false)) createTriangleAccel(); if (getNumPrimitives(TriangleMesh::geom_type,true)) createTriangleMBAccel(); if (getNumPrimitives(QuadMesh::geom_type,false)) createQuadAccel(); @@ -712,14 +704,14 @@ namespace embree if (getNumPrimitives(Geometry::MTY_INSTANCE_CHEAP,true)) createInstanceMBAccel(); if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,false)) createInstanceExpensiveAccel(); if (getNumPrimitives(Geometry::MTY_INSTANCE_EXPENSIVE,true)) createInstanceExpensiveMBAccel(); - + flags_modified = false; enabled_geometry_types = new_enabled_geometry_types; } - + /* select fast code path if no filter function is present */ accels_select(hasFilterFunction()); - + /* build all hierarchies of this scene */ accels_build(); @@ -737,7 +729,7 @@ namespace embree geometryModCounters_[i] = geometries[i]->getModCounter(); } }); - + updateInterface(); if (device->verbosity(2)) { @@ -746,7 +738,7 @@ namespace embree std::cout << "selected scene intersector" << std::endl; intersectors.print(2); } - + setModified(false); } @@ -771,16 +763,16 @@ namespace embree RTCSceneFlags Scene::getSceneFlags() const { return scene_flags; } - + #if defined(TASKING_INTERNAL) - void Scene::commit (bool join) + void Scene::commit (bool join) { Lock<MutexSys> buildLock(buildMutex,false); /* allocates own taskscheduler for each build */ Ref<TaskScheduler> scheduler = nullptr; - { + { Lock<MutexSys> lock(schedulerMutex); scheduler = this->scheduler; if (scheduler == null) { @@ -792,9 +784,9 @@ namespace embree /* worker threads join build */ if (!buildLock.isLocked()) { - if (!join) + if (!join) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"use rtcJoinCommitScene to join a build operation"); - + scheduler->join(); return; } @@ -816,9 +808,9 @@ namespace embree #endif -#if defined(TASKING_TBB) || defined(TASKING_GCD) +#if defined(TASKING_TBB) - void Scene::commit (bool join) + void Scene::commit (bool join) { #if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8) if (join) @@ -832,15 +824,12 @@ namespace embree if (!lock.isLocked()) { #if !TASKING_TBB_USE_TASK_ISOLATION - if (!join) + if (!join) throw_RTCError(RTC_ERROR_INVALID_OPERATION,"invoking rtcCommitScene from multiple threads is not supported with this TBB version"); #endif - + do { -#if defined(TASKING_GCD) - // Do Nothing -#else #if USE_TASK_ARENA if (join) { device->arena->execute([&]{ group.wait(); }); @@ -850,24 +839,21 @@ namespace embree { group.wait(); } -#endif pause_cpu(); yield(); - } while (!buildMutex.try_lock()); - + buildMutex.unlock(); return; - } + } /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */ const unsigned int mxcsr = _mm_getcsr(); _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6)); - + try { -#if defined(TASKING_TBB) -#if TBB_INTERFACE_VERSION_MAJOR < 8 +#if TBB_INTERFACE_VERSION_MAJOR < 8 tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits); #else tbb::task_group_context ctx( tbb::task_group_context::isolated, tbb::task_group_context::default_traits | tbb::task_group_context::fp_settings ); @@ -892,22 +878,15 @@ namespace embree }); group.wait(); } - + /* reset MXCSR register again */ _mm_setcsr(mxcsr); - -#elif defined(TASKING_GCD) - - commit_task(); - -#endif // #if defined(TASKING_TBB) - - } + } catch (...) { /* reset MXCSR register again */ _mm_setcsr(mxcsr); - + accels_clear(); updateInterface(); throw; @@ -917,7 +896,7 @@ namespace embree #if defined(TASKING_PPL) - void Scene::commit (bool join) + void Scene::commit (bool join) { #if defined(TASKING_PPL) if (join) @@ -935,7 +914,7 @@ namespace embree /* for best performance set FTZ and DAZ flags in the MXCSR control and status register */ const unsigned int mxcsr = _mm_getcsr(); _mm_setcsr(mxcsr | /* FTZ */ (1<<15) | /* DAZ */ (1<<6)); - + try { group.run([&]{ @@ -945,12 +924,12 @@ namespace embree /* reset MXCSR register again */ _mm_setcsr(mxcsr); - } + } catch (...) { /* reset MXCSR register again */ _mm_setcsr(mxcsr); - + accels_clear(); updateInterface(); throw; @@ -958,7 +937,7 @@ namespace embree } #endif - void Scene::setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr) + void Scene::setProgressMonitorFunction(RTCProgressMonitorFunction func, void* ptr) { progress_monitor_function = func; progress_monitor_ptr = ptr; diff --git a/thirdparty/embree-aarch64/kernels/common/scene.h b/thirdparty/embree/kernels/common/scene.h index b41c6cde91..5ed80a63f6 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene.h +++ b/thirdparty/embree/kernels/common/scene.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree/kernels/common/scene_curves.h b/thirdparty/embree/kernels/common/scene_curves.h new file mode 100644 index 0000000000..a5a39e42d4 --- /dev/null +++ b/thirdparty/embree/kernels/common/scene_curves.h @@ -0,0 +1,688 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "default.h" +#include "geometry.h" +#include "buffer.h" + +#include "../subdiv/bezier_curve.h" +#include "../subdiv/hermite_curve.h" +#include "../subdiv/bspline_curve.h" +#include "../subdiv/catmullrom_curve.h" +#include "../subdiv/linear_bezier_patch.h" + +namespace embree +{ + /*! represents an array of bicubic bezier curves */ + struct CurveGeometry : public Geometry + { + /*! type of this geometry */ + static const Geometry::GTypeMask geom_type = Geometry::MTY_CURVE4; + + public: + + /*! bezier curve construction */ + CurveGeometry (Device* device, Geometry::GType gtype); + + public: + void setMask(unsigned mask); + void setNumTimeSteps (unsigned int numTimeSteps); + void setVertexAttributeCount (unsigned int N); + void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num); + void* getBuffer(RTCBufferType type, unsigned int slot); + void updateBuffer(RTCBufferType type, unsigned int slot); + void commit(); + bool verify(); + void setTessellationRate(float N); + void setMaxRadiusScale(float s); + void addElementsToCount (GeometryCounts & counts) const; + + public: + + /*! returns the number of vertices */ + __forceinline size_t numVertices() const { + return vertices[0].size(); + } + + /*! returns the i'th curve */ + __forceinline const unsigned int& curve(size_t i) const { + return curves[i]; + } + + /*! returns i'th vertex of the first time step */ + __forceinline Vec3ff vertex(size_t i) const { + return vertices0[i]; + } + + /*! returns i'th normal of the first time step */ + __forceinline Vec3fa normal(size_t i) const { + return normals0[i]; + } + + /*! returns i'th tangent of the first time step */ + __forceinline Vec3ff tangent(size_t i) const { + return tangents0[i]; + } + + /*! returns i'th normal derivative of the first time step */ + __forceinline Vec3fa dnormal(size_t i) const { + return dnormals0[i]; + } + + /*! returns i'th radius of the first time step */ + __forceinline float radius(size_t i) const { + return vertices0[i].w; + } + + /*! returns i'th vertex of itime'th timestep */ + __forceinline Vec3ff vertex(size_t i, size_t itime) const { + return vertices[itime][i]; + } + + /*! returns i'th normal of itime'th timestep */ + __forceinline Vec3fa normal(size_t i, size_t itime) const { + return normals[itime][i]; + } + + /*! returns i'th tangent of itime'th timestep */ + __forceinline Vec3ff tangent(size_t i, size_t itime) const { + return tangents[itime][i]; + } + + /*! returns i'th normal derivative of itime'th timestep */ + __forceinline Vec3fa dnormal(size_t i, size_t itime) const { + return dnormals[itime][i]; + } + + /*! returns i'th radius of itime'th timestep */ + __forceinline float radius(size_t i, size_t itime) const { + return vertices[itime][i].w; + } + + /*! gathers the curve starting with i'th vertex */ + __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i) const + { + p0 = vertex(i+0); + p1 = vertex(i+1); + p2 = vertex(i+2); + p3 = vertex(i+3); + } + + /*! gathers the curve starting with i'th vertex of itime'th timestep */ + __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, size_t itime) const + { + p0 = vertex(i+0,itime); + p1 = vertex(i+1,itime); + p2 = vertex(i+2,itime); + p3 = vertex(i+3,itime); + } + + /*! gathers the curve starting with i'th vertex */ + __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i) const + { + p0 = vertex(i+0); + p1 = vertex(i+1); + p2 = vertex(i+2); + p3 = vertex(i+3); + n0 = normal(i+0); + n1 = normal(i+1); + n2 = normal(i+2); + n3 = normal(i+3); + } + + /*! gathers the curve starting with i'th vertex of itime'th timestep */ + __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, size_t itime) const + { + p0 = vertex(i+0,itime); + p1 = vertex(i+1,itime); + p2 = vertex(i+2,itime); + p3 = vertex(i+3,itime); + n0 = normal(i+0,itime); + n1 = normal(i+1,itime); + n2 = normal(i+2,itime); + n3 = normal(i+3,itime); + } + + /*! prefetches the curve starting with i'th vertex of itime'th timestep */ + __forceinline void prefetchL1_vertices(size_t i) const + { + prefetchL1(vertices0.getPtr(i)+0); + prefetchL1(vertices0.getPtr(i)+64); + } + + /*! prefetches the curve starting with i'th vertex of itime'th timestep */ + __forceinline void prefetchL2_vertices(size_t i) const + { + prefetchL2(vertices0.getPtr(i)+0); + prefetchL2(vertices0.getPtr(i)+64); + } + + /*! loads curve vertices for specified time */ + __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, size_t i, float time) const + { + float ftime; + const size_t itime = timeSegment(time, ftime); + + const float t0 = 1.0f - ftime; + const float t1 = ftime; + Vec3ff a0,a1,a2,a3; + gather(a0,a1,a2,a3,i,itime); + Vec3ff b0,b1,b2,b3; + gather(b0,b1,b2,b3,i,itime+1); + p0 = madd(Vec3ff(t0),a0,t1*b0); + p1 = madd(Vec3ff(t0),a1,t1*b1); + p2 = madd(Vec3ff(t0),a2,t1*b2); + p3 = madd(Vec3ff(t0),a3,t1*b3); + } + + /*! loads curve vertices for specified time */ + __forceinline void gather(Vec3ff& p0, Vec3ff& p1, Vec3ff& p2, Vec3ff& p3, Vec3fa& n0, Vec3fa& n1, Vec3fa& n2, Vec3fa& n3, size_t i, float time) const + { + float ftime; + const size_t itime = timeSegment(time, ftime); + + const float t0 = 1.0f - ftime; + const float t1 = ftime; + Vec3ff a0,a1,a2,a3; Vec3fa an0,an1,an2,an3; + gather(a0,a1,a2,a3,an0,an1,an2,an3,i,itime); + Vec3ff b0,b1,b2,b3; Vec3fa bn0,bn1,bn2,bn3; + gather(b0,b1,b2,b3,bn0,bn1,bn2,bn3,i,itime+1); + p0 = madd(Vec3ff(t0),a0,t1*b0); + p1 = madd(Vec3ff(t0),a1,t1*b1); + p2 = madd(Vec3ff(t0),a2,t1*b2); + p3 = madd(Vec3ff(t0),a3,t1*b3); + n0 = madd(Vec3ff(t0),an0,t1*bn0); + n1 = madd(Vec3ff(t0),an1,t1*bn1); + n2 = madd(Vec3ff(t0),an2,t1*bn2); + n3 = madd(Vec3ff(t0),an3,t1*bn3); + } + + template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> + __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const + { + Vec3ff v0,v1,v2,v3; Vec3fa n0,n1,n2,n3; + unsigned int vertexID = curve(primID); + gather(v0,v1,v2,v3,n0,n1,n2,n3,vertexID,itime); + SourceCurve3ff ccurve(v0,v1,v2,v3); + SourceCurve3fa ncurve(n0,n1,n2,n3); + ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve); + return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); + } + + template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> + __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const + { + float ftime; + const size_t itime = timeSegment(time, ftime); + const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+0); + const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context,ray_org,primID,itime+1); + return clerp(curve0,curve1,ftime); + } + + /*! gathers the hermite curve starting with i'th vertex */ + __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i) const + { + p0 = vertex (i+0); + p1 = vertex (i+1); + t0 = tangent(i+0); + t1 = tangent(i+1); + } + + /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */ + __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, size_t itime) const + { + p0 = vertex (i+0,itime); + p1 = vertex (i+1,itime); + t0 = tangent(i+0,itime); + t1 = tangent(i+1,itime); + } + + /*! loads curve vertices for specified time */ + __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3ff& p1, Vec3ff& t1, size_t i, float time) const + { + float ftime; + const size_t itime = timeSegment(time, ftime); + const float f0 = 1.0f - ftime, f1 = ftime; + Vec3ff ap0,at0,ap1,at1; + gather_hermite(ap0,at0,ap1,at1,i,itime); + Vec3ff bp0,bt0,bp1,bt1; + gather_hermite(bp0,bt0,bp1,bt1,i,itime+1); + p0 = madd(Vec3ff(f0),ap0,f1*bp0); + t0 = madd(Vec3ff(f0),at0,f1*bt0); + p1 = madd(Vec3ff(f0),ap1,f1*bp1); + t1 = madd(Vec3ff(f0),at1,f1*bt1); + } + + /*! gathers the hermite curve starting with i'th vertex */ + __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i) const + { + p0 = vertex (i+0); + p1 = vertex (i+1); + t0 = tangent(i+0); + t1 = tangent(i+1); + n0 = normal(i+0); + n1 = normal(i+1); + dn0 = dnormal(i+0); + dn1 = dnormal(i+1); + } + + /*! gathers the hermite curve starting with i'th vertex of itime'th timestep */ + __forceinline void gather_hermite(Vec3ff& p0, Vec3ff& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3ff& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, size_t itime) const + { + p0 = vertex (i+0,itime); + p1 = vertex (i+1,itime); + t0 = tangent(i+0,itime); + t1 = tangent(i+1,itime); + n0 = normal(i+0,itime); + n1 = normal(i+1,itime); + dn0 = dnormal(i+0,itime); + dn1 = dnormal(i+1,itime); + } + + /*! loads curve vertices for specified time */ + __forceinline void gather_hermite(Vec3ff& p0, Vec3fa& t0, Vec3fa& n0, Vec3fa& dn0, Vec3ff& p1, Vec3fa& t1, Vec3fa& n1, Vec3fa& dn1, size_t i, float time) const + { + float ftime; + const size_t itime = timeSegment(time, ftime); + const float f0 = 1.0f - ftime, f1 = ftime; + Vec3ff ap0,at0,ap1,at1; Vec3fa an0,adn0,an1,adn1; + gather_hermite(ap0,at0,an0,adn0,ap1,at1,an1,adn1,i,itime); + Vec3ff bp0,bt0,bp1,bt1; Vec3fa bn0,bdn0,bn1,bdn1; + gather_hermite(bp0,bt0,bn0,bdn0,bp1,bt1,bn1,bdn1,i,itime+1); + p0 = madd(Vec3ff(f0),ap0,f1*bp0); + t0 = madd(Vec3ff(f0),at0,f1*bt0); + n0 = madd(Vec3ff(f0),an0,f1*bn0); + dn0= madd(Vec3ff(f0),adn0,f1*bdn0); + p1 = madd(Vec3ff(f0),ap1,f1*bp1); + t1 = madd(Vec3ff(f0),at1,f1*bt1); + n1 = madd(Vec3ff(f0),an1,f1*bn1); + dn1= madd(Vec3ff(f0),adn1,f1*bdn1); + } + + template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> + __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const size_t itime) const + { + Vec3ff v0,t0,v1,t1; Vec3fa n0,dn0,n1,dn1; + unsigned int vertexID = curve(primID); + gather_hermite(v0,t0,n0,dn0,v1,t1,n1,dn1,vertexID,itime); + + SourceCurve3ff ccurve(v0,t0,v1,t1); + SourceCurve3fa ncurve(n0,dn0,n1,dn1); + ccurve = enlargeRadiusToMinWidth(context,this,ray_org,ccurve); + return TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(ccurve,ncurve); + } + + template<typename SourceCurve3ff, typename SourceCurve3fa, typename TensorLinearCubicBezierSurface3fa> + __forceinline TensorLinearCubicBezierSurface3fa getNormalOrientedHermiteCurve(IntersectContext* context, const Vec3fa& ray_org, const unsigned int primID, const float time) const + { + float ftime; + const size_t itime = timeSegment(time, ftime); + const TensorLinearCubicBezierSurface3fa curve0 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+0); + const TensorLinearCubicBezierSurface3fa curve1 = getNormalOrientedHermiteCurve<SourceCurve3ff, SourceCurve3fa, TensorLinearCubicBezierSurface3fa>(context, ray_org, primID,itime+1); + return clerp(curve0,curve1,ftime); + } + + private: + void resizeBuffers(unsigned int numSteps); + + public: + BufferView<unsigned int> curves; //!< array of curve indices + BufferView<Vec3ff> vertices0; //!< fast access to first vertex buffer + BufferView<Vec3fa> normals0; //!< fast access to first normal buffer + BufferView<Vec3ff> tangents0; //!< fast access to first tangent buffer + BufferView<Vec3fa> dnormals0; //!< fast access to first normal derivative buffer + vector<BufferView<Vec3ff>> vertices; //!< vertex array for each timestep + vector<BufferView<Vec3fa>> normals; //!< normal array for each timestep + vector<BufferView<Vec3ff>> tangents; //!< tangent array for each timestep + vector<BufferView<Vec3fa>> dnormals; //!< normal derivative array for each timestep + BufferView<char> flags; //!< start, end flag per segment + vector<BufferView<char>> vertexAttribs; //!< user buffers + int tessellationRate; //!< tessellation rate for flat curve + float maxRadiusScale = 1.0; //!< maximal min-width scaling of curve radii + }; + + namespace isa + { + + template<template<typename Ty> class Curve> + struct CurveGeometryInterface : public CurveGeometry + { + typedef Curve<Vec3ff> Curve3ff; + typedef Curve<Vec3fa> Curve3fa; + + CurveGeometryInterface (Device* device, Geometry::GType gtype) + : CurveGeometry(device,gtype) {} + + __forceinline const Curve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const + { + const unsigned int index = curve(i); + Vec3ff v0 = vertex(index+0,itime); + Vec3ff v1 = vertex(index+1,itime); + Vec3ff v2 = vertex(index+2,itime); + Vec3ff v3 = vertex(index+3,itime); + v0.w *= maxRadiusScale; + v1.w *= maxRadiusScale; + v2.w *= maxRadiusScale; + v3.w *= maxRadiusScale; + return Curve3ff (v0,v1,v2,v3); + } + + __forceinline const Curve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const + { + const unsigned int index = curve(i); + const Vec3ff v0 = vertex(index+0,itime); + const Vec3ff v1 = vertex(index+1,itime); + const Vec3ff v2 = vertex(index+2,itime); + const Vec3ff v3 = vertex(index+3,itime); + const Vec3ff w0(xfmPoint(space,(Vec3fa)v0), maxRadiusScale*v0.w); + const Vec3ff w1(xfmPoint(space,(Vec3fa)v1), maxRadiusScale*v1.w); + const Vec3ff w2(xfmPoint(space,(Vec3fa)v2), maxRadiusScale*v2.w); + const Vec3ff w3(xfmPoint(space,(Vec3fa)v3), maxRadiusScale*v3.w); + return Curve3ff(w0,w1,w2,w3); + } + + __forceinline const Curve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const + { + const float r_scale = r_scale0*scale; + const unsigned int index = curve(i); + const Vec3ff v0 = vertex(index+0,itime); + const Vec3ff v1 = vertex(index+1,itime); + const Vec3ff v2 = vertex(index+2,itime); + const Vec3ff v3 = vertex(index+3,itime); + const Vec3ff w0(xfmPoint(space,((Vec3fa)v0-ofs)*Vec3fa(scale)), maxRadiusScale*v0.w*r_scale); + const Vec3ff w1(xfmPoint(space,((Vec3fa)v1-ofs)*Vec3fa(scale)), maxRadiusScale*v1.w*r_scale); + const Vec3ff w2(xfmPoint(space,((Vec3fa)v2-ofs)*Vec3fa(scale)), maxRadiusScale*v2.w*r_scale); + const Vec3ff w3(xfmPoint(space,((Vec3fa)v3-ofs)*Vec3fa(scale)), maxRadiusScale*v3.w*r_scale); + return Curve3ff(w0,w1,w2,w3); + } + + __forceinline const Curve3fa getNormalCurve(size_t i, size_t itime = 0) const + { + const unsigned int index = curve(i); + const Vec3fa n0 = normal(index+0,itime); + const Vec3fa n1 = normal(index+1,itime); + const Vec3fa n2 = normal(index+2,itime); + const Vec3fa n3 = normal(index+3,itime); + return Curve3fa (n0,n1,n2,n3); + } + + __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const + { + const Curve3ff center = getCurveScaledRadius(i,itime); + const Curve3fa normal = getNormalCurve(i,itime); + const TensorLinearCubicBezierSurface3fa ocurve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normal); + return ocurve; + } + + __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const { + return getOrientedCurveScaledRadius(i,itime).xfm(space); + } + + __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const { + return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale); + } + + /*! check if the i'th primitive is valid at the itime'th time step */ + __forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const + { + const unsigned int index = curve(i); + if (index+3 >= numVertices()) return false; + + for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) + { + const float r0 = radius(index+0,itime); + const float r1 = radius(index+1,itime); + const float r2 = radius(index+2,itime); + const float r3 = radius(index+3,itime); + if (!isvalid(r0) || !isvalid(r1) || !isvalid(r2) || !isvalid(r3)) + return false; + + const Vec3fa v0 = vertex(index+0,itime); + const Vec3fa v1 = vertex(index+1,itime); + const Vec3fa v2 = vertex(index+2,itime); + const Vec3fa v3 = vertex(index+3,itime); + if (!isvalid(v0) || !isvalid(v1) || !isvalid(v2) || !isvalid(v3)) + return false; + + if (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE) + { + const Vec3fa n0 = normal(index+0,itime); + const Vec3fa n1 = normal(index+1,itime); + if (!isvalid(n0) || !isvalid(n1)) + return false; + } + } + + return true; + } + + template<int N> + void interpolate_impl(const RTCInterpolateArguments* const args) + { + unsigned int primID = args->primID; + float u = args->u; + RTCBufferType bufferType = args->bufferType; + unsigned int bufferSlot = args->bufferSlot; + float* P = args->P; + float* dPdu = args->dPdu; + float* ddPdudu = args->ddPdudu; + unsigned int valueCount = args->valueCount; + + /* calculate base pointer and stride */ + assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || + (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); + const char* src = nullptr; + size_t stride = 0; + if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { + src = vertexAttribs[bufferSlot].getPtr(); + stride = vertexAttribs[bufferSlot].getStride(); + } else { + src = vertices[bufferSlot].getPtr(); + stride = vertices[bufferSlot].getStride(); + } + + for (unsigned int i=0; i<valueCount; i+=N) + { + size_t ofs = i*sizeof(float); + const size_t index = curves[primID]; + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+0)*stride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+1)*stride+ofs]); + const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+2)*stride+ofs]); + const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(index+3)*stride+ofs]); + + const Curve<vfloat<N>> curve(p0,p1,p2,p3); + if (P ) mem<vfloat<N>>::storeu(valid,P+i, curve.eval(u)); + if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, curve.eval_du(u)); + if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u)); + } + } + + void interpolate(const RTCInterpolateArguments* const args) { + interpolate_impl<4>(args); + } + }; + + template<template<typename Ty> class Curve> + struct HermiteCurveGeometryInterface : public CurveGeometry + { + typedef Curve<Vec3ff> HermiteCurve3ff; + typedef Curve<Vec3fa> HermiteCurve3fa; + + HermiteCurveGeometryInterface (Device* device, Geometry::GType gtype) + : CurveGeometry(device,gtype) {} + + __forceinline const HermiteCurve3ff getCurveScaledRadius(size_t i, size_t itime = 0) const + { + const unsigned int index = curve(i); + Vec3ff v0 = vertex(index+0,itime); + Vec3ff v1 = vertex(index+1,itime); + Vec3ff t0 = tangent(index+0,itime); + Vec3ff t1 = tangent(index+1,itime); + v0.w *= maxRadiusScale; + v1.w *= maxRadiusScale; + t0.w *= maxRadiusScale; + t1.w *= maxRadiusScale; + return HermiteCurve3ff (v0,t0,v1,t1); + } + + __forceinline const HermiteCurve3ff getCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const + { + const unsigned int index = curve(i); + const Vec3ff v0 = vertex(index+0,itime); + const Vec3ff v1 = vertex(index+1,itime); + const Vec3ff t0 = tangent(index+0,itime); + const Vec3ff t1 = tangent(index+1,itime); + const Vec3ff V0(xfmPoint(space,(Vec3fa)v0),maxRadiusScale*v0.w); + const Vec3ff V1(xfmPoint(space,(Vec3fa)v1),maxRadiusScale*v1.w); + const Vec3ff T0(xfmVector(space,(Vec3fa)t0),maxRadiusScale*t0.w); + const Vec3ff T1(xfmVector(space,(Vec3fa)t1),maxRadiusScale*t1.w); + return HermiteCurve3ff(V0,T0,V1,T1); + } + + __forceinline const HermiteCurve3ff getCurveScaledRadius(const Vec3fa& ofs, const float scale, const float r_scale0, const LinearSpace3fa& space, size_t i, size_t itime = 0) const + { + const float r_scale = r_scale0*scale; + const unsigned int index = curve(i); + const Vec3ff v0 = vertex(index+0,itime); + const Vec3ff v1 = vertex(index+1,itime); + const Vec3ff t0 = tangent(index+0,itime); + const Vec3ff t1 = tangent(index+1,itime); + const Vec3ff V0(xfmPoint(space,(v0-ofs)*Vec3fa(scale)), maxRadiusScale*v0.w*r_scale); + const Vec3ff V1(xfmPoint(space,(v1-ofs)*Vec3fa(scale)), maxRadiusScale*v1.w*r_scale); + const Vec3ff T0(xfmVector(space,t0*Vec3fa(scale)), maxRadiusScale*t0.w*r_scale); + const Vec3ff T1(xfmVector(space,t1*Vec3fa(scale)), maxRadiusScale*t1.w*r_scale); + return HermiteCurve3ff(V0,T0,V1,T1); + } + + __forceinline const HermiteCurve3fa getNormalCurve(size_t i, size_t itime = 0) const + { + const unsigned int index = curve(i); + const Vec3fa n0 = normal(index+0,itime); + const Vec3fa n1 = normal(index+1,itime); + const Vec3fa dn0 = dnormal(index+0,itime); + const Vec3fa dn1 = dnormal(index+1,itime); + return HermiteCurve3fa (n0,dn0,n1,dn1); + } + + __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(size_t i, size_t itime = 0) const + { + const HermiteCurve3ff center = getCurveScaledRadius(i,itime); + const HermiteCurve3fa normal = getNormalCurve(i,itime); + const TensorLinearCubicBezierSurface3fa ocurve = TensorLinearCubicBezierSurface3fa::fromCenterAndNormalCurve(center,normal); + return ocurve; + } + + __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const LinearSpace3fa& space, size_t i, size_t itime = 0) const { + return getOrientedCurveScaledRadius(i,itime).xfm(space); + } + + __forceinline const TensorLinearCubicBezierSurface3fa getOrientedCurveScaledRadius(const Vec3fa& ofs, const float scale, const LinearSpace3fa& space, size_t i, size_t itime = 0) const { + return getOrientedCurveScaledRadius(i,itime).xfm(space,ofs,scale); + } + + /*! check if the i'th primitive is valid at the itime'th time step */ + __forceinline bool valid(Geometry::GType ctype, size_t i, const range<size_t>& itime_range) const + { + const unsigned int index = curve(i); + if (index+1 >= numVertices()) return false; + + for (size_t itime = itime_range.begin(); itime <= itime_range.end(); itime++) + { + const Vec3ff v0 = vertex(index+0,itime); + const Vec3ff v1 = vertex(index+1,itime); + if (!isvalid4(v0) || !isvalid4(v1)) + return false; + + const Vec3ff t0 = tangent(index+0,itime); + const Vec3ff t1 = tangent(index+1,itime); + if (!isvalid4(t0) || !isvalid4(t1)) + return false; + + if (ctype == Geometry::GTY_SUBTYPE_ORIENTED_CURVE) + { + const Vec3fa n0 = normal(index+0,itime); + const Vec3fa n1 = normal(index+1,itime); + if (!isvalid(n0) || !isvalid(n1)) + return false; + + const Vec3fa dn0 = dnormal(index+0,itime); + const Vec3fa dn1 = dnormal(index+1,itime); + if (!isvalid(dn0) || !isvalid(dn1)) + return false; + } + } + + return true; + } + + template<int N> + void interpolate_impl(const RTCInterpolateArguments* const args) + { + unsigned int primID = args->primID; + float u = args->u; + RTCBufferType bufferType = args->bufferType; + unsigned int bufferSlot = args->bufferSlot; + float* P = args->P; + float* dPdu = args->dPdu; + float* ddPdudu = args->ddPdudu; + unsigned int valueCount = args->valueCount; + + /* we interpolate vertex attributes linearly for hermite basis */ + if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) + { + assert(bufferSlot <= vertexAttribs.size()); + const char* vsrc = vertexAttribs[bufferSlot].getPtr(); + const size_t vstride = vertexAttribs[bufferSlot].getStride(); + + for (unsigned int i=0; i<valueCount; i+=N) + { + const size_t ofs = i*sizeof(float); + const size_t index = curves[primID]; + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]); + + if (P ) mem<vfloat<N>>::storeu(valid,P+i, madd(1.0f-u,p0,u*p1)); + if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, p1-p0); + if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero)); + } + } + + /* interpolation for vertex buffers */ + else + { + assert(bufferSlot < numTimeSteps); + const char* vsrc = vertices[bufferSlot].getPtr(); + const char* tsrc = tangents[bufferSlot].getPtr(); + const size_t vstride = vertices[bufferSlot].getStride(); + const size_t tstride = vertices[bufferSlot].getStride(); + + for (unsigned int i=0; i<valueCount; i+=N) + { + const size_t ofs = i*sizeof(float); + const size_t index = curves[primID]; + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>((int)valueCount); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+0)*vstride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&vsrc[(index+1)*vstride+ofs]); + const vfloat<N> t0 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+0)*tstride+ofs]); + const vfloat<N> t1 = mem<vfloat<N>>::loadu(valid,(float*)&tsrc[(index+1)*tstride+ofs]); + + const HermiteCurveT<vfloat<N>> curve(p0,t0,p1,t1); + if (P ) mem<vfloat<N>>::storeu(valid,P+i, curve.eval(u)); + if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i, curve.eval_du(u)); + if (ddPdudu) mem<vfloat<N>>::storeu(valid,ddPdudu+i,curve.eval_dudu(u)); + } + } + } + + void interpolate(const RTCInterpolateArguments* const args) { + interpolate_impl<4>(args); + } + }; + } + + DECLARE_ISA_FUNCTION(CurveGeometry*, createCurves, Device* COMMA Geometry::GType); +} diff --git a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h b/thirdparty/embree/kernels/common/scene_grid_mesh.h index c08658466a..fb6fed445b 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_grid_mesh.h +++ b/thirdparty/embree/kernels/common/scene_grid_mesh.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -55,8 +55,87 @@ namespace embree void commit(); bool verify(); void interpolate(const RTCInterpolateArguments* const args); - void addElementsToCount (GeometryCounts & counts) const; + template<int N> + void interpolate_impl(const RTCInterpolateArguments* const args) + { + unsigned int primID = args->primID; + float U = args->u; + float V = args->v; + + /* clamp input u,v to [0;1] range */ + U = max(min(U,1.0f),0.0f); + V = max(min(V,1.0f),0.0f); + + RTCBufferType bufferType = args->bufferType; + unsigned int bufferSlot = args->bufferSlot; + float* P = args->P; + float* dPdu = args->dPdu; + float* dPdv = args->dPdv; + float* ddPdudu = args->ddPdudu; + float* ddPdvdv = args->ddPdvdv; + float* ddPdudv = args->ddPdudv; + unsigned int valueCount = args->valueCount; + + /* calculate base pointer and stride */ + assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || + (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); + const char* src = nullptr; + size_t stride = 0; + if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { + src = vertexAttribs[bufferSlot].getPtr(); + stride = vertexAttribs[bufferSlot].getStride(); + } else { + src = vertices[bufferSlot].getPtr(); + stride = vertices[bufferSlot].getStride(); + } + + const Grid& grid = grids[primID]; + const int grid_width = grid.resX-1; + const int grid_height = grid.resY-1; + const float rcp_grid_width = rcp(float(grid_width)); + const float rcp_grid_height = rcp(float(grid_height)); + const int iu = min((int)floor(U*grid_width ),grid_width); + const int iv = min((int)floor(V*grid_height),grid_height); + const float u = U*grid_width-float(iu); + const float v = V*grid_height-float(iv); + + for (unsigned int i=0; i<valueCount; i+=N) + { + const size_t ofs = i*sizeof(float); + const unsigned int idx0 = grid.startVtxID + (iv+0)*grid.lineVtxOffset + iu; + const unsigned int idx1 = grid.startVtxID + (iv+1)*grid.lineVtxOffset + iu; + + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+0)*stride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx0+1)*stride+ofs]); + const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+1)*stride+ofs]); + const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[(idx1+0)*stride+ofs]); + const vbool<N> left = u+v <= 1.0f; + const vfloat<N> Q0 = select(left,p0,p2); + const vfloat<N> Q1 = select(left,p1,p3); + const vfloat<N> Q2 = select(left,p3,p1); + const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u); + const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v); + const vfloat<N> W = 1.0f-U-V; + + if (P) { + mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); + } + if (dPdu) { + assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)*rcp_grid_width); + assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)*rcp_grid_height); + } + if (ddPdudu) { + assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero)); + assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero)); + assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero)); + } + } + } + + void addElementsToCount (GeometryCounts & counts) const; + __forceinline unsigned int getNumSubGrids(const size_t gridID) { const Grid &g = grid(gridID); diff --git a/thirdparty/embree-aarch64/kernels/common/scene_instance.h b/thirdparty/embree/kernels/common/scene_instance.h index 7ff82a4fb8..773f2b6fec 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_instance.h +++ b/thirdparty/embree/kernels/common/scene_instance.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -150,8 +150,8 @@ namespace embree __forceinline AffineSpace3vf<K> getWorld2Local(const vbool<K>& valid, const vfloat<K>& t) const { if (unlikely(gsubtype == GTY_SUBTYPE_INSTANCE_QUATERNION)) - return getWorld2LocalSlerp(valid, t); - return getWorld2LocalLerp(valid, t); + return getWorld2LocalSlerp<K>(valid, t); + return getWorld2LocalLerp<K>(valid, t); } private: @@ -160,7 +160,7 @@ namespace embree __forceinline AffineSpace3vf<K> getWorld2LocalSlerp(const vbool<K>& valid, const vfloat<K>& t) const { vfloat<K> ftime; - const vint<K> itime_k = timeSegment(t, ftime); + const vint<K> itime_k = timeSegment<K>(t, ftime); assert(any(valid)); const size_t index = bsf(movemask(valid)); const int itime = itime_k[index]; @@ -186,7 +186,7 @@ namespace embree __forceinline AffineSpace3vf<K> getWorld2LocalLerp(const vbool<K>& valid, const vfloat<K>& t) const { vfloat<K> ftime; - const vint<K> itime_k = timeSegment(t, ftime); + const vint<K> itime_k = timeSegment<K>(t, ftime); assert(any(valid)); const size_t index = bsf(movemask(valid)); const int itime = itime_k[index]; diff --git a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h b/thirdparty/embree/kernels/common/scene_line_segments.h index c0f9ee8f77..3c9fdb39db 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_line_segments.h +++ b/thirdparty/embree/kernels/common/scene_line_segments.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -34,6 +34,44 @@ namespace embree void setMaxRadiusScale(float s); void addElementsToCount (GeometryCounts & counts) const; + template<int N> + void interpolate_impl(const RTCInterpolateArguments* const args) + { + unsigned int primID = args->primID; + float u = args->u; + RTCBufferType bufferType = args->bufferType; + unsigned int bufferSlot = args->bufferSlot; + float* P = args->P; + float* dPdu = args->dPdu; + float* ddPdudu = args->ddPdudu; + unsigned int valueCount = args->valueCount; + + /* calculate base pointer and stride */ + assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || + (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); + const char* src = nullptr; + size_t stride = 0; + if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { + src = vertexAttribs[bufferSlot].getPtr(); + stride = vertexAttribs[bufferSlot].getStride(); + } else { + src = vertices[bufferSlot].getPtr(); + stride = vertices[bufferSlot].getStride(); + } + + for (unsigned int i=0; i<valueCount; i+=N) + { + const size_t ofs = i*sizeof(float); + const size_t segment = segments[primID]; + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+0)*stride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[(segment+1)*stride+ofs]); + if (P ) mem<vfloat<N>>::storeu(valid,P+i,lerp(p0,p1,u)); + if (dPdu ) mem<vfloat<N>>::storeu(valid,dPdu+i,p1-p0); + if (ddPdudu) mem<vfloat<N>>::storeu(valid,dPdu+i,vfloat<N>(zero)); + } + } + public: /*! returns the number of vertices */ diff --git a/thirdparty/embree-aarch64/kernels/common/scene_points.h b/thirdparty/embree/kernels/common/scene_points.h index 1d39ed07ba..017e098a51 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_points.h +++ b/thirdparty/embree/kernels/common/scene_points.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h b/thirdparty/embree/kernels/common/scene_quad_mesh.h index d5bb054b14..bd8eeaaeb7 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_quad_mesh.h +++ b/thirdparty/embree/kernels/common/scene_quad_mesh.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,6 +43,66 @@ namespace embree void interpolate(const RTCInterpolateArguments* const args); void addElementsToCount (GeometryCounts & counts) const; + template<int N> + void interpolate_impl(const RTCInterpolateArguments* const args) + { + unsigned int primID = args->primID; + float u = args->u; + float v = args->v; + RTCBufferType bufferType = args->bufferType; + unsigned int bufferSlot = args->bufferSlot; + float* P = args->P; + float* dPdu = args->dPdu; + float* dPdv = args->dPdv; + float* ddPdudu = args->ddPdudu; + float* ddPdvdv = args->ddPdvdv; + float* ddPdudv = args->ddPdudv; + unsigned int valueCount = args->valueCount; + + /* calculate base pointer and stride */ + assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || + (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); + const char* src = nullptr; + size_t stride = 0; + if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { + src = vertexAttribs[bufferSlot].getPtr(); + stride = vertexAttribs[bufferSlot].getStride(); + } else { + src = vertices[bufferSlot].getPtr(); + stride = vertices[bufferSlot].getStride(); + } + + for (unsigned int i=0; i<valueCount; i+=N) + { + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); + const size_t ofs = i*sizeof(float); + const Quad& tri = quad(primID); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); + const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); + const vfloat<N> p3 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[3]*stride+ofs]); + const vbool<N> left = u+v <= 1.0f; + const vfloat<N> Q0 = select(left,p0,p2); + const vfloat<N> Q1 = select(left,p1,p3); + const vfloat<N> Q2 = select(left,p3,p1); + const vfloat<N> U = select(left,u,vfloat<N>(1.0f)-u); + const vfloat<N> V = select(left,v,vfloat<N>(1.0f)-v); + const vfloat<N> W = 1.0f-U-V; + if (P) { + mem<vfloat<N>>::storeu(valid,P+i,madd(W,Q0,madd(U,Q1,V*Q2))); + } + if (dPdu) { + assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,select(left,Q1-Q0,Q0-Q1)); + assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,select(left,Q2-Q0,Q0-Q2)); + } + if (ddPdudu) { + assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero)); + assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero)); + assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero)); + } + } + } + public: /*! returns number of vertices */ diff --git a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h b/thirdparty/embree/kernels/common/scene_subdiv_mesh.h index d0246009db..1db170196d 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_subdiv_mesh.h +++ b/thirdparty/embree/kernels/common/scene_subdiv_mesh.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -275,11 +275,11 @@ namespace embree parallel_set<uint32_t> holeSet; /*! fast lookup table to detect invalid faces */ - mvector<int8_t> invalid_face; + mvector<char> invalid_face; /*! test if face i is invalid in timestep j */ - __forceinline int8_t& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; } - __forceinline const int8_t& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; } + __forceinline char& invalidFace(size_t i, size_t j = 0) { return invalid_face[i*numTimeSteps+j]; } + __forceinline const char& invalidFace(size_t i, size_t j = 0) const { return invalid_face[i*numTimeSteps+j]; } /*! interpolation cache */ public: diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp b/thirdparty/embree/kernels/common/scene_triangle_mesh.cpp index d1c2750f14..3bbd7e51ae 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.cpp +++ b/thirdparty/embree/kernels/common/scene_triangle_mesh.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "scene_triangle_mesh.h" @@ -178,62 +178,13 @@ namespace embree return true; } - - void TriangleMesh::interpolate(const RTCInterpolateArguments* const args) - { - unsigned int primID = args->primID; - float u = args->u; - float v = args->v; - RTCBufferType bufferType = args->bufferType; - unsigned int bufferSlot = args->bufferSlot; - float* P = args->P; - float* dPdu = args->dPdu; - float* dPdv = args->dPdv; - float* ddPdudu = args->ddPdudu; - float* ddPdvdv = args->ddPdvdv; - float* ddPdudv = args->ddPdudv; - unsigned int valueCount = args->valueCount; - - /* calculate base pointer and stride */ - assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || - (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); - const char* src = nullptr; - size_t stride = 0; - if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { - src = vertexAttribs[bufferSlot].getPtr(); - stride = vertexAttribs[bufferSlot].getStride(); - } else { - src = vertices[bufferSlot].getPtr(); - stride = vertices[bufferSlot].getStride(); - } - - for (unsigned int i=0; i<valueCount; i+=4) - { - size_t ofs = i*sizeof(float); - const float w = 1.0f-u-v; - const Triangle& tri = triangle(primID); - const vbool4 valid = vint4((int)i)+vint4(step) < vint4(int(valueCount)); - const vfloat4 p0 = vfloat4::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); - const vfloat4 p1 = vfloat4::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); - const vfloat4 p2 = vfloat4::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); - - if (P) { - vfloat4::storeu(valid,P+i,madd(w,p0,madd(u,p1,v*p2))); - } - if (dPdu) { - assert(dPdu); vfloat4::storeu(valid,dPdu+i,p1-p0); - assert(dPdv); vfloat4::storeu(valid,dPdv+i,p2-p0); - } - if (ddPdudu) { - assert(ddPdudu); vfloat4::storeu(valid,ddPdudu+i,vfloat4(zero)); - assert(ddPdvdv); vfloat4::storeu(valid,ddPdvdv+i,vfloat4(zero)); - assert(ddPdudv); vfloat4::storeu(valid,ddPdudv+i,vfloat4(zero)); - } - } + + void TriangleMesh::interpolate(const RTCInterpolateArguments* const args) { + interpolate_impl<4>(args); } - + #endif - + namespace isa { TriangleMesh* createTriangleMesh(Device* device) { diff --git a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h b/thirdparty/embree/kernels/common/scene_triangle_mesh.h index eaf2e1799a..ad3f602fde 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_triangle_mesh.h +++ b/thirdparty/embree/kernels/common/scene_triangle_mesh.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,8 +43,62 @@ namespace embree void interpolate(const RTCInterpolateArguments* const args); void addElementsToCount (GeometryCounts & counts) const; + template<int N> + void interpolate_impl(const RTCInterpolateArguments* const args) + { + unsigned int primID = args->primID; + float u = args->u; + float v = args->v; + RTCBufferType bufferType = args->bufferType; + unsigned int bufferSlot = args->bufferSlot; + float* P = args->P; + float* dPdu = args->dPdu; + float* dPdv = args->dPdv; + float* ddPdudu = args->ddPdudu; + float* ddPdvdv = args->ddPdvdv; + float* ddPdudv = args->ddPdudv; + unsigned int valueCount = args->valueCount; + + /* calculate base pointer and stride */ + assert((bufferType == RTC_BUFFER_TYPE_VERTEX && bufferSlot < numTimeSteps) || + (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE && bufferSlot <= vertexAttribs.size())); + const char* src = nullptr; + size_t stride = 0; + if (bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { + src = vertexAttribs[bufferSlot].getPtr(); + stride = vertexAttribs[bufferSlot].getStride(); + } else { + src = vertices[bufferSlot].getPtr(); + stride = vertices[bufferSlot].getStride(); + } + + for (unsigned int i=0; i<valueCount; i+=N) + { + size_t ofs = i*sizeof(float); + const float w = 1.0f-u-v; + const Triangle& tri = triangle(primID); + const vbool<N> valid = vint<N>((int)i)+vint<N>(step) < vint<N>(int(valueCount)); + const vfloat<N> p0 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[0]*stride+ofs]); + const vfloat<N> p1 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[1]*stride+ofs]); + const vfloat<N> p2 = mem<vfloat<N>>::loadu(valid,(float*)&src[tri.v[2]*stride+ofs]); + + if (P) { + mem<vfloat<N>>::storeu(valid,P+i,madd(w,p0,madd(u,p1,v*p2))); + } + if (dPdu) { + assert(dPdu); mem<vfloat<N>>::storeu(valid,dPdu+i,p1-p0); + assert(dPdv); mem<vfloat<N>>::storeu(valid,dPdv+i,p2-p0); + } + if (ddPdudu) { + assert(ddPdudu); mem<vfloat<N>>::storeu(valid,ddPdudu+i,vfloat<N>(zero)); + assert(ddPdvdv); mem<vfloat<N>>::storeu(valid,ddPdvdv+i,vfloat<N>(zero)); + assert(ddPdudv); mem<vfloat<N>>::storeu(valid,ddPdudv+i,vfloat<N>(zero)); + } + } + } + public: - + /*! returns number of vertices */ __forceinline size_t numVertices() const { return vertices[0].size(); diff --git a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h b/thirdparty/embree/kernels/common/scene_user_geometry.h index 8d11ed6986..2867b18b79 100644 --- a/thirdparty/embree-aarch64/kernels/common/scene_user_geometry.h +++ b/thirdparty/embree/kernels/common/scene_user_geometry.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/stack_item.h b/thirdparty/embree/kernels/common/stack_item.h index 533c385365..c31c64e862 100644 --- a/thirdparty/embree-aarch64/kernels/common/stack_item.h +++ b/thirdparty/embree/kernels/common/stack_item.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/stat.cpp b/thirdparty/embree/kernels/common/stat.cpp index b73c3a8c76..ebb77cd534 100644 --- a/thirdparty/embree-aarch64/kernels/common/stat.cpp +++ b/thirdparty/embree/kernels/common/stat.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "stat.h" diff --git a/thirdparty/embree-aarch64/kernels/common/stat.h b/thirdparty/embree/kernels/common/stat.h index 3cda2bd014..02fc07e67f 100644 --- a/thirdparty/embree-aarch64/kernels/common/stat.h +++ b/thirdparty/embree/kernels/common/stat.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp index 51fc9b7826..01c862da0c 100644 --- a/thirdparty/embree-aarch64/kernels/common/state.cpp +++ b/thirdparty/embree/kernels/common/state.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "state.h" @@ -100,7 +100,6 @@ namespace embree instancing_open_max_depth = 32; instancing_open_max = 50000000; - ignore_config_files = false; float_exceptions = false; quality_flags = -1; scene_flags = -1; @@ -115,8 +114,6 @@ namespace embree #else set_affinity = false; #endif - /* per default enable affinity on KNL */ - if (hasISA(AVX512KNL)) set_affinity = true; start_threads = false; enable_selockmemoryprivilege = false; @@ -147,20 +144,7 @@ namespace embree } bool State::checkISASupport() { -#if defined(__ARM_NEON) - /* - * NEON CPU type is a mixture of NEON and SSE2 - */ - - bool hasSSE2 = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_SSE2; - - /* this will be true when explicitly initialize Device with `isa=neon` config */ - bool hasNEON = (getCPUFeatures() & enabled_cpu_features) & CPU_FEATURE_NEON; - - return hasSSE2 || hasNEON; -#else return (getCPUFeatures() & enabled_cpu_features) == enabled_cpu_features; -#endif } void State::verify() @@ -173,10 +157,8 @@ namespace embree * functions */ #if defined(DEBUG) #if defined(EMBREE_TARGET_SSE2) -#if !defined(__ARM_NEON) assert(sse2::getISA() <= SSE2); #endif -#endif #if defined(EMBREE_TARGET_SSE42) assert(sse42::getISA() <= SSE42); #endif @@ -186,11 +168,8 @@ namespace embree #if defined(EMBREE_TARGET_AVX2) assert(avx2::getISA() <= AVX2); #endif -#if defined (EMBREE_TARGET_AVX512KNL) - assert(avx512knl::getISA() <= AVX512KNL); -#endif -#if defined (EMBREE_TARGET_AVX512SKX) - assert(avx512skx::getISA() <= AVX512SKX); +#if defined (EMBREE_TARGET_AVX512) + assert(avx512::getISA() <= AVX512); #endif #endif } @@ -241,8 +220,7 @@ namespace embree else if (isa == "avx") return AVX; else if (isa == "avxi") return AVXI; else if (isa == "avx2") return AVX2; - else if (isa == "avx512knl") return AVX512KNL; - else if (isa == "avx512skx") return AVX512SKX; + else if (isa == "avx512") return AVX512; else return SSE2; } @@ -269,20 +247,20 @@ namespace embree start_threads = cin->get().Int(); else if (tok == Token::Id("isa") && cin->trySymbol("=")) { - std::string isa = toLowerCase(cin->get().Identifier()); - enabled_cpu_features = string_to_cpufeatures(isa); + std::string isa_str = toLowerCase(cin->get().Identifier()); + enabled_cpu_features = string_to_cpufeatures(isa_str); enabled_builder_cpu_features = enabled_cpu_features; } else if (tok == Token::Id("max_isa") && cin->trySymbol("=")) { - std::string isa = toLowerCase(cin->get().Identifier()); - enabled_cpu_features &= string_to_cpufeatures(isa); + std::string isa_str = toLowerCase(cin->get().Identifier()); + enabled_cpu_features &= string_to_cpufeatures(isa_str); enabled_builder_cpu_features &= enabled_cpu_features; } else if (tok == Token::Id("max_builder_isa") && cin->trySymbol("=")) { - std::string isa = toLowerCase(cin->get().Identifier()); - enabled_builder_cpu_features &= string_to_cpufeatures(isa); + std::string isa_str = toLowerCase(cin->get().Identifier()); + enabled_builder_cpu_features &= string_to_cpufeatures(isa_str); } else if (tok == Token::Id("frequency_level") && cin->trySymbol("=")) { @@ -299,8 +277,6 @@ namespace embree hugepages = cin->get().Int(); } - else if (tok == Token::Id("ignore_config_files") && cin->trySymbol("=")) - ignore_config_files = cin->get().Int(); else if (tok == Token::Id("float_exceptions") && cin->trySymbol("=")) float_exceptions = cin->get().Int(); diff --git a/thirdparty/embree-aarch64/kernels/common/state.h b/thirdparty/embree/kernels/common/state.h index d0fccc023f..33bcc843b2 100644 --- a/thirdparty/embree-aarch64/kernels/common/state.h +++ b/thirdparty/embree/kernels/common/state.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -118,7 +118,6 @@ namespace embree size_t instancing_open_max; //!< instancing opens tree to maximally that number of subtrees public: - bool ignore_config_files; //!< if true no more config files get parse bool float_exceptions; //!< enable floating point exceptions int quality_flags; int scene_flags; diff --git a/thirdparty/embree-aarch64/kernels/common/vector.h b/thirdparty/embree/kernels/common/vector.h index b478762240..4b08275f3b 100644 --- a/thirdparty/embree-aarch64/kernels/common/vector.h +++ b/thirdparty/embree/kernels/common/vector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "default.h" diff --git a/thirdparty/embree-aarch64/kernels/config.h b/thirdparty/embree/kernels/config.h index 80a8ab2a56..80a8ab2a56 100644 --- a/thirdparty/embree-aarch64/kernels/config.h +++ b/thirdparty/embree/kernels/config.h diff --git a/thirdparty/embree-aarch64/kernels/geometry/cone.h b/thirdparty/embree/kernels/geometry/cone.h index 961ef86160..17429bab32 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/cone.h +++ b/thirdparty/embree/kernels/geometry/cone.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h b/thirdparty/embree/kernels/geometry/coneline_intersector.h index 0902baff7d..90f3792eff 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/coneline_intersector.h +++ b/thirdparty/embree/kernels/geometry/coneline_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -169,9 +169,9 @@ namespace embree const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); const vfloat<M> ray_tnear(ray.tnear()); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,cL,cR,epilog); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i); + return __coneline_internal::intersectCone<M>(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,cL,cR,epilog); } }; @@ -200,9 +200,9 @@ namespace embree const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); const vfloat<M> ray_tnear = ray.tnear()[k]; - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - return __coneline_internal::intersectCone(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,cL,cR,epilog); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i); + return __coneline_internal::intersectCone<M>(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,cL,cR,epilog); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h b/thirdparty/embree/kernels/geometry/conelinei_intersector.h index d47218eb8b..6a985ebcad 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/conelinei_intersector.h +++ b/thirdparty/embree/kernels/geometry/conelinei_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -10,7 +10,7 @@ namespace embree { namespace isa { - template<int M, int Mx, bool filter> + template<int M, bool filter> struct ConeCurveMiIntersector1 { typedef LineMi<M> Primitive; @@ -23,8 +23,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) @@ -34,8 +34,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); return false; } @@ -45,7 +45,7 @@ namespace embree } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct ConeCurveMiMBIntersector1 { typedef LineMi<M> Primitive; @@ -58,8 +58,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) @@ -69,8 +69,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return ConeCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,cL,cR,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); return false; } @@ -80,7 +80,7 @@ namespace embree } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct ConeCurveMiIntersectorK { typedef LineMi<M> Primitive; @@ -93,8 +93,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + ConeCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) @@ -104,12 +104,12 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return ConeCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct ConeCurveMiMBIntersectorK { typedef LineMi<M> Primitive; @@ -122,8 +122,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + ConeCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) @@ -133,8 +133,8 @@ namespace embree Vec4vf<M> v0,v1; vbool<M> cL,cR; line.gather(v0,v1,cL,cR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - return ConeCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return ConeCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,cL,cR,Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h b/thirdparty/embree/kernels/geometry/curveNi.h index 51384f1959..6366a6fb9c 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi.h +++ b/thirdparty/embree/kernels/geometry/curveNi.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,10 +43,10 @@ namespace embree __forceinline void fill(const PrimRef* prims, size_t& begin, size_t _end, Scene* scene) { size_t end = min(begin+M,_end); - N = (uint8_t)(end-begin); + N = (unsigned char)(end-begin); const unsigned int geomID0 = prims[begin].geomID(); this->geomID(N) = geomID0; - ty = (uint8_t) scene->get(geomID0)->getType(); + ty = (unsigned char) scene->get(geomID0)->getType(); /* encode all primitives */ BBox3fa bounds = empty; @@ -76,25 +76,25 @@ namespace embree const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz)); const BBox3fa bounds = scene->get(geomID)->vbounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID); - bounds_vx_x(N)[i] = (int8_t) space3.vx.x; - bounds_vx_y(N)[i] = (int8_t) space3.vx.y; - bounds_vx_z(N)[i] = (int8_t) space3.vx.z; + bounds_vx_x(N)[i] = (char) space3.vx.x; + bounds_vx_y(N)[i] = (char) space3.vx.y; + bounds_vx_z(N)[i] = (char) space3.vx.z; bounds_vx_lower(N)[i] = (short) clamp(floor(bounds.lower.x),-32767.0f,32767.0f); bounds_vx_upper(N)[i] = (short) clamp(ceil (bounds.upper.x),-32767.0f,32767.0f); assert(-32767.0f <= floor(bounds.lower.x) && floor(bounds.lower.x) <= 32767.0f); assert(-32767.0f <= ceil (bounds.upper.x) && ceil (bounds.upper.x) <= 32767.0f); - bounds_vy_x(N)[i] = (int8_t) space3.vy.x; - bounds_vy_y(N)[i] = (int8_t) space3.vy.y; - bounds_vy_z(N)[i] = (int8_t) space3.vy.z; + bounds_vy_x(N)[i] = (char) space3.vy.x; + bounds_vy_y(N)[i] = (char) space3.vy.y; + bounds_vy_z(N)[i] = (char) space3.vy.z; bounds_vy_lower(N)[i] = (short) clamp(floor(bounds.lower.y),-32767.0f,32767.0f); bounds_vy_upper(N)[i] = (short) clamp(ceil (bounds.upper.y),-32767.0f,32767.0f); assert(-32767.0f <= floor(bounds.lower.y) && floor(bounds.lower.y) <= 32767.0f); assert(-32767.0f <= ceil (bounds.upper.y) && ceil (bounds.upper.y) <= 32767.0f); - bounds_vz_x(N)[i] = (int8_t) space3.vz.x; - bounds_vz_y(N)[i] = (int8_t) space3.vz.y; - bounds_vz_z(N)[i] = (int8_t) space3.vz.z; + bounds_vz_x(N)[i] = (char) space3.vz.x; + bounds_vz_y(N)[i] = (char) space3.vz.y; + bounds_vz_z(N)[i] = (char) space3.vz.z; bounds_vz_lower(N)[i] = (short) clamp(floor(bounds.lower.z),-32767.0f,32767.0f); bounds_vz_upper(N)[i] = (short) clamp(ceil (bounds.upper.z),-32767.0f,32767.0f); assert(-32767.0f <= floor(bounds.lower.z) && floor(bounds.lower.z) <= 32767.0f); @@ -114,15 +114,15 @@ namespace embree for (size_t i=0; i<items; i++) { accel[i].fill(prims,start,set.end(),bvh->scene); } - return bvh->encodeLeaf((int8_t*)accel,items); + return bvh->encodeLeaf((char*)accel,items); }; public: // 27.6 - 46 bytes per primitive - uint8_t ty; - uint8_t N; - uint8_t data[4+25*M+16]; + unsigned char ty; + unsigned char N; + unsigned char data[4+25*M+16]; /* struct Layout @@ -130,21 +130,21 @@ namespace embree unsigned int geomID; unsigned int primID[N]; - int8_t bounds_vx_x[N]; - int8_t bounds_vx_y[N]; - int8_t bounds_vx_z[N]; + char bounds_vx_x[N]; + char bounds_vx_y[N]; + char bounds_vx_z[N]; short bounds_vx_lower[N]; short bounds_vx_upper[N]; - int8_t bounds_vy_x[N]; - int8_t bounds_vy_y[N]; - int8_t bounds_vy_z[N]; + char bounds_vy_x[N]; + char bounds_vy_y[N]; + char bounds_vy_z[N]; short bounds_vy_lower[N]; short bounds_vy_upper[N]; - int8_t bounds_vz_x[N]; - int8_t bounds_vz_y[N]; - int8_t bounds_vz_z[N]; + char bounds_vz_x[N]; + char bounds_vz_y[N]; + char bounds_vz_z[N]; short bounds_vz_lower[N]; short bounds_vz_upper[N]; @@ -153,65 +153,65 @@ namespace embree }; */ - __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); } - __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); } + __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((char*)this+2); } + __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((char*)this+2); } - __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); } - __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); } + __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((char*)this+6); } + __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((char*)this+6); } - __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); } - __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); } + __forceinline char* bounds_vx_x(size_t N) { return (char*)((char*)this+6+4*N); } + __forceinline const char* bounds_vx_x(size_t N) const { return (char*)((char*)this+6+4*N); } - __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); } - __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); } + __forceinline char* bounds_vx_y(size_t N) { return (char*)((char*)this+6+5*N); } + __forceinline const char* bounds_vx_y(size_t N) const { return (char*)((char*)this+6+5*N); } - __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); } - __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); } + __forceinline char* bounds_vx_z(size_t N) { return (char*)((char*)this+6+6*N); } + __forceinline const char* bounds_vx_z(size_t N) const { return (char*)((char*)this+6+6*N); } - __forceinline short* bounds_vx_lower(size_t N) { return (short*)((int8_t*)this+6+7*N); } - __forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((int8_t*)this+6+7*N); } + __forceinline short* bounds_vx_lower(size_t N) { return (short*)((char*)this+6+7*N); } + __forceinline const short* bounds_vx_lower(size_t N) const { return (short*)((char*)this+6+7*N); } - __forceinline short* bounds_vx_upper(size_t N) { return (short*)((int8_t*)this+6+9*N); } - __forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((int8_t*)this+6+9*N); } + __forceinline short* bounds_vx_upper(size_t N) { return (short*)((char*)this+6+9*N); } + __forceinline const short* bounds_vx_upper(size_t N) const { return (short*)((char*)this+6+9*N); } - __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+11*N); } - __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+11*N); } + __forceinline char* bounds_vy_x(size_t N) { return (char*)((char*)this+6+11*N); } + __forceinline const char* bounds_vy_x(size_t N) const { return (char*)((char*)this+6+11*N); } - __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+12*N); } - __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+12*N); } + __forceinline char* bounds_vy_y(size_t N) { return (char*)((char*)this+6+12*N); } + __forceinline const char* bounds_vy_y(size_t N) const { return (char*)((char*)this+6+12*N); } - __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+13*N); } - __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+13*N); } + __forceinline char* bounds_vy_z(size_t N) { return (char*)((char*)this+6+13*N); } + __forceinline const char* bounds_vy_z(size_t N) const { return (char*)((char*)this+6+13*N); } - __forceinline short* bounds_vy_lower(size_t N) { return (short*)((int8_t*)this+6+14*N); } - __forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((int8_t*)this+6+14*N); } + __forceinline short* bounds_vy_lower(size_t N) { return (short*)((char*)this+6+14*N); } + __forceinline const short* bounds_vy_lower(size_t N) const { return (short*)((char*)this+6+14*N); } - __forceinline short* bounds_vy_upper(size_t N) { return (short*)((int8_t*)this+6+16*N); } - __forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((int8_t*)this+6+16*N); } + __forceinline short* bounds_vy_upper(size_t N) { return (short*)((char*)this+6+16*N); } + __forceinline const short* bounds_vy_upper(size_t N) const { return (short*)((char*)this+6+16*N); } - __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+18*N); } - __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+18*N); } + __forceinline char* bounds_vz_x(size_t N) { return (char*)((char*)this+6+18*N); } + __forceinline const char* bounds_vz_x(size_t N) const { return (char*)((char*)this+6+18*N); } - __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+19*N); } - __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+19*N); } + __forceinline char* bounds_vz_y(size_t N) { return (char*)((char*)this+6+19*N); } + __forceinline const char* bounds_vz_y(size_t N) const { return (char*)((char*)this+6+19*N); } - __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+20*N); } - __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+20*N); } + __forceinline char* bounds_vz_z(size_t N) { return (char*)((char*)this+6+20*N); } + __forceinline const char* bounds_vz_z(size_t N) const { return (char*)((char*)this+6+20*N); } - __forceinline short* bounds_vz_lower(size_t N) { return (short*)((int8_t*)this+6+21*N); } - __forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((int8_t*)this+6+21*N); } + __forceinline short* bounds_vz_lower(size_t N) { return (short*)((char*)this+6+21*N); } + __forceinline const short* bounds_vz_lower(size_t N) const { return (short*)((char*)this+6+21*N); } - __forceinline short* bounds_vz_upper(size_t N) { return (short*)((int8_t*)this+6+23*N); } - __forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((int8_t*)this+6+23*N); } + __forceinline short* bounds_vz_upper(size_t N) { return (short*)((char*)this+6+23*N); } + __forceinline const short* bounds_vz_upper(size_t N) const { return (short*)((char*)this+6+23*N); } - __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+25*N); } - __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+25*N); } + __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((char*)this+6+25*N); } + __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((char*)this+6+25*N); } - __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+25*N+12); } - __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+25*N+12); } + __forceinline float* scale(size_t N) { return (float*)((char*)this+6+25*N+12); } + __forceinline const float* scale(size_t N) const { return (float*)((char*)this+6+25*N+12); } - __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+25*N+16; } - __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+25*N+16; } + __forceinline char* end(size_t N) { return (char*)this+6+25*N+16; } + __forceinline const char* end(size_t N) const { return (char*)this+6+25*N+16; } }; template<int M> diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h b/thirdparty/embree/kernels/geometry/curveNi_intersector.h index 0f9038c9fc..c0b66515c1 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_intersector.h +++ b/thirdparty/embree/kernels/geometry/curveNi_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h b/thirdparty/embree/kernels/geometry/curveNi_mb.h index 0cd8f833fd..5d972b43a0 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb.h +++ b/thirdparty/embree/kernels/geometry/curveNi_mb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,10 +43,10 @@ namespace embree __forceinline LBBox3fa fillMB(const PrimRefMB* prims, size_t& begin, size_t _end, Scene* scene, const BBox1f time_range) { size_t end = min(begin+M,_end); - N = (uint8_t)(end-begin); + N = (unsigned char)(end-begin); const unsigned int geomID0 = prims[begin].geomID(); this->geomID(N) = geomID0; - ty = (uint8_t) scene->get(geomID0)->getType(); + ty = (unsigned char) scene->get(geomID0)->getType(); /* encode all primitives */ LBBox3fa lbounds = empty; @@ -79,10 +79,10 @@ namespace embree const LinearSpace3fa space3(trunc(126.0f*space2.vx),trunc(126.0f*space2.vy),trunc(126.0f*space2.vz)); const LBBox3fa bounds = scene->get(geomID)->vlinearBounds(loffset,lscale,max(length(space3.vx),length(space3.vy),length(space3.vz)),space3.transposed(),primID,time_range); - // NOTE: this weird (int8_t) (short) cast works around VS2015 Win32 compiler bug - bounds_vx_x(N)[i] = (int8_t) (short) space3.vx.x; - bounds_vx_y(N)[i] = (int8_t) (short) space3.vx.y; - bounds_vx_z(N)[i] = (int8_t) (short) space3.vx.z; + // NOTE: this weird (char) (short) cast works around VS2015 Win32 compiler bug + bounds_vx_x(N)[i] = (char) (short) space3.vx.x; + bounds_vx_y(N)[i] = (char) (short) space3.vx.y; + bounds_vx_z(N)[i] = (char) (short) space3.vx.z; bounds_vx_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.x),-32767.0f,32767.0f); bounds_vx_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.x),-32767.0f,32767.0f); bounds_vx_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.x),-32767.0f,32767.0f); @@ -92,9 +92,9 @@ namespace embree assert(-32767.0f <= floor(bounds.bounds1.lower.x) && floor(bounds.bounds1.lower.x) <= 32767.0f); assert(-32767.0f <= ceil (bounds.bounds1.upper.x) && ceil (bounds.bounds1.upper.x) <= 32767.0f); - bounds_vy_x(N)[i] = (int8_t) (short) space3.vy.x; - bounds_vy_y(N)[i] = (int8_t) (short) space3.vy.y; - bounds_vy_z(N)[i] = (int8_t) (short) space3.vy.z; + bounds_vy_x(N)[i] = (char) (short) space3.vy.x; + bounds_vy_y(N)[i] = (char) (short) space3.vy.y; + bounds_vy_z(N)[i] = (char) (short) space3.vy.z; bounds_vy_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.y),-32767.0f,32767.0f); bounds_vy_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.y),-32767.0f,32767.0f); bounds_vy_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.y),-32767.0f,32767.0f); @@ -104,9 +104,9 @@ namespace embree assert(-32767.0f <= floor(bounds.bounds1.lower.y) && floor(bounds.bounds1.lower.y) <= 32767.0f); assert(-32767.0f <= ceil (bounds.bounds1.upper.y) && ceil (bounds.bounds1.upper.y) <= 32767.0f); - bounds_vz_x(N)[i] = (int8_t) (short) space3.vz.x; - bounds_vz_y(N)[i] = (int8_t) (short) space3.vz.y; - bounds_vz_z(N)[i] = (int8_t) (short) space3.vz.z; + bounds_vz_x(N)[i] = (char) (short) space3.vz.x; + bounds_vz_y(N)[i] = (char) (short) space3.vz.y; + bounds_vz_z(N)[i] = (char) (short) space3.vz.z; bounds_vz_lower0(N)[i] = (short) clamp(floor(bounds.bounds0.lower.z),-32767.0f,32767.0f); bounds_vz_upper0(N)[i] = (short) clamp(ceil (bounds.bounds0.upper.z),-32767.0f,32767.0f); bounds_vz_lower1(N)[i] = (short) clamp(floor(bounds.bounds1.lower.z),-32767.0f,32767.0f); @@ -130,7 +130,7 @@ namespace embree size_t items = CurveNiMB::blocks(prims.size()); size_t numbytes = CurveNiMB::bytes(prims.size()); CurveNiMB* accel = (CurveNiMB*) alloc.malloc1(numbytes,BVH::byteAlignment); - const typename BVH::NodeRef node = bvh->encodeLeaf((int8_t*)accel,items); + const typename BVH::NodeRef node = bvh->encodeLeaf((char*)accel,items); LBBox3fa bounds = empty; for (size_t i=0; i<items; i++) @@ -143,9 +143,9 @@ namespace embree public: // 27.6 - 46 bytes per primitive - uint8_t ty; - uint8_t N; - uint8_t data[4+37*M+24]; + unsigned char ty; + unsigned char N; + unsigned char data[4+37*M+24]; /* struct Layout @@ -153,25 +153,25 @@ namespace embree unsigned int geomID; unsigned int primID[N]; - int8_t bounds_vx_x[N]; - int8_t bounds_vx_y[N]; - int8_t bounds_vx_z[N]; + char bounds_vx_x[N]; + char bounds_vx_y[N]; + char bounds_vx_z[N]; short bounds_vx_lower0[N]; short bounds_vx_upper0[N]; short bounds_vx_lower1[N]; short bounds_vx_upper1[N]; - int8_t bounds_vy_x[N]; - int8_t bounds_vy_y[N]; - int8_t bounds_vy_z[N]; + char bounds_vy_x[N]; + char bounds_vy_y[N]; + char bounds_vy_z[N]; short bounds_vy_lower0[N]; short bounds_vy_upper0[N]; short bounds_vy_lower1[N]; short bounds_vy_upper1[N]; - int8_t bounds_vz_x[N]; - int8_t bounds_vz_y[N]; - int8_t bounds_vz_z[N]; + char bounds_vz_x[N]; + char bounds_vz_y[N]; + char bounds_vz_z[N]; short bounds_vz_lower0[N]; short bounds_vz_upper0[N]; short bounds_vz_lower1[N]; @@ -185,89 +185,89 @@ namespace embree }; */ - __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((int8_t*)this+2); } - __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((int8_t*)this+2); } + __forceinline unsigned int& geomID(size_t N) { return *(unsigned int*)((char*)this+2); } + __forceinline const unsigned int& geomID(size_t N) const { return *(unsigned int*)((char*)this+2); } - __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((int8_t*)this+6); } - __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((int8_t*)this+6); } + __forceinline unsigned int* primID(size_t N) { return (unsigned int*)((char*)this+6); } + __forceinline const unsigned int* primID(size_t N) const { return (unsigned int*)((char*)this+6); } - __forceinline int8_t* bounds_vx_x(size_t N) { return (int8_t*)((int8_t*)this+6+4*N); } - __forceinline const int8_t* bounds_vx_x(size_t N) const { return (int8_t*)((int8_t*)this+6+4*N); } + __forceinline char* bounds_vx_x(size_t N) { return (char*)((char*)this+6+4*N); } + __forceinline const char* bounds_vx_x(size_t N) const { return (char*)((char*)this+6+4*N); } - __forceinline int8_t* bounds_vx_y(size_t N) { return (int8_t*)((int8_t*)this+6+5*N); } - __forceinline const int8_t* bounds_vx_y(size_t N) const { return (int8_t*)((int8_t*)this+6+5*N); } + __forceinline char* bounds_vx_y(size_t N) { return (char*)((char*)this+6+5*N); } + __forceinline const char* bounds_vx_y(size_t N) const { return (char*)((char*)this+6+5*N); } - __forceinline int8_t* bounds_vx_z(size_t N) { return (int8_t*)((int8_t*)this+6+6*N); } - __forceinline const int8_t* bounds_vx_z(size_t N) const { return (int8_t*)((int8_t*)this+6+6*N); } + __forceinline char* bounds_vx_z(size_t N) { return (char*)((char*)this+6+6*N); } + __forceinline const char* bounds_vx_z(size_t N) const { return (char*)((char*)this+6+6*N); } - __forceinline short* bounds_vx_lower0(size_t N) { return (short*)((int8_t*)this+6+7*N); } - __forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((int8_t*)this+6+7*N); } + __forceinline short* bounds_vx_lower0(size_t N) { return (short*)((char*)this+6+7*N); } + __forceinline const short* bounds_vx_lower0(size_t N) const { return (short*)((char*)this+6+7*N); } - __forceinline short* bounds_vx_upper0(size_t N) { return (short*)((int8_t*)this+6+9*N); } - __forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((int8_t*)this+6+9*N); } + __forceinline short* bounds_vx_upper0(size_t N) { return (short*)((char*)this+6+9*N); } + __forceinline const short* bounds_vx_upper0(size_t N) const { return (short*)((char*)this+6+9*N); } - __forceinline short* bounds_vx_lower1(size_t N) { return (short*)((int8_t*)this+6+11*N); } - __forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((int8_t*)this+6+11*N); } + __forceinline short* bounds_vx_lower1(size_t N) { return (short*)((char*)this+6+11*N); } + __forceinline const short* bounds_vx_lower1(size_t N) const { return (short*)((char*)this+6+11*N); } - __forceinline short* bounds_vx_upper1(size_t N) { return (short*)((int8_t*)this+6+13*N); } - __forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((int8_t*)this+6+13*N); } + __forceinline short* bounds_vx_upper1(size_t N) { return (short*)((char*)this+6+13*N); } + __forceinline const short* bounds_vx_upper1(size_t N) const { return (short*)((char*)this+6+13*N); } - __forceinline int8_t* bounds_vy_x(size_t N) { return (int8_t*)((int8_t*)this+6+15*N); } - __forceinline const int8_t* bounds_vy_x(size_t N) const { return (int8_t*)((int8_t*)this+6+15*N); } + __forceinline char* bounds_vy_x(size_t N) { return (char*)((char*)this+6+15*N); } + __forceinline const char* bounds_vy_x(size_t N) const { return (char*)((char*)this+6+15*N); } - __forceinline int8_t* bounds_vy_y(size_t N) { return (int8_t*)((int8_t*)this+6+16*N); } - __forceinline const int8_t* bounds_vy_y(size_t N) const { return (int8_t*)((int8_t*)this+6+16*N); } + __forceinline char* bounds_vy_y(size_t N) { return (char*)((char*)this+6+16*N); } + __forceinline const char* bounds_vy_y(size_t N) const { return (char*)((char*)this+6+16*N); } - __forceinline int8_t* bounds_vy_z(size_t N) { return (int8_t*)((int8_t*)this+6+17*N); } - __forceinline const int8_t* bounds_vy_z(size_t N) const { return (int8_t*)((int8_t*)this+6+17*N); } + __forceinline char* bounds_vy_z(size_t N) { return (char*)((char*)this+6+17*N); } + __forceinline const char* bounds_vy_z(size_t N) const { return (char*)((char*)this+6+17*N); } - __forceinline short* bounds_vy_lower0(size_t N) { return (short*)((int8_t*)this+6+18*N); } - __forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((int8_t*)this+6+18*N); } + __forceinline short* bounds_vy_lower0(size_t N) { return (short*)((char*)this+6+18*N); } + __forceinline const short* bounds_vy_lower0(size_t N) const { return (short*)((char*)this+6+18*N); } - __forceinline short* bounds_vy_upper0(size_t N) { return (short*)((int8_t*)this+6+20*N); } - __forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((int8_t*)this+6+20*N); } + __forceinline short* bounds_vy_upper0(size_t N) { return (short*)((char*)this+6+20*N); } + __forceinline const short* bounds_vy_upper0(size_t N) const { return (short*)((char*)this+6+20*N); } - __forceinline short* bounds_vy_lower1(size_t N) { return (short*)((int8_t*)this+6+22*N); } - __forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((int8_t*)this+6+22*N); } + __forceinline short* bounds_vy_lower1(size_t N) { return (short*)((char*)this+6+22*N); } + __forceinline const short* bounds_vy_lower1(size_t N) const { return (short*)((char*)this+6+22*N); } - __forceinline short* bounds_vy_upper1(size_t N) { return (short*)((int8_t*)this+6+24*N); } - __forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((int8_t*)this+6+24*N); } + __forceinline short* bounds_vy_upper1(size_t N) { return (short*)((char*)this+6+24*N); } + __forceinline const short* bounds_vy_upper1(size_t N) const { return (short*)((char*)this+6+24*N); } - __forceinline int8_t* bounds_vz_x(size_t N) { return (int8_t*)((int8_t*)this+6+26*N); } - __forceinline const int8_t* bounds_vz_x(size_t N) const { return (int8_t*)((int8_t*)this+6+26*N); } + __forceinline char* bounds_vz_x(size_t N) { return (char*)((char*)this+6+26*N); } + __forceinline const char* bounds_vz_x(size_t N) const { return (char*)((char*)this+6+26*N); } - __forceinline int8_t* bounds_vz_y(size_t N) { return (int8_t*)((int8_t*)this+6+27*N); } - __forceinline const int8_t* bounds_vz_y(size_t N) const { return (int8_t*)((int8_t*)this+6+27*N); } + __forceinline char* bounds_vz_y(size_t N) { return (char*)((char*)this+6+27*N); } + __forceinline const char* bounds_vz_y(size_t N) const { return (char*)((char*)this+6+27*N); } - __forceinline int8_t* bounds_vz_z(size_t N) { return (int8_t*)((int8_t*)this+6+28*N); } - __forceinline const int8_t* bounds_vz_z(size_t N) const { return (int8_t*)((int8_t*)this+6+28*N); } + __forceinline char* bounds_vz_z(size_t N) { return (char*)((char*)this+6+28*N); } + __forceinline const char* bounds_vz_z(size_t N) const { return (char*)((char*)this+6+28*N); } - __forceinline short* bounds_vz_lower0(size_t N) { return (short*)((int8_t*)this+6+29*N); } - __forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((int8_t*)this+6+29*N); } + __forceinline short* bounds_vz_lower0(size_t N) { return (short*)((char*)this+6+29*N); } + __forceinline const short* bounds_vz_lower0(size_t N) const { return (short*)((char*)this+6+29*N); } - __forceinline short* bounds_vz_upper0(size_t N) { return (short*)((int8_t*)this+6+31*N); } - __forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((int8_t*)this+6+31*N); } + __forceinline short* bounds_vz_upper0(size_t N) { return (short*)((char*)this+6+31*N); } + __forceinline const short* bounds_vz_upper0(size_t N) const { return (short*)((char*)this+6+31*N); } - __forceinline short* bounds_vz_lower1(size_t N) { return (short*)((int8_t*)this+6+33*N); } - __forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((int8_t*)this+6+33*N); } + __forceinline short* bounds_vz_lower1(size_t N) { return (short*)((char*)this+6+33*N); } + __forceinline const short* bounds_vz_lower1(size_t N) const { return (short*)((char*)this+6+33*N); } - __forceinline short* bounds_vz_upper1(size_t N) { return (short*)((int8_t*)this+6+35*N); } - __forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((int8_t*)this+6+35*N); } + __forceinline short* bounds_vz_upper1(size_t N) { return (short*)((char*)this+6+35*N); } + __forceinline const short* bounds_vz_upper1(size_t N) const { return (short*)((char*)this+6+35*N); } - __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((int8_t*)this+6+37*N); } - __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((int8_t*)this+6+37*N); } + __forceinline Vec3f* offset(size_t N) { return (Vec3f*)((char*)this+6+37*N); } + __forceinline const Vec3f* offset(size_t N) const { return (Vec3f*)((char*)this+6+37*N); } - __forceinline float* scale(size_t N) { return (float*)((int8_t*)this+6+37*N+12); } - __forceinline const float* scale(size_t N) const { return (float*)((int8_t*)this+6+37*N+12); } + __forceinline float* scale(size_t N) { return (float*)((char*)this+6+37*N+12); } + __forceinline const float* scale(size_t N) const { return (float*)((char*)this+6+37*N+12); } - __forceinline float& time_offset(size_t N) { return *(float*)((int8_t*)this+6+37*N+16); } - __forceinline const float& time_offset(size_t N) const { return *(float*)((int8_t*)this+6+37*N+16); } + __forceinline float& time_offset(size_t N) { return *(float*)((char*)this+6+37*N+16); } + __forceinline const float& time_offset(size_t N) const { return *(float*)((char*)this+6+37*N+16); } - __forceinline float& time_scale(size_t N) { return *(float*)((int8_t*)this+6+37*N+20); } - __forceinline const float& time_scale(size_t N) const { return *(float*)((int8_t*)this+6+37*N+20); } + __forceinline float& time_scale(size_t N) { return *(float*)((char*)this+6+37*N+20); } + __forceinline const float& time_scale(size_t N) const { return *(float*)((char*)this+6+37*N+20); } - __forceinline int8_t* end(size_t N) { return (int8_t*)this+6+37*N+24; } - __forceinline const int8_t* end(size_t N) const { return (int8_t*)this+6+37*N+24; } + __forceinline char* end(size_t N) { return (char*)this+6+37*N+24; } + __forceinline const char* end(size_t N) const { return (char*)this+6+37*N+24; } }; template<int M> diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h b/thirdparty/embree/kernels/geometry/curveNi_mb_intersector.h index 0cbc764668..bab796b33b 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNi_mb_intersector.h +++ b/thirdparty/embree/kernels/geometry/curveNi_mb_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h b/thirdparty/embree/kernels/geometry/curveNv.h index 6eb5e30b39..e41a381706 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNv.h +++ b/thirdparty/embree/kernels/geometry/curveNv.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h b/thirdparty/embree/kernels/geometry/curveNv_intersector.h index e20da2882e..2742725aec 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curveNv_intersector.h +++ b/thirdparty/embree/kernels/geometry/curveNv_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h b/thirdparty/embree/kernels/geometry/curve_intersector.h index 204958f7cc..1e8ac26125 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -23,8 +23,8 @@ namespace embree typedef unsigned char Primitive; typedef CurvePrecalculations1 Precalculations; - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { assert(num == 1); RTCGeometryType ty = (RTCGeometryType)(*prim); @@ -33,8 +33,8 @@ namespace embree leafIntersector.intersect<1>(&pre,&ray,context,prim); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { assert(num == 1); RTCGeometryType ty = (RTCGeometryType)(*prim); diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h b/thirdparty/embree/kernels/geometry/curve_intersector_distance.h index 343cc8ff28..748a9511a5 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_distance.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector_distance.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h b/thirdparty/embree/kernels/geometry/curve_intersector_oriented.h index 47531027fc..3d8900c2aa 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_oriented.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector_oriented.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h b/thirdparty/embree/kernels/geometry/curve_intersector_precalculations.h index 6e9fc91925..de6b70be1b 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_precalculations.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector_precalculations.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h b/thirdparty/embree/kernels/geometry/curve_intersector_ribbon.h index a99cf99d56..c3272e99fd 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_ribbon.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector_ribbon.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -32,9 +32,11 @@ namespace embree __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } __forceinline float t (const size_t i) const { return vt[i]; } - __forceinline Vec3fa Ng(const size_t i) const { - return curve3D.eval_du(vu[i]); - } + __forceinline Vec3fa Ng(const size_t i) const { return curve3D.eval_du(vu[i]); } + + __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); } + __forceinline vfloat<M> t () const { return vt; } + __forceinline Vec3vf<M> Ng() const { return (Vec3vf<M>) curve3D.template veval_du<M>(vu); } public: vfloat<M> U; @@ -98,7 +100,7 @@ namespace embree const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); vfloatx vu,vv,vt; - vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); + vboolx valid0 = intersect_quad_backface_culling<VSIZEX>(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); if (any(valid0)) { @@ -143,7 +145,7 @@ namespace embree const Vec3vfx up1 = nmadd(p1.w,nn1,Vec3vfx(p1)); vfloatx vu,vv,vt; - vboolx valid0 = intersect_quad_backface_culling(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); + vboolx valid0 = intersect_quad_backface_culling<VSIZEX>(valid,zero,Vec3fa(0,0,1),ray_tnear,ray_tfar,lp0,lp1,up1,up0,vu,vv,vt); if (any(valid0)) { diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h b/thirdparty/embree/kernels/geometry/curve_intersector_sweep.h index 883cedc3d2..2d4abd73ac 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_sweep.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector_sweep.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -137,10 +137,12 @@ namespace embree float u0, float u1, unsigned int depth, const Epilog& epilog) { #if defined(__AVX__) + enum { VSIZEX_ = 8 }; typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues typedef vint8 vintx; typedef vfloat8 vfloatx; #else + enum { VSIZEX_ = 4 }; typedef vbool4 vboolx; typedef vint4 vintx; typedef vfloat4 vfloatx; @@ -192,7 +194,7 @@ namespace embree /* subdivide curve */ const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1))); const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1))); - Vec4vfx P0, dP0du; curve.veval(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); + Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale); const Vec4vfx P3 = shift_right_1(P0); const Vec4vfx dP3du = shift_right_1(dP0du); const Vec4vfx P1 = P0 + dP0du; diff --git a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h b/thirdparty/embree/kernels/geometry/curve_intersector_virtual.h index e1f4238130..cffa8e46ad 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/curve_intersector_virtual.h +++ b/thirdparty/embree/kernels/geometry/curve_intersector_virtual.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -97,8 +97,8 @@ namespace embree typedef unsigned char Primitive; typedef CurvePrecalculations1 Precalculations; - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { assert(num == 1); RTCGeometryType ty = (RTCGeometryType)(*prim); @@ -107,8 +107,8 @@ namespace embree leafIntersector.intersect<1>(&pre,&ray,context,prim); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { assert(num == 1); RTCGeometryType ty = (RTCGeometryType)(*prim); @@ -152,8 +152,8 @@ namespace embree return valid_o; } - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { assert(num == 1); RTCGeometryType ty = (RTCGeometryType)(*prim); @@ -162,8 +162,8 @@ namespace embree leafIntersector.intersect<K>(&pre,&ray,k,context,prim); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { assert(num == 1); RTCGeometryType ty = (RTCGeometryType)(*prim); @@ -177,17 +177,17 @@ namespace embree static VirtualCurveIntersector::Intersectors LinearRoundConeNiIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -196,17 +196,17 @@ namespace embree static VirtualCurveIntersector::Intersectors LinearConeNiIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -215,17 +215,17 @@ namespace embree static VirtualCurveIntersector::Intersectors LinearRoundConeNiMBIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiMBIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &RoundLinearCurveMiMBIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &RoundLinearCurveMiMBIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &RoundLinearCurveMiMBIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &RoundLinearCurveMiMBIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiMBIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&RoundLinearCurveMiMBIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &RoundLinearCurveMiMBIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiMBIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&RoundLinearCurveMiMBIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &RoundLinearCurveMiMBIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -234,17 +234,17 @@ namespace embree static VirtualCurveIntersector::Intersectors LinearConeNiMBIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiMBIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &ConeCurveMiMBIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &ConeCurveMiMBIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &ConeCurveMiMBIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &ConeCurveMiMBIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiMBIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&ConeCurveMiMBIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &ConeCurveMiMBIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiMBIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&ConeCurveMiMBIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &ConeCurveMiMBIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -254,17 +254,17 @@ namespace embree static VirtualCurveIntersector::Intersectors LinearRibbonNiIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -273,17 +273,17 @@ namespace embree static VirtualCurveIntersector::Intersectors LinearRibbonNiMBIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiMBIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &FlatLinearCurveMiMBIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &FlatLinearCurveMiMBIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &FlatLinearCurveMiMBIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &FlatLinearCurveMiMBIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiMBIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&FlatLinearCurveMiMBIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &FlatLinearCurveMiMBIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiMBIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&FlatLinearCurveMiMBIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &FlatLinearCurveMiMBIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -292,17 +292,17 @@ namespace embree static VirtualCurveIntersector::Intersectors SphereNiIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -311,17 +311,17 @@ namespace embree static VirtualCurveIntersector::Intersectors SphereNiMBIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiMBIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &SphereMiMBIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &SphereMiMBIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &SphereMiMBIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &SphereMiMBIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiMBIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&SphereMiMBIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &SphereMiMBIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiMBIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&SphereMiMBIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &SphereMiMBIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -330,17 +330,17 @@ namespace embree static VirtualCurveIntersector::Intersectors DiscNiIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -349,17 +349,17 @@ namespace embree static VirtualCurveIntersector::Intersectors DiscNiMBIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiMBIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &DiscMiMBIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &DiscMiMBIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &DiscMiMBIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &DiscMiMBIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiMBIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&DiscMiMBIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &DiscMiMBIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiMBIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&DiscMiMBIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &DiscMiMBIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -368,17 +368,17 @@ namespace embree static VirtualCurveIntersector::Intersectors OrientedDiscNiIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiIntersectorK<N,16,true>::occluded; #endif return intersectors; } @@ -387,17 +387,17 @@ namespace embree static VirtualCurveIntersector::Intersectors OrientedDiscNiMBIntersectors() { VirtualCurveIntersector::Intersectors intersectors; - intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::intersect; - intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiMBIntersector1<N,N,true>::occluded; - intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::intersect; - intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiMBIntersectorK<N,N,4,true>::occluded; + intersectors.intersect1 = (VirtualCurveIntersector::Intersect1Ty) &OrientedDiscMiMBIntersector1<N,true>::intersect; + intersectors.occluded1 = (VirtualCurveIntersector::Occluded1Ty) &OrientedDiscMiMBIntersector1<N,true>::occluded; + intersectors.intersect4 = (VirtualCurveIntersector::Intersect4Ty) &OrientedDiscMiMBIntersectorK<N,4,true>::intersect; + intersectors.occluded4 = (VirtualCurveIntersector::Occluded4Ty) &OrientedDiscMiMBIntersectorK<N,4,true>::occluded; #if defined(__AVX__) - intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiMBIntersectorK<N,N,8,true>::intersect; - intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiMBIntersectorK<N,N,8,true>::occluded; + intersectors.intersect8 = (VirtualCurveIntersector::Intersect8Ty)&OrientedDiscMiMBIntersectorK<N,8,true>::intersect; + intersectors.occluded8 = (VirtualCurveIntersector::Occluded8Ty) &OrientedDiscMiMBIntersectorK<N,8,true>::occluded; #endif #if defined(__AVX512F__) - intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiMBIntersectorK<N,N,16,true>::intersect; - intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiMBIntersectorK<N,N,16,true>::occluded; + intersectors.intersect16 = (VirtualCurveIntersector::Intersect16Ty)&OrientedDiscMiMBIntersectorK<N,16,true>::intersect; + intersectors.occluded16 = (VirtualCurveIntersector::Occluded16Ty) &OrientedDiscMiMBIntersectorK<N,16,true>::occluded; #endif return intersectors; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h b/thirdparty/embree/kernels/geometry/cylinder.h index 39a582864c..dab02989ce 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/cylinder.h +++ b/thirdparty/embree/kernels/geometry/cylinder.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h b/thirdparty/embree/kernels/geometry/disc_intersector.h index e8305780e5..816c066899 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/disc_intersector.h +++ b/thirdparty/embree/kernels/geometry/disc_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -64,7 +64,7 @@ namespace embree const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); const Vec3vf<M> center = v0.xyz(); const vfloat<M> radius = v0.w; @@ -101,7 +101,7 @@ namespace embree vbool<M> valid = valid_i; const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); const Vec3vf<M> center = v0.xyz(); const vfloat<M> radius = v0.w; @@ -148,7 +148,7 @@ namespace embree const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); const Vec3vf<M> center = v0.xyz(); const vfloat<M> radius = v0.w; @@ -187,7 +187,7 @@ namespace embree const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); const Vec3vf<M> center = v0.xyz(); const vfloat<M> radius = v0.w; diff --git a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h b/thirdparty/embree/kernels/geometry/disci_intersector.h index e1dc3aa98e..bb9d396f6e 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/disci_intersector.h +++ b/thirdparty/embree/kernels/geometry/disci_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,7 +11,7 @@ namespace embree { namespace isa { - template<int M, int Mx, bool filter> + template<int M, bool filter> struct DiscMiIntersector1 { typedef PointMi<M> Primitive; @@ -25,9 +25,9 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded(const Precalculations& pre, @@ -38,13 +38,13 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + return DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct DiscMiMBIntersector1 { typedef PointMi<M> Primitive; @@ -58,9 +58,9 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded(const Precalculations& pre, @@ -71,13 +71,13 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + return DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct DiscMiIntersectorK { typedef PointMi<M> Primitive; @@ -89,10 +89,10 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded( @@ -101,14 +101,14 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + return DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct DiscMiMBIntersectorK { typedef PointMi<M> Primitive; @@ -120,10 +120,10 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded( @@ -132,13 +132,13 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Disc.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( - valid, ray, k, context, geom, pre, v0, Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + return DiscIntersectorK<M, K>::intersect( + valid, ray, k, context, geom, pre, v0, Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct OrientedDiscMiIntersector1 { typedef PointMi<M> Primitive; @@ -153,9 +153,9 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded(const Precalculations& pre, @@ -167,13 +167,13 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + return DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct OrientedDiscMiMBIntersector1 { typedef PointMi<M> Primitive; @@ -188,9 +188,9 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, n0, Intersect1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded(const Precalculations& pre, @@ -202,13 +202,13 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom, ray.time()); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, Mx, filter>(ray, context, Disc.geomID(), Disc.primID())); + const vbool<M> valid = Disc.valid(); + return DiscIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, n0, Occluded1EpilogM<M, filter>(ray, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct OrientedDiscMiIntersectorK { typedef PointMi<M> Primitive; @@ -221,10 +221,10 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, n0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded( @@ -234,14 +234,14 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + return DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, n0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct OrientedDiscMiMBIntersectorK { typedef PointMi<M> Primitive; @@ -254,10 +254,10 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, n0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Intersect1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } static __forceinline bool occluded( @@ -267,10 +267,10 @@ namespace embree const Points* geom = context->scene->get<Points>(Disc.geomID()); Vec4vf<M> v0; Vec3vf<M> n0; Disc.gather(v0, n0, geom, ray.time()[k]); - const vbool<Mx> valid = Disc.template valid<Mx>(); - return DiscIntersectorK<Mx, K>::intersect( + const vbool<M> valid = Disc.valid(); + return DiscIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, n0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); + Occluded1KEpilogM<M, K, filter>(ray, k, context, Disc.geomID(), Disc.primID())); } }; } // namespace isa diff --git a/thirdparty/embree-aarch64/kernels/geometry/filter.h b/thirdparty/embree/kernels/geometry/filter.h index 4cdf7a395a..3b4d924ea7 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/filter.h +++ b/thirdparty/embree/kernels/geometry/filter.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h b/thirdparty/embree/kernels/geometry/grid_intersector.h index 46a0af0827..9c59cef119 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_intersector.h +++ b/thirdparty/embree/kernels/geometry/grid_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h b/thirdparty/embree/kernels/geometry/grid_soa.h index d3b275586c..cea90aedf6 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa.h +++ b/thirdparty/embree/kernels/geometry/grid_soa.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -41,7 +41,7 @@ namespace embree } const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float); size_t rootBytes = time_steps*sizeof(BVH4::NodeRef); -#if !defined(__X86_64__) && !defined(__aarch64__) +#if !defined(__64BIT__) rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding. #endif void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes); @@ -62,8 +62,8 @@ namespace embree __forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; } /*! returns pointer to BVH array */ - __forceinline int8_t* bvhData() { return &data[0]; } - __forceinline const int8_t* bvhData() const { return &data[0]; } + __forceinline char* bvhData() { return &data[0]; } + __forceinline const char* bvhData() const { return &data[0]; } /*! returns pointer to Grid array */ __forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; } @@ -132,7 +132,7 @@ namespace embree __forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines) : grid_uv(grid_uv), line_offset(line_offset), lines(lines) {} - __forceinline void operator() (vfloat& u, vfloat& v) const { + __forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const { const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines); const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]); const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]); @@ -253,7 +253,7 @@ namespace embree public: BVH4::NodeRef troot; -#if !defined(__X86_64__) && !defined(__aarch64__) +#if !defined(__64BIT__) unsigned align1; #endif unsigned time_steps; @@ -269,7 +269,7 @@ namespace embree unsigned gridBytes; unsigned rootOffset; - int8_t data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots + char data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots }; } } diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h b/thirdparty/embree/kernels/geometry/grid_soa_intersector1.h index 2ed922a5ae..8fbf0d4bdf 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector1.h +++ b/thirdparty/embree/kernels/geometry/grid_soa_intersector1.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h b/thirdparty/embree/kernels/geometry/grid_soa_intersector_packet.h index 41d66e1e28..14cacab5fe 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/grid_soa_intersector_packet.h +++ b/thirdparty/embree/kernels/geometry/grid_soa_intersector_packet.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -20,7 +20,7 @@ namespace embree __forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} - __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const { + __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const { const vfloat<K> uv00(grid_uv[ofs00]); const vfloat<K> uv01(grid_uv[ofs01]); const vfloat<K> uv10(grid_uv[ofs10]); @@ -42,7 +42,7 @@ namespace embree __forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11) : grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {} - __forceinline void operator() (vfloat<K>& u, vfloat<K>& v) const { + __forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const { const vfloat<K> uv00(grid_uv[ofs00]); const vfloat<K> uv01(grid_uv[ofs01]); const vfloat<K> uv10(grid_uv[ofs10]); @@ -222,7 +222,7 @@ namespace embree static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) { vfloat<K> vftime; - vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); + vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); vbool<K> valid1 = valid_i; while (any(valid1)) { @@ -282,7 +282,7 @@ namespace embree static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, IntersectContext* context, const Primitive* prim, size_t& lazy_node) { vfloat<K> vftime; - vint<K> vitime = getTimeSegment(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); + vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime); vbool<K> valid_o = valid_i; vbool<K> valid1 = valid_i; diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance.h b/thirdparty/embree/kernels/geometry/instance.h index 66893d581f..7c0e7e0f49 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/instance.h +++ b/thirdparty/embree/kernels/geometry/instance.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h b/thirdparty/embree/kernels/geometry/instance_intersector.h index 91731a39c5..28a7b728e5 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/instance_intersector.h +++ b/thirdparty/embree/kernels/geometry/instance_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h b/thirdparty/embree/kernels/geometry/intersector_epilog.h index 0df49dd6e9..7bf134cc54 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/intersector_epilog.h +++ b/thirdparty/embree/kernels/geometry/intersector_epilog.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -13,7 +13,7 @@ namespace embree { template<int M> struct UVIdentity { - __forceinline void operator() (vfloat<M>& u, vfloat<M>& v) const {} + __forceinline void operator() (vfloat<M>& u, vfloat<M>& v, Vec3vf<M>& Ng) const {} }; @@ -63,7 +63,7 @@ namespace embree ray.v = hit.v; ray.primID = primID; ray.geomID = geomID; - instance_id_stack::copy(context->user->instID, ray.instID); + instance_id_stack::copy_UU(context->user->instID, ray.instID); return true; } }; @@ -162,7 +162,7 @@ namespace embree ray.v[k] = hit.v; ray.primID[k] = primID; ray.geomID[k] = geomID; - instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); + instance_id_stack::copy_UV<K>(context->user->instID, ray.instID, k); return true; } }; @@ -211,7 +211,7 @@ namespace embree } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct Intersect1EpilogM { RayHit& ray; @@ -226,11 +226,10 @@ namespace embree : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const { Scene* scene MAYBE_UNUSED = context->scene; - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; + vbool<M> valid = valid_i; hit.finalize(); size_t i = select_min(valid,hit.vt); unsigned int geomID = geomIDs[i]; @@ -287,94 +286,13 @@ namespace embree ray.v = uv.y; ray.primID = primIDs[i]; ray.geomID = geomID; - instance_id_stack::copy(context->user->instID, ray.instID); + instance_id_stack::copy_UU(context->user->instID, ray.instID); return true; } }; -#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 template<int M, bool filter> - struct Intersect1EpilogM<M,16,filter> - { - static const size_t Mx = 16; - RayHit& ray; - IntersectContext* context; - const vuint<M>& geomIDs; - const vuint<M>& primIDs; - - __forceinline Intersect1EpilogM(RayHit& ray, - IntersectContext* context, - const vuint<M>& geomIDs, - const vuint<M>& primIDs) - : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} - - template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const - { - Scene* MAYBE_UNUSED scene = context->scene; - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; - hit.finalize(); - size_t i = select_min(valid,hit.vt); - unsigned int geomID = geomIDs[i]; - - /* intersection filter test */ -#if defined(EMBREE_FILTER_FUNCTION) || defined(EMBREE_RAY_MASK) - bool foundhit = false; - goto entry; - while (true) - { - if (unlikely(none(valid))) return foundhit; - i = select_min(valid,hit.vt); - - geomID = geomIDs[i]; - entry: - Geometry* geometry MAYBE_UNUSED = scene->get(geomID); - -#if defined(EMBREE_RAY_MASK) - /* goto next hit if mask test fails */ - if ((geometry->mask & ray.mask) == 0) { - clear(valid,i); - continue; - } -#endif - -#if defined(EMBREE_FILTER_FUNCTION) - /* call intersection filter function */ - if (filter) { - if (unlikely(context->hasContextFilter() || geometry->hasIntersectionFilter())) { - const Vec2f uv = hit.uv(i); - HitK<1> h(context->user,geomID,primIDs[i],uv.x,uv.y,hit.Ng(i)); - const float old_t = ray.tfar; - ray.tfar = hit.t(i); - const bool found = runIntersectionFilter1(geometry,ray,context,h); - if (!found) ray.tfar = old_t; - foundhit |= found; - clear(valid,i); - valid &= hit.vt <= ray.tfar; // intersection filters may modify tfar value - continue; - } - } -#endif - break; - } -#endif - - vbool<Mx> finalMask(((unsigned int)1 << i)); - ray.update(finalMask,hit.vt,hit.vu,hit.vv,hit.vNg.x,hit.vNg.y,hit.vNg.z,geomID,primIDs); - instance_id_stack::foreach([&](unsigned level) - { - ray.instID[level] = context->user->instID[level]; - return (context->user->instID[level] != RTC_INVALID_GEOMETRY_ID); - }); - return true; - - } - }; -#endif - - template<int M, int Mx, bool filter> struct Occluded1EpilogM { Ray& ray; @@ -389,7 +307,7 @@ namespace embree : ray(ray), context(context), geomIDs(geomIDs), primIDs(primIDs) {} template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const { Scene* scene MAYBE_UNUSED = context->scene; /* intersection filter test */ @@ -397,8 +315,7 @@ namespace embree if (unlikely(filter)) hit.finalize(); /* called only once */ - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; + vbool<M> valid = valid_i; size_t m=movemask(valid); goto entry; while (true) @@ -506,7 +423,7 @@ namespace embree ray.v = uv.y; ray.primID = primID; ray.geomID = geomID; - instance_id_stack::copy(context->user->instID, ray.instID); + instance_id_stack::copy_UU(context->user->instID, ray.instID); return true; } }; @@ -616,7 +533,7 @@ namespace embree vfloat<K>::store(valid,&ray.v,v); vuint<K>::store(valid,&ray.primID,primID); vuint<K>::store(valid,&ray.geomID,geomID); - instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid); + instance_id_stack::copy_UV<K>(context->user->instID, ray.instID, valid); return valid; } }; @@ -646,8 +563,8 @@ namespace embree /* ray masking test */ Scene* scene MAYBE_UNUSED = context->scene; - const unsigned int geomID = geomIDs[i]; - const unsigned int primID = primIDs[i]; + const unsigned int geomID MAYBE_UNUSED = geomIDs[i]; + const unsigned int primID MAYBE_UNUSED = primIDs[i]; Geometry* geometry MAYBE_UNUSED = scene->get(geomID); #if defined(EMBREE_RAY_MASK) valid &= (geometry->mask & ray.mask) != 0; @@ -731,8 +648,7 @@ namespace embree vfloat<K>::store(valid,&ray.v,v); vuint<K>::store(valid,&ray.primID,primID); vuint<K>::store(valid,&ray.geomID,geomID); - instance_id_stack::copy<const unsigned*, vuint<K>*, const vbool<K>&>(context->user->instID, ray.instID, valid); - + instance_id_stack::copy_UV<K>(context->user->instID, ray.instID, valid); return valid; } }; @@ -788,7 +704,7 @@ namespace embree } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct Intersect1KEpilogM { RayHitK<K>& ray; @@ -804,12 +720,11 @@ namespace embree : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {} template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const { Scene* scene MAYBE_UNUSED = context->scene; - vbool<Mx> valid = valid_i; + vbool<M> valid = valid_i; hit.finalize(); - if (Mx > M) valid &= (1<<M)-1; size_t i = select_min(valid,hit.vt); assert(i<M); unsigned int geomID = geomIDs[i]; @@ -858,9 +773,6 @@ namespace embree #endif assert(i<M); /* update hit information */ -#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 - ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<Mx>(hit.vNg.x),vfloat<Mx>(hit.vNg.y),vfloat<Mx>(hit.vNg.z),geomID,vuint<Mx>(primIDs)); -#else const Vec2f uv = hit.uv(i); ray.tfar[k] = hit.t(i); ray.Ng.x[k] = hit.vNg.x[i]; @@ -870,13 +782,12 @@ namespace embree ray.v[k] = uv.y; ray.primID[k] = primIDs[i]; ray.geomID[k] = geomID; - instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); -#endif + instance_id_stack::copy_UV<K>(context->user->instID, ray.instID, k); return true; } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct Occluded1KEpilogM { RayK<K>& ray; @@ -892,7 +803,7 @@ namespace embree : ray(ray), k(k), context(context), geomIDs(geomIDs), primIDs(primIDs) {} template<typename Hit> - __forceinline bool operator() (const vbool<Mx>& valid_i, Hit& hit) const + __forceinline bool operator() (const vbool<M>& valid_i, Hit& hit) const { Scene* scene MAYBE_UNUSED = context->scene; @@ -901,8 +812,7 @@ namespace embree if (unlikely(filter)) hit.finalize(); /* called only once */ - vbool<Mx> valid = valid_i; - if (Mx > M) valid &= (1<<M)-1; + vbool<M> valid = valid_i; size_t m=movemask(valid); goto entry; while (true) @@ -1002,10 +912,6 @@ namespace embree #endif /* update hit information */ -#if 0 && defined(__AVX512F__) // do not enable, this reduced frequency for BVH4 - const Vec3fa Ng = hit.Ng(i); - ray.updateK(i,k,hit.vt,hit.vu,hit.vv,vfloat<M>(Ng.x),vfloat<M>(Ng.y),vfloat<M>(Ng.z),geomID,vuint<M>(primID)); -#else const Vec2f uv = hit.uv(i); const Vec3fa Ng = hit.Ng(i); ray.tfar[k] = hit.t(i); @@ -1016,8 +922,7 @@ namespace embree ray.v[k] = uv.y; ray.primID[k] = primID; ray.geomID[k] = geomID; - instance_id_stack::copy<const unsigned*, vuint<K>*, const size_t&>(context->user->instID, ray.instID, k); -#endif + instance_id_stack::copy_UV<K>(context->user->instID, ray.instID, k); return true; } }; diff --git a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h b/thirdparty/embree/kernels/geometry/intersector_iterators.h index 5c1ba5cb61..9cac1cd25c 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/intersector_iterators.h +++ b/thirdparty/embree/kernels/geometry/intersector_iterators.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -19,15 +19,15 @@ namespace embree typedef typename Intersector::Primitive Primitive; typedef typename Intersector::Precalculations Precalculations; - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { for (size_t i=0; i<num; i++) Intersector::intersect(pre,ray,context,prim[i]); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { for (size_t i=0; i<num; i++) { if (Intersector::occluded(pre,ray,context,prim[i])) @@ -82,16 +82,16 @@ namespace embree return !valid0; } - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { for (size_t i=0; i<num; i++) { Intersector::intersect(pre,ray,k,context,prim[i]); } } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { for (size_t i=0; i<num; i++) { if (Intersector::occluded(pre,ray,k,context,prim[i])) diff --git a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h b/thirdparty/embree/kernels/geometry/line_intersector.h index eef5b0b1fd..41096d8794 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/line_intersector.h +++ b/thirdparty/embree/kernels/geometry/line_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -23,6 +23,10 @@ namespace embree __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } __forceinline float t (const size_t i) const { return vt[i]; } __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } + + __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); } + __forceinline vfloat<M> t () const { return vt; } + __forceinline Vec3vf<M> Ng() const { return vNg; } public: vfloat<M> vu; @@ -36,7 +40,7 @@ namespace embree { typedef CurvePrecalculations1 Precalculations; - template<typename Epilog> + template<typename Ray, typename Epilog> static __forceinline bool intersect(const vbool<M>& valid_i, Ray& ray, IntersectContext* context, @@ -51,8 +55,8 @@ namespace embree LinearSpace3<Vec3vf<M>> ray_space = pre.ray_space; const Vec3vf<M> ray_org ((Vec3fa)ray.org); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i); Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w); Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w); @@ -105,8 +109,8 @@ namespace embree const Vec3vf<M> ray_org(ray.org.x[k],ray.org.y[k],ray.org.z[k]); const Vec3vf<M> ray_dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i); Vec4vf<M> p0(xfmVector(ray_space,v0.xyz()-ray_org), v0.w); Vec4vf<M> p1(xfmVector(ray_space,v1.xyz()-ray_org), v1.w); diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei.h b/thirdparty/embree/kernels/geometry/linei.h index a72029ca53..3ee70ac012 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/linei.h +++ b/thirdparty/embree/kernels/geometry/linei.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -49,10 +49,6 @@ namespace embree /* Returns a mask that tells which line segments are valid */ __forceinline vbool<M> valid() const { return primIDs != vuint<M>(-1); } - /* Returns a mask that tells which line segments are valid */ - template<int Mx> - __forceinline vbool<Mx> valid() const { return vuint<Mx>(primIDs) != vuint<Mx>(-1); } - /* Returns if the specified line segment is valid */ __forceinline bool valid(const size_t i) const { assert(i<M); return primIDs[i] != -1; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h b/thirdparty/embree/kernels/geometry/linei_intersector.h index a431796a88..5992827f5b 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/linei_intersector.h +++ b/thirdparty/embree/kernels/geometry/linei_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,7 +11,7 @@ namespace embree { namespace isa { - template<int M, int Mx, bool filter> + template<int M, bool filter> struct FlatLinearCurveMiIntersector1 { typedef LineMi<M> Primitive; @@ -22,8 +22,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + FlatLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) @@ -31,8 +31,8 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return FlatLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) @@ -41,7 +41,7 @@ namespace embree } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct FlatLinearCurveMiMBIntersector1 { typedef LineMi<M> Primitive; @@ -52,8 +52,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + FlatLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) @@ -61,8 +61,8 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return FlatLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) @@ -71,7 +71,7 @@ namespace embree } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct FlatLinearCurveMiIntersectorK { typedef LineMi<M> Primitive; @@ -82,8 +82,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + FlatLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) @@ -91,12 +91,12 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return FlatLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct FlatLinearCurveMiMBIntersectorK { typedef LineMi<M> Primitive; @@ -107,8 +107,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + FlatLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) @@ -116,8 +116,8 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1; line.gather(v0,v1,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - return FlatLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return FlatLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/object.h b/thirdparty/embree/kernels/geometry/object.h index f26391de52..2a61829ffd 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/object.h +++ b/thirdparty/embree/kernels/geometry/object.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h b/thirdparty/embree/kernels/geometry/object_intersector.h index 97882e0e59..11ceb2f7fe 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/object_intersector.h +++ b/thirdparty/embree/kernels/geometry/object_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/plane.h b/thirdparty/embree/kernels/geometry/plane.h index ebe45db558..e447122eab 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/plane.h +++ b/thirdparty/embree/kernels/geometry/plane.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/pointi.h b/thirdparty/embree/kernels/geometry/pointi.h index 4ba298e86b..bed04116b0 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/pointi.h +++ b/thirdparty/embree/kernels/geometry/pointi.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -61,11 +61,6 @@ namespace embree return vint<M>(step) < vint<M>(numPrimitives); } - /* Returns a mask that tells which line segments are valid */ - template<int Mx> __forceinline vbool<Mx> valid() const { - return vint<Mx>(step) < vint<Mx>(numPrimitives); - } - /* Returns if the specified line segment is valid */ __forceinline bool valid(const size_t i) const { diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive.h b/thirdparty/embree/kernels/geometry/primitive.h index 41e5b2b304..608d981dd7 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/primitive.h +++ b/thirdparty/embree/kernels/geometry/primitive.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp b/thirdparty/embree/kernels/geometry/primitive4.cpp index f93574c9c8..9c953c5d35 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/primitive4.cpp +++ b/thirdparty/embree/kernels/geometry/primitive4.cpp @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "primitive.h" diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h b/thirdparty/embree/kernels/geometry/quad_intersector.h index 57ff4e60e5..93c9526912 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector.h +++ b/thirdparty/embree/kernels/geometry/quad_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h b/thirdparty/embree/kernels/geometry/quad_intersector_moeller.h index 74e8c7720c..3abc9d6f70 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_moeller.h +++ b/thirdparty/embree/kernels/geometry/quad_intersector_moeller.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -126,16 +126,17 @@ namespace embree const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - MoellerTrumboreHitM<M> hit; + UVIdentity<M> mapUV; + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); + Intersect1EpilogM<M,filter> epilog(ray,context,geomID,primID); /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) + if (intersector.intersect(ray,v0,v1,v3,mapUV,hit)) epilog(hit.valid,hit); /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) + if (intersector.intersect(ray,v2,v3,v1,mapUV,hit)) { hit.U = hit.absDen - hit.U; hit.V = hit.absDen - hit.V; @@ -147,19 +148,20 @@ namespace embree const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - MoellerTrumboreHitM<M> hit; + UVIdentity<M> mapUV; + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); MoellerTrumboreIntersector1<M> intersector(ray,nullptr); - Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); + Occluded1EpilogM<M,filter> epilog(ray,context,geomID,primID); /* intersect first triangle */ - if (intersector.intersect(ray,v0,v1,v3,hit)) + if (intersector.intersect(ray,v0,v1,v3,mapUV,hit)) { if (epilog(hit.valid,hit)) return true; } /* intersect second triangle */ - if (intersector.intersect(ray,v2,v3,v1,hit)) + if (intersector.intersect(ray,v2,v3,v1,mapUV,hit)) { hit.U = hit.absDen - hit.U; hit.V = hit.absDen - hit.V; @@ -170,70 +172,7 @@ namespace embree } }; -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<bool filter> - struct QuadMIntersector1MoellerTrumbore<4,filter> - { - __forceinline QuadMIntersector1MoellerTrumbore() {} - - __forceinline QuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - const vbool16 flags(0xf0f0); - - MoellerTrumboreHitM<16> hit; - MoellerTrumboreIntersector1<16> intersector(ray,nullptr); - if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) - { - vfloat16 U = hit.U, V = hit.V, absDen = hit.absDen; -#if !defined(EMBREE_BACKFACE_CULLING) - hit.U = select(flags,absDen-V,U); - hit.V = select(flags,absDen-U,V); - hit.vNg *= select(flags,vfloat16(-1.0f),vfloat16(1.0f)); // FIXME: use XOR -#else - hit.U = select(flags,absDen-U,U); - hit.V = select(flags,absDen-V,V); -#endif - if (likely(epilog(hit.valid,hit))) - return true; - } - return false; - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) +#if defined(__AVX__) /*! Intersects 4 quads with 1 ray using AVX */ template<bool filter> @@ -254,10 +193,11 @@ namespace embree const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); #endif - MoellerTrumboreHitM<8> hit; + UVIdentity<8> mapUV; + MoellerTrumboreHitM<8,UVIdentity<8>> hit(mapUV); MoellerTrumboreIntersector1<8> intersector(ray,nullptr); const vbool8 flags(0,0,0,0,1,1,1,1); - if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,hit))) + if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,mapUV,hit))) { vfloat8 U = hit.U, V = hit.V, absDen = hit.absDen; @@ -279,14 +219,14 @@ namespace embree const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID))); } __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID))); } }; @@ -353,7 +293,7 @@ namespace embree const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v2-v0; const Vec3vf<M> Ng = cross(e2,e1); - return intersect(ray,k,v0,e1,e2,Ng,flags,epilog); + return intersect<M,K>(ray,k,v0,e1,e2,Ng,flags,epilog); } }; @@ -458,70 +398,24 @@ namespace embree const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog); - MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog); + Intersect1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID); + MoellerTrumboreIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog); + MoellerTrumboreIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog); } __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; - if (MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; + Occluded1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID); + if (MoellerTrumboreIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; + if (MoellerTrumboreIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; return false; } }; -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<int K, bool filter> - struct QuadMIntersectorKMoellerTrumbore<4,K,filter> : public QuadMIntersectorKMoellerTrumboreBase<4,K,filter> - { - __forceinline QuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - const vbool16 flags(0xf0f0); - return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) +#if defined(__AVX__) /*! Intersects 4 quads with 1 ray using AVX */ template<int K, bool filter> @@ -543,21 +437,21 @@ namespace embree const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); #endif const vbool8 flags(0,0,0,0,1,1,1,1); - return MoellerTrumboreIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); + return MoellerTrumboreIntersector1KTriangleM::intersect1<8,K>(ray,k,vtx0,vtx1,vtx2,flags,epilog); } __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); } __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); } }; diff --git a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h b/thirdparty/embree/kernels/geometry/quad_intersector_pluecker.h index 7ca3aed0a0..9873ff76ac 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quad_intersector_pluecker.h +++ b/thirdparty/embree/kernels/geometry/quad_intersector_pluecker.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -175,69 +175,23 @@ namespace embree const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - Intersect1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); - PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog); - PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true),epilog); + Intersect1EpilogM<M,filter> epilog(ray,context,geomID,primID); + PlueckerIntersectorTriangle1::intersect<M>(ray,v0,v1,v3,vbool<M>(false),epilog); + PlueckerIntersectorTriangle1::intersect<M>(ray,v2,v3,v1,vbool<M>(true),epilog); } __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - Occluded1EpilogM<M,M,filter> epilog(ray,context,geomID,primID); - if (PlueckerIntersectorTriangle1::intersect(ray,v0,v1,v3,vbool<M>(false),epilog)) return true; - if (PlueckerIntersectorTriangle1::intersect(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true; + Occluded1EpilogM<M,filter> epilog(ray,context,geomID,primID); + if (PlueckerIntersectorTriangle1::intersect<M>(ray,v0,v1,v3,vbool<M>(false),epilog)) return true; + if (PlueckerIntersectorTriangle1::intersect<M>(ray,v2,v3,v1,vbool<M>(true ),epilog)) return true; return false; } }; -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<bool filter> - struct QuadMIntersector1Pluecker<4,filter> - { - __forceinline QuadMIntersector1Pluecker() {} - - __forceinline QuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} - - template<typename Epilog> - __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - const vbool16 flags(0xf0f0); - return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect(RayHit& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded(Ray& ray, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,16,filter>(ray,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) +#if defined(__AVX__) /*! Intersects 4 quads with 1 ray using AVX */ template<bool filter> @@ -259,19 +213,19 @@ namespace embree const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); #endif const vbool8 flags(0,0,0,0,1,1,1,1); - return PlueckerIntersectorTriangle1::intersect(ray,vtx0,vtx1,vtx2,flags,epilog); + return PlueckerIntersectorTriangle1::intersect<8>(ray,vtx0,vtx1,vtx2,flags,epilog); } __forceinline bool intersect(RayHit& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + return intersect(ray,v0,v1,v2,v3,Intersect1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID))); } __forceinline bool occluded(Ray& ray, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,8,filter>(ray,context,vuint8(geomID),vuint8(primID))); + return intersect(ray,v0,v1,v2,v3,Occluded1EpilogM<8,filter>(ray,context,vuint8(geomID),vuint8(primID))); } }; @@ -305,18 +259,19 @@ namespace embree const Vec3vf<M> e0 = v2-v0; const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v1-v2; - + /* perform edge tests */ const vfloat<M> U = dot(cross(e0,v2+v0),D); const vfloat<M> V = dot(cross(e1,v0+v1),D); const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; const vfloat<M> eps = float(ulp)*abs(UVW); #if defined(EMBREE_BACKFACE_CULLING) vbool<M> valid = max(U,V,W) <= eps; #else vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); -#endif +#endif if (unlikely(none(valid))) return false; /* calculate geometry normal and denominator */ @@ -423,69 +378,23 @@ namespace embree const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - Intersect1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog); - PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog); + Intersect1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID); + PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog); + PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog); } __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const vuint<M>& geomID, const vuint<M>& primID) const { - Occluded1KEpilogM<M,M,K,filter> epilog(ray,k,context,geomID,primID); - if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; - if (PlueckerIntersector1KTriangleM::intersect1(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; + Occluded1KEpilogM<M,K,filter> epilog(ray,k,context,geomID,primID); + if (PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v0,v1,v3,vbool<M>(false),epilog)) return true; + if (PlueckerIntersector1KTriangleM::intersect1<M,K>(ray,k,v2,v3,v1,vbool<M>(true ),epilog)) return true; return false; } }; -#if defined(__AVX512ER__) // KNL - - /*! Intersects 4 quads with 1 ray using AVX512 */ - template<int K, bool filter> - struct QuadMIntersectorKPluecker<4,K,filter> : public QuadMIntersectorKPlueckerBase<4,K,filter> - { - __forceinline QuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) - : QuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} - - template<typename Epilog> - __forceinline bool intersect1(RayK<K>& ray, size_t k, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const Epilog& epilog) const - { - const Vec3vf16 vtx0(select(0x0f0f,vfloat16(v0.x),vfloat16(v2.x)), - select(0x0f0f,vfloat16(v0.y),vfloat16(v2.y)), - select(0x0f0f,vfloat16(v0.z),vfloat16(v2.z))); -#if !defined(EMBREE_BACKFACE_CULLING) - const Vec3vf16 vtx1(vfloat16(v1.x),vfloat16(v1.y),vfloat16(v1.z)); - const Vec3vf16 vtx2(vfloat16(v3.x),vfloat16(v3.y),vfloat16(v3.z)); -#else - const Vec3vf16 vtx1(select(0x0f0f,vfloat16(v1.x),vfloat16(v3.x)), - select(0x0f0f,vfloat16(v1.y),vfloat16(v3.y)), - select(0x0f0f,vfloat16(v1.z),vfloat16(v3.z))); - const Vec3vf16 vtx2(select(0x0f0f,vfloat16(v3.x),vfloat16(v1.x)), - select(0x0f0f,vfloat16(v3.y),vfloat16(v1.y)), - select(0x0f0f,vfloat16(v3.z),vfloat16(v1.z))); -#endif - - const vbool16 flags(0xf0f0); - return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); - } - - __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - - __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, - const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, - const vuint4& geomID, const vuint4& primID) const - { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,16,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); - } - }; - -#elif defined(__AVX__) +#if defined(__AVX__) /*! Intersects 4 quads with 1 ray using AVX */ template<int K, bool filter> @@ -506,21 +415,21 @@ namespace embree const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); #endif - return PlueckerIntersector1KTriangleM::intersect1(ray,k,vtx0,vtx1,vtx2,flags,epilog); + return PlueckerIntersector1KTriangleM::intersect1<8,K>(ray,k,vtx0,vtx1,vtx2,flags,epilog); } __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + return intersect1(ray,k,v0,v1,v2,v3,Intersect1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); } __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const vuint4& geomID, const vuint4& primID) const { - return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); + return intersect1(ray,k,v0,v1,v2,v3,Occluded1KEpilogM<8,K,filter>(ray,k,context,vuint8(geomID),vuint8(primID))); } }; diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi.h b/thirdparty/embree/kernels/geometry/quadi.h index 741ec519ab..70a7bdf158 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quadi.h +++ b/thirdparty/embree/kernels/geometry/quadi.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -349,7 +349,7 @@ namespace embree const QuadMesh* mesh = scene->get<QuadMesh>(geomID(index)); vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(time, ftime); + const vint<K> itime = mesh->timeSegment<K>(time, ftime); const size_t first = bsf(movemask(valid)); if (likely(all(valid,itime[first] == itime))) @@ -361,10 +361,10 @@ namespace embree } else { - p0 = getVertex<0>(valid, index, scene, itime, ftime); - p1 = getVertex<1>(valid, index, scene, itime, ftime); - p2 = getVertex<2>(valid, index, scene, itime, ftime); - p3 = getVertex<3>(valid, index, scene, itime, ftime); + p0 = getVertex<0,K>(valid, index, scene, itime, ftime); + p1 = getVertex<1,K>(valid, index, scene, itime, ftime); + p2 = getVertex<2,K>(valid, index, scene, itime, ftime); + p3 = getVertex<3,K>(valid, index, scene, itime, ftime); } } diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h b/thirdparty/embree/kernels/geometry/quadi_intersector.h index 96cf7f1ca2..20a98c3406 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quadi_intersector.h +++ b/thirdparty/embree/kernels/geometry/quadi_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -230,7 +230,7 @@ namespace embree { if (!quad.valid(i)) break; STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + Vec3vf<K> v0,v1,v2,v3; quad.template gather<K>(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); } } @@ -243,7 +243,7 @@ namespace embree { if (!quad.valid(i)) break; STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + Vec3vf<K> v0,v1,v2,v3; quad.template gather<K>(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) break; } @@ -310,7 +310,7 @@ namespace embree { if (!quad.valid(i)) break; STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + Vec3vf<K> v0,v1,v2,v3; quad.template gather<K>(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); pre.intersectK(valid_i,ray,v0,v1,v2,v3,IntersectKEpilogM<M,K,filter>(ray,context,quad.geomID(),quad.primID(),i)); } } @@ -323,7 +323,7 @@ namespace embree { if (!quad.valid(i)) break; STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2,v3; quad.gather(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); + Vec3vf<K> v0,v1,v2,v3; quad.template gather<K>(valid_i,v0,v1,v2,v3,i,context->scene,ray.time()); if (pre.intersectK(valid0,ray,v0,v1,v2,v3,OccludedKEpilogM<M,K,filter>(valid0,ray,context,quad.geomID(),quad.primID(),i))) break; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv.h b/thirdparty/embree/kernels/geometry/quadv.h index 0a1fe4d128..2137356ff2 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quadv.h +++ b/thirdparty/embree/kernels/geometry/quadv.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h b/thirdparty/embree/kernels/geometry/quadv_intersector.h index 30a24b291a..9b28e05614 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/quadv_intersector.h +++ b/thirdparty/embree/kernels/geometry/quadv_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h b/thirdparty/embree/kernels/geometry/roundline_intersector.h index cdf68f486b..0e9393442b 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/roundline_intersector.h +++ b/thirdparty/embree/kernels/geometry/roundline_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -81,7 +81,11 @@ namespace embree __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } __forceinline float t (const size_t i) const { return vt[i]; } __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - + + __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); } + __forceinline vfloat<M> t () const { return vt; } + __forceinline Vec3vf<M> Ng() const { return vNg; } + public: vfloat<M> vu; vfloat<M> vv; @@ -646,14 +650,15 @@ namespace embree struct RoundLinearCurveIntersector1 { typedef CurvePrecalculations1 Precalculations; - + + template<typename Ray> struct ray_tfar { Ray& ray; __forceinline ray_tfar(Ray& ray) : ray(ray) {} __forceinline vfloat<M> operator() () const { return ray.tfar; }; }; - - template<typename Epilog> + + template<typename Ray, typename Epilog> static __forceinline bool intersect(const vbool<M>& valid_i, Ray& ray, IntersectContext* context, @@ -666,11 +671,11 @@ namespace embree const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); const Vec3vf<M> ray_dir(ray.dir.x, ray.dir.y, ray.dir.z); const vfloat<M> ray_tnear(ray.tnear()); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi); - const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi); - return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray),v0,v1,vL,vR,epilog); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i); + const Vec4vf<M> vL = enlargeRadiusToMinWidth<M>(context,geom,ray_org,vLi); + const Vec4vf<M> vR = enlargeRadiusToMinWidth<M>(context,geom,ray_org,vRi); + return __roundline_internal::intersectConeSphere<M>(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar<Ray>(ray),v0,v1,vL,vR,epilog); } }; @@ -699,11 +704,11 @@ namespace embree const Vec3vf<M> ray_org(ray.org.x[k], ray.org.y[k], ray.org.z[k]); const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); const vfloat<M> ray_tnear = ray.tnear()[k]; - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); - const Vec4vf<M> v1 = enlargeRadiusToMinWidth(context,geom,ray_org,v1i); - const Vec4vf<M> vL = enlargeRadiusToMinWidth(context,geom,ray_org,vLi); - const Vec4vf<M> vR = enlargeRadiusToMinWidth(context,geom,ray_org,vRi); - return __roundline_internal::intersectConeSphere(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,vL,vR,epilog); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); + const Vec4vf<M> v1 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v1i); + const Vec4vf<M> vL = enlargeRadiusToMinWidth<M>(context,geom,ray_org,vLi); + const Vec4vf<M> vR = enlargeRadiusToMinWidth<M>(context,geom,ray_org,vRi); + return __roundline_internal::intersectConeSphere<M>(valid_i,ray_org,ray_dir,ray_tnear,ray_tfar(ray,k),v0,v1,vL,vR,epilog); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h b/thirdparty/embree/kernels/geometry/roundlinei_intersector.h index 079817335e..29061d6475 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/roundlinei_intersector.h +++ b/thirdparty/embree/kernels/geometry/roundlinei_intersector.h @@ -1,18 +1,5 @@ -// ======================================================================== // -// Copyright 2009-2020 Intel Corporation // -// // -// Licensed under the Apache License, Version 2.0 (the "License"); // -// you may not use this file except in compliance with the License. // -// You may obtain a copy of the License at // -// // -// http://www.apache.org/licenses/LICENSE-2.0 // -// // -// Unless required by applicable law or agreed to in writing, software // -// distributed under the License is distributed on an "AS IS" BASIS, // -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // -// See the License for the specific language governing permissions and // -// limitations under the License. // -// ======================================================================== // +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 #pragma once @@ -23,7 +10,7 @@ namespace embree { namespace isa { - template<int M, int Mx, bool filter> + template<int M, bool filter> struct RoundLinearCurveMiIntersector1 { typedef LineMi<M> Primitive; @@ -34,8 +21,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) @@ -43,8 +30,8 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) @@ -53,7 +40,7 @@ namespace embree } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct RoundLinearCurveMiMBIntersector1 { typedef LineMi<M> Primitive; @@ -64,8 +51,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Intersect1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& line) @@ -73,8 +60,8 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersector1<Mx>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,Mx,filter>(ray,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return RoundLinearCurveIntersector1<M>::intersect(valid,ray,context,geom,pre,v0,v1,vL,vR,Occluded1EpilogM<M,filter>(ray,context,line.geomID(),line.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& line) @@ -83,7 +70,7 @@ namespace embree } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct RoundLinearCurveMiIntersectorK { typedef LineMi<M> Primitive; @@ -94,8 +81,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + RoundLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) @@ -103,12 +90,12 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return RoundLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct RoundLinearCurveMiMBIntersectorK { typedef LineMi<M> Primitive; @@ -119,8 +106,8 @@ namespace embree STAT3(normal.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + RoundLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Intersect1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } static __forceinline bool occluded(const Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& line) @@ -128,8 +115,8 @@ namespace embree STAT3(shadow.trav_prims,1,1,1); const LineSegments* geom = context->scene->get<LineSegments>(line.geomID()); Vec4vf<M> v0,v1,vL,vR; line.gather(v0,v1,vL,vR,geom,ray.time()[k]); - const vbool<Mx> valid = line.template valid<Mx>(); - return RoundLinearCurveIntersectorK<Mx,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,line.geomID(),line.primID())); + const vbool<M> valid = line.valid(); + return RoundLinearCurveIntersectorK<M,K>::intersect(valid,ray,k,context,geom,pre,v0,v1,vL,vR,Occluded1KEpilogM<M,K,filter>(ray,k,context,line.geomID(),line.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h b/thirdparty/embree/kernels/geometry/sphere_intersector.h index 3ab90c29ef..2670f9762d 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/sphere_intersector.h +++ b/thirdparty/embree/kernels/geometry/sphere_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -105,7 +105,7 @@ namespace embree const Precalculations& pre, const Vec4vf<M>& v0i, const Epilog& epilog) { const Vec3vf<M> ray_org(ray.org.x, ray.org.y, ray.org.z); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); return intersect(valid_i,ray,pre,v0,epilog); } }; @@ -130,7 +130,7 @@ namespace embree const Vec3vf<M> ray_dir(ray.dir.x[k], ray.dir.y[k], ray.dir.z[k]); const vfloat<M> rd2 = rcp(dot(ray_dir, ray_dir)); - const Vec4vf<M> v0 = enlargeRadiusToMinWidth(context,geom,ray_org,v0i); + const Vec4vf<M> v0 = enlargeRadiusToMinWidth<M>(context,geom,ray_org,v0i); const Vec3vf<M> center = v0.xyz(); const vfloat<M> radius = v0.w; diff --git a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h b/thirdparty/embree/kernels/geometry/spherei_intersector.h index 1146847602..7a0b428117 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/spherei_intersector.h +++ b/thirdparty/embree/kernels/geometry/spherei_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,7 +11,7 @@ namespace embree { namespace isa { - template<int M, int Mx, bool filter> + template<int M, bool filter> struct SphereMiIntersector1 { typedef PointMi<M> Primitive; @@ -25,9 +25,9 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + const vbool<M> valid = sphere.valid(); + SphereIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, filter>(ray, context, sphere.geomID(), sphere.primID())); } static __forceinline bool occluded(const Precalculations& pre, @@ -38,9 +38,9 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + const vbool<M> valid = sphere.valid(); + return SphereIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, filter>(ray, context, sphere.geomID(), sphere.primID())); } static __forceinline bool pointQuery(PointQuery* query, @@ -51,7 +51,7 @@ namespace embree } }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct SphereMiMBIntersector1 { typedef PointMi<M> Primitive; @@ -65,9 +65,9 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + const vbool<M> valid = sphere.valid(); + SphereIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Intersect1EpilogM<M, filter>(ray, context, sphere.geomID(), sphere.primID())); } static __forceinline bool occluded(const Precalculations& pre, @@ -78,9 +78,9 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersector1<Mx>::intersect( - valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, Mx, filter>(ray, context, sphere.geomID(), sphere.primID())); + const vbool<M> valid = sphere.valid(); + return SphereIntersector1<M>::intersect( + valid, ray, context, geom, pre, v0, Occluded1EpilogM<M, filter>(ray, context, sphere.geomID(), sphere.primID())); } static __forceinline bool pointQuery(PointQuery* query, @@ -91,7 +91,7 @@ namespace embree } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct SphereMiIntersectorK { typedef PointMi<M> Primitive; @@ -103,10 +103,10 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersectorK<Mx, K>::intersect( + const vbool<M> valid = sphere.valid(); + SphereIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + Intersect1KEpilogM<M, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); } static __forceinline bool occluded( @@ -115,14 +115,14 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersectorK<Mx, K>::intersect( + const vbool<M> valid = sphere.valid(); + return SphereIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + Occluded1KEpilogM<M, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); } }; - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct SphereMiMBIntersectorK { typedef PointMi<M> Primitive; @@ -134,10 +134,10 @@ namespace embree STAT3(normal.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = sphere.template valid<Mx>(); - SphereIntersectorK<Mx, K>::intersect( + const vbool<M> valid = sphere.valid(); + SphereIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Intersect1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + Intersect1KEpilogM<M, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); } static __forceinline bool occluded( @@ -146,10 +146,10 @@ namespace embree STAT3(shadow.trav_prims, 1, 1, 1); const Points* geom = context->scene->get<Points>(sphere.geomID()); Vec4vf<M> v0; sphere.gather(v0, geom, ray.time()[k]); - const vbool<Mx> valid = sphere.template valid<Mx>(); - return SphereIntersectorK<Mx, K>::intersect( + const vbool<M> valid = sphere.valid(); + return SphereIntersectorK<M, K>::intersect( valid, ray, k, context, geom, pre, v0, - Occluded1KEpilogM<M, Mx, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); + Occluded1KEpilogM<M, K, filter>(ray, k, context, sphere.geomID(), sphere.primID())); } }; } // namespace isa diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h b/thirdparty/embree/kernels/geometry/subdivpatch1.h index 94ad46ad87..ae0d4e2616 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1.h +++ b/thirdparty/embree/kernels/geometry/subdivpatch1.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h b/thirdparty/embree/kernels/geometry/subdivpatch1_intersector.h index 74ec1de258..b4b15a1210 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/subdivpatch1_intersector.h +++ b/thirdparty/embree/kernels/geometry/subdivpatch1_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -43,28 +43,28 @@ namespace embree } /*! Intersect a ray with the primitive. */ - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) GridSOAIntersector1::intersect(pre,ray,context,prim,lazy_node); else processLazyNode(pre,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { intersect(This,pre,ray,context,prim,ty,tray,lazy_node); } /*! Test if the ray is occluded by the primitive */ - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) return GridSOAIntersector1::occluded(pre,ray,context,prim,lazy_node); else return processLazyNode(pre,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { return occluded(This,pre,ray,context,prim,ty,tray,lazy_node); } @@ -100,28 +100,28 @@ namespace embree } /*! Intersect a ray with the primitive. */ - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) GridSOAMBIntersector1::intersect(pre,ray,context,prim,lazy_node); else processLazyNode(pre,ray,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { intersect(This,pre,ray,context,prim,ty,tray,lazy_node); } /*! Test if the ray is occluded by the primitive */ - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) return GridSOAMBIntersector1::occluded(pre,ray,context,prim,lazy_node); else return processLazyNode(pre,ray,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) { + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { return occluded(This,pre,ray,context,prim,ty,tray,lazy_node); } @@ -133,7 +133,7 @@ namespace embree return false; } - template<int N, int Nx, bool robust> + template<int N, bool robust> static __forceinline bool pointQuery(const Accel::Intersectors* This, PointQuery* query, PointQueryContext* context, size_t ty0, const Primitive* prim, size_t ty, const TravPointQuery<N> &tquery, size_t& lazy_node) { return pointQuery(This,query,context,prim,ty,tquery,lazy_node); } @@ -166,15 +166,15 @@ namespace embree else return processLazyNode(pre,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) GridSOAIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); else processLazyNode(pre,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) return GridSOAIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); else return processLazyNode(pre,context,prim,lazy_node); @@ -215,15 +215,15 @@ namespace embree else return processLazyNode(pre,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) GridSOAMBIntersectorK<K>::intersect(pre,ray,k,context,prim,lazy_node); else processLazyNode(pre,context,prim,lazy_node); } - template<int N, int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<int N, bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t ty, const TravRay<N,robust> &tray, size_t& lazy_node) { if (likely(ty == 0)) return GridSOAMBIntersectorK<K>::occluded(pre,ray,k,context,prim,lazy_node); else return processLazyNode(pre,context,prim,lazy_node); diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h b/thirdparty/embree/kernels/geometry/subgrid.h index 39fa6fb0f0..ce54421cab 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid.h +++ b/thirdparty/embree/kernels/geometry/subgrid.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h b/thirdparty/embree/kernels/geometry/subgrid_intersector.h index 045eee4329..ad5fee2e4e 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_intersector.h +++ b/thirdparty/embree/kernels/geometry/subgrid_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -53,14 +53,14 @@ namespace embree return accel->pointQuery(query, context); } - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); #if defined(__AVX__) STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); @@ -75,15 +75,15 @@ namespace embree } } } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); while(mask != 0) { @@ -155,14 +155,14 @@ namespace embree return accel->pointQuery(query, context); } - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); #if defined(__AVX__) STAT3(normal.trav_hit_boxes[popcnt(mask)],1,1,1); @@ -178,14 +178,14 @@ namespace embree } } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); while(mask != 0) { @@ -326,14 +326,14 @@ namespace embree return !valid0; } - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); while(mask != 0) { @@ -346,14 +346,14 @@ namespace embree } } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); while(mask != 0) { @@ -408,7 +408,9 @@ namespace embree const Vec3vf<K> p2 = vtx[i*4+2]; const Vec3vf<K> p3 = vtx[i*4+3]; STAT3(shadow.trav_prims,1,popcnt(valid0),K); - if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) + //if (pre.intersectK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) + if (pre.occludedK(valid0,ray,p0,p1,p2,p3,g,subgrid,i,OccludedKEpilogM<4,K,filter>(valid0,ray,context,subgrid.geomID(),subgrid.primID(),i))) + break; } return !valid0; @@ -470,14 +472,14 @@ namespace embree return !valid0; } - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); while(mask != 0) { @@ -490,14 +492,14 @@ namespace embree } } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,dist); while(mask != 0) { @@ -511,8 +513,5 @@ namespace embree return false; } }; - - - } } diff --git a/thirdparty/embree/kernels/geometry/subgrid_intersector_moeller.h b/thirdparty/embree/kernels/geometry/subgrid_intersector_moeller.h new file mode 100644 index 0000000000..64937d34fe --- /dev/null +++ b/thirdparty/embree/kernels/geometry/subgrid_intersector_moeller.h @@ -0,0 +1,382 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "subgrid.h" +#include "quad_intersector_moeller.h" + +namespace embree +{ + namespace isa + { + + /* ----------------------------- */ + /* -- single ray intersectors -- */ + /* ----------------------------- */ + + template<int M> + __forceinline void interpolateUV(MoellerTrumboreHitM<M,UVIdentity<M>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY) + { + /* correct U,V interpolation across the entire grid */ + const vint<M> sx((int)subgrid.x()); + const vint<M> sy((int)subgrid.y()); + const vint<M> sxM(sx + stepX); + const vint<M> syM(sy + stepY); + const float inv_resX = rcp((float)((int)g.resX-1)); + const float inv_resY = rcp((float)((int)g.resY-1)); + hit.U = (hit.U + (vfloat<M>)sxM * hit.absDen) * inv_resX; + hit.V = (hit.V + (vfloat<M>)syM * hit.absDen) * inv_resY; + } + + template<int M, bool filter> + struct SubGridQuadMIntersector1MoellerTrumbore; + + template<int M, bool filter> + struct SubGridQuadMIntersector1MoellerTrumbore + { + __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} + + __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + __forceinline void intersect(RayHit& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + UVIdentity<M> mapUV; + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); + MoellerTrumboreIntersector1<M> intersector(ray,nullptr); + Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + UVIdentity<M> mapUV; + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); + MoellerTrumboreIntersector1<M> intersector(ray,nullptr); + Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) + return true; + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) + return true; + } + return false; + } + }; + +#if defined (__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<bool filter> + struct SubGridQuadMIntersector1MoellerTrumbore<4,filter> + { + __forceinline SubGridQuadMIntersector1MoellerTrumbore() {} + + __forceinline SubGridQuadMIntersector1MoellerTrumbore(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + UVIdentity<8> mapUV; + MoellerTrumboreHitM<8,UVIdentity<8>> hit(mapUV); + MoellerTrumboreIntersector1<8> intersector(ray,nullptr); + const vbool8 flags(0,0,0,0,1,1,1,1); + if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,mapUV,hit))) + { + /* correct U,V interpolation across the entire grid */ + const vfloat8 U = select(flags,hit.absDen - hit.V,hit.U); + const vfloat8 V = select(flags,hit.absDen - hit.U,hit.V); + hit.U = U; + hit.V = V; + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); + interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); + if (unlikely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + }; + +#endif + + // ============================================================================================================================ + // ============================================================================================================================ + // ============================================================================================================================ + + + /* ----------------------------- */ + /* -- ray packet intersectors -- */ + /* ----------------------------- */ + + template<int K> + __forceinline void interpolateUV(const vbool<K>& valid, MoellerTrumboreHitK<K,UVIdentity<K>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const unsigned int i) + { + /* correct U,V interpolation across the entire grid */ + const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); + const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); + const float inv_resX = rcp((float)(int)(g.resX-1)); + const float inv_resY = rcp((float)(int)(g.resY-1)); + hit.U = select(valid,(hit.U + vfloat<K>((float)sx) * hit.absDen) * inv_resX,hit.U); + hit.V = select(valid,(hit.V + vfloat<K>((float)sy) * hit.absDen) * inv_resY,hit.V); + } + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKMoellerTrumboreBase + { + __forceinline SubGridQuadMIntersectorKMoellerTrumboreBase(const vbool<K>& valid, const RayK<K>& ray) {} + + template<typename Epilog> + __forceinline bool intersectK(const vbool<K>& valid, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + UVIdentity<K> mapUV; + MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV); + MoellerTrumboreIntersectorK<M,K> intersector; + + const vbool<K> valid0 = intersector.intersectK(valid,ray,v0,v1,v3,mapUV,hit); + if (any(valid0)) + { + interpolateUV(valid0,hit,g,subgrid,i); + epilog(valid0,hit); + } + const vbool<K> valid1 = intersector.intersectK(valid,ray,v2,v3,v1,mapUV,hit); + if (any(valid1)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV(valid1,hit,g,subgrid,i); + epilog(valid1,hit); + } + return any(valid0|valid1); + } + + template<typename Epilog> + __forceinline bool occludedK(const vbool<K>& valid, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + UVIdentity<K> mapUV; + MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV); + MoellerTrumboreIntersectorK<M,K> intersector; + + vbool<K> valid_final = valid; + const vbool<K> valid0 = intersector.intersectK(valid,ray,v0,v1,v3,mapUV,hit); + if (any(valid0)) + { + interpolateUV(valid0,hit,g,subgrid,i); + epilog(valid0,hit); + valid_final &= !valid0; + } + if (none(valid_final)) return true; + const vbool<K> valid1 = intersector.intersectK(valid,ray,v2,v3,v1,mapUV,hit); + if (any(valid1)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV(valid1,hit,g,subgrid,i); + epilog(valid1,hit); + valid_final &= !valid1; + } + return none(valid_final); + } + + static __forceinline bool intersect1(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + MoellerTrumboreHitM<M,UVIdentity<M>> &hit) + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + MoellerTrumboreIntersectorK<8,K> intersector; + UVIdentity<M> mapUV; + return intersector.intersectEdge(ray,k,v0,e1,e2,mapUV,hit); + } + + }; + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKMoellerTrumbore : public SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter> + { + __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKMoellerTrumboreBase<M,K,filter>(valid,ray) {} + + __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + UVIdentity<M> mapUV; + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); + Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + MoellerTrumboreIntersectorK<M,K> intersector; + /* intersect first triangle */ + if (intersector.intersect(ray,k,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + /* intersect second triangle */ + if (intersector.intersect(ray,k,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + UVIdentity<M> mapUV; + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); + Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + MoellerTrumboreIntersectorK<M,K> intersector; + /* intersect first triangle */ + if (intersector.intersect(ray,k,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) return true; + } + + /* intersect second triangle */ + if (intersector.intersect(ray,k,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.absDen - hit.U; + hit.V = hit.absDen - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) return true; + } + return false; + } + }; + + +#if defined (__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<int K, bool filter> + struct SubGridQuadMIntersectorKMoellerTrumbore<4,K,filter> : public SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter> + { + __forceinline SubGridQuadMIntersectorKMoellerTrumbore(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKMoellerTrumboreBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + const vbool8 flags(0,0,0,0,1,1,1,1); + + UVIdentity<8> mapUV; + MoellerTrumboreHitM<8,UVIdentity<8>> hit(mapUV); + if (SubGridQuadMIntersectorKMoellerTrumboreBase<8,K,filter>::intersect1(ray,k,vtx0,vtx1,vtx2,hit)) + { + const vfloat8 U = select(flags,hit.absDen - hit.V,hit.U); + const vfloat8 V = select(flags,hit.absDen - hit.U,hit.V); + hit.U = U; + hit.V = V; + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); + interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); + if (unlikely(epilog(hit.valid,hit))) + return true; + + } + return false; + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); + } + }; + +#endif + + + + } +} diff --git a/thirdparty/embree/kernels/geometry/subgrid_intersector_pluecker.h b/thirdparty/embree/kernels/geometry/subgrid_intersector_pluecker.h new file mode 100644 index 0000000000..5ded56e1f7 --- /dev/null +++ b/thirdparty/embree/kernels/geometry/subgrid_intersector_pluecker.h @@ -0,0 +1,367 @@ +// Copyright 2009-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "subgrid.h" +#include "quad_intersector_moeller.h" +#include "quad_intersector_pluecker.h" + +namespace embree +{ + namespace isa + { + + template<int M> + __forceinline void interpolateUV(PlueckerHitM<M,UVIdentity<M>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const vint<M> &stepX, const vint<M> &stepY) + { + /* correct U,V interpolation across the entire grid */ + const vint<M> sx((int)subgrid.x()); + const vint<M> sy((int)subgrid.y()); + const vint<M> sxM(sx + stepX); + const vint<M> syM(sy + stepY); + const float inv_resX = rcp((float)((int)g.resX-1)); + const float inv_resY = rcp((float)((int)g.resY-1)); + hit.U = (hit.U + vfloat<M>(sxM) * hit.UVW) * inv_resX; + hit.V = (hit.V + vfloat<M>(syM) * hit.UVW) * inv_resY; + } + + template<int M, bool filter> + struct SubGridQuadMIntersector1Pluecker; + + template<int M, bool filter> + struct SubGridQuadMIntersector1Pluecker + { + __forceinline SubGridQuadMIntersector1Pluecker() {} + + __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + __forceinline void intersect(RayHit& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + UVIdentity<M> mapUV; + PlueckerHitM<M,UVIdentity<M>> hit(mapUV); + PlueckerIntersector1<M> intersector(ray,nullptr); + + Intersect1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.UVW - hit.U; + hit.V = hit.UVW - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + UVIdentity<M> mapUV; + PlueckerHitM<M,UVIdentity<M>> hit(mapUV); + PlueckerIntersector1<M> intersector(ray,nullptr); + Occluded1EpilogMU<M,filter> epilog(ray,context,subgrid.geomID(),subgrid.primID()); + + /* intersect first triangle */ + if (intersector.intersect(ray,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) + return true; + } + + /* intersect second triangle */ + if (intersector.intersect(ray,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.UVW - hit.U; + hit.V = hit.UVW - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) + return true; + } + return false; + } + }; + +#if defined (__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<bool filter> + struct SubGridQuadMIntersector1Pluecker<4,filter> + { + __forceinline SubGridQuadMIntersector1Pluecker() {} + + __forceinline SubGridQuadMIntersector1Pluecker(const Ray& ray, const void* ptr) {} + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid& subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + + UVIdentity<8> mapUV; + PlueckerHitM<8,UVIdentity<8>> hit(mapUV); + PlueckerIntersector1<8> intersector(ray,nullptr); + const vbool8 flags(0,0,0,0,1,1,1,1); + if (unlikely(intersector.intersect(ray,vtx0,vtx1,vtx2,mapUV,hit))) + { + /* correct U,V interpolation across the entire grid */ + const vfloat8 U = select(flags,hit.UVW - hit.V,hit.U); + const vfloat8 V = select(flags,hit.UVW - hit.U,hit.V); + hit.U = U; + hit.V = V; + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); + interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); + if (unlikely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool intersect(RayHit& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Intersect1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded(Ray& ray, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid& subgrid) const + { + return intersect(ray,v0,v1,v2,v3,g,subgrid,Occluded1EpilogMU<8,filter>(ray,context,subgrid.geomID(),subgrid.primID())); + } + }; + +#endif + + + /* ----------------------------- */ + /* -- ray packet intersectors -- */ + /* ----------------------------- */ + + template<int K> + __forceinline void interpolateUV(const vbool<K>& valid, PlueckerHitK<K,UVIdentity<K>> &hit,const GridMesh::Grid &g, const SubGrid& subgrid, const unsigned int i) + { + /* correct U,V interpolation across the entire grid */ + const unsigned int sx = subgrid.x() + (unsigned int)(i % 2); + const unsigned int sy = subgrid.y() + (unsigned int)(i >>1); + const float inv_resX = rcp((float)(int)(g.resX-1)); + const float inv_resY = rcp((float)(int)(g.resY-1)); + hit.U = select(valid,(hit.U + vfloat<K>((float)sx) * hit.UVW) * inv_resX,hit.U); + hit.V = select(valid,(hit.V + vfloat<K>((float)sy) * hit.UVW) * inv_resY,hit.V); + } + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKPlueckerBase + { + __forceinline SubGridQuadMIntersectorKPlueckerBase(const vbool<K>& valid, const RayK<K>& ray) {} + + template<typename Epilog> + __forceinline bool intersectK(const vbool<K>& valid, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + UVIdentity<K> mapUV; + PlueckerHitK<K,UVIdentity<K>> hit(mapUV); + PlueckerIntersectorK<M,K> intersector; + + const vbool<K> valid0 = intersector.intersectK(valid,ray,v0,v1,v3,mapUV,hit); + if (any(valid0)) + { + interpolateUV(valid0,hit,g,subgrid,i); + epilog(valid0,hit); + } + const vbool<K> valid1 = intersector.intersectK(valid,ray,v2,v3,v1,mapUV,hit); + if (any(valid1)) + { + hit.U = hit.UVW - hit.U; + hit.V = hit.UVW - hit.V; + interpolateUV(valid1,hit,g,subgrid,i); + epilog(valid1,hit); + } + return any(valid0|valid1); + } + + template<typename Epilog> + __forceinline bool occludedK(const vbool<K>& valid, + RayK<K>& ray, + const Vec3vf<K>& v0, + const Vec3vf<K>& v1, + const Vec3vf<K>& v2, + const Vec3vf<K>& v3, + const GridMesh::Grid &g, + const SubGrid &subgrid, + const unsigned int i, + const Epilog& epilog) const + { + UVIdentity<K> mapUV; + PlueckerHitK<K,UVIdentity<K>> hit(mapUV); + PlueckerIntersectorK<M,K> intersector; + + vbool<K> valid_final = valid; + const vbool<K> valid0 = intersector.intersectK(valid,ray,v0,v1,v3,mapUV,hit); + if (any(valid0)) + { + interpolateUV(valid0,hit,g,subgrid,i); + epilog(valid0,hit); + valid_final &= !valid0; + } + if (none(valid_final)) return true; + const vbool<K> valid1 = intersector.intersectK(valid,ray,v2,v3,v1,mapUV,hit); + if (any(valid1)) + { + hit.U = hit.UVW - hit.U; + hit.V = hit.UVW - hit.V; + interpolateUV(valid1,hit,g,subgrid,i); + epilog(valid1,hit); + valid_final &= !valid1; + } + return none(valid_final); + } + + + }; + + + + + template<int M, int K, bool filter> + struct SubGridQuadMIntersectorKPluecker : public SubGridQuadMIntersectorKPlueckerBase<M,K,filter> + { + __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKPlueckerBase<M,K,filter>(valid,ray) {} + + __forceinline void intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + UVIdentity<M> mapUV; + PlueckerHitM<M,UVIdentity<M>> hit(mapUV); + Intersect1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + PlueckerIntersectorK<M,K> intersector; + + /* intersect first triangle */ + if (intersector.intersect(ray,k,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + + /* intersect second triangle */ + if (intersector.intersect(ray,k,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.UVW - hit.U; + hit.V = hit.UVW - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + epilog(hit.valid,hit); + } + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, const Vec3vf<M>& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + UVIdentity<M> mapUV; + PlueckerHitM<M,UVIdentity<M>> hit(mapUV); + Occluded1KEpilogMU<M,K,filter> epilog(ray,k,context,subgrid.geomID(),subgrid.primID()); + PlueckerIntersectorK<M,K> intersector; + + /* intersect first triangle */ + if (intersector.intersect(ray,k,v0,v1,v3,mapUV,hit)) + { + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) return true; + } + + /* intersect second triangle */ + if (intersector.intersect(ray,k,v2,v3,v1,mapUV,hit)) + { + hit.U = hit.UVW - hit.U; + hit.V = hit.UVW - hit.V; + interpolateUV<M>(hit,g,subgrid,vint<M>(0,1,1,0),vint<M>(0,0,1,1)); + if (epilog(hit.valid,hit)) return true; + } + return false; + } + }; + + +#if defined (__AVX__) + + /*! Intersects 4 quads with 1 ray using AVX */ + template<int K, bool filter> + struct SubGridQuadMIntersectorKPluecker<4,K,filter> : public SubGridQuadMIntersectorKPlueckerBase<4,K,filter> + { + __forceinline SubGridQuadMIntersectorKPluecker(const vbool<K>& valid, const RayK<K>& ray) + : SubGridQuadMIntersectorKPlueckerBase<4,K,filter>(valid,ray) {} + + template<typename Epilog> + __forceinline bool intersect1(RayK<K>& ray, size_t k,const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, + const GridMesh::Grid &g, const SubGrid &subgrid, const Epilog& epilog) const + { + const Vec3vf8 vtx0(vfloat8(v0.x,v2.x),vfloat8(v0.y,v2.y),vfloat8(v0.z,v2.z)); +#if !defined(EMBREE_BACKFACE_CULLING) + const Vec3vf8 vtx1(vfloat8(v1.x),vfloat8(v1.y),vfloat8(v1.z)); + const Vec3vf8 vtx2(vfloat8(v3.x),vfloat8(v3.y),vfloat8(v3.z)); +#else + const Vec3vf8 vtx1(vfloat8(v1.x,v3.x),vfloat8(v1.y,v3.y),vfloat8(v1.z,v3.z)); + const Vec3vf8 vtx2(vfloat8(v3.x,v1.x),vfloat8(v3.y,v1.y),vfloat8(v3.z,v1.z)); +#endif + UVIdentity<8> mapUV; + PlueckerHitM<8,UVIdentity<8>> hit(mapUV); + PlueckerIntersectorK<8,K> intersector; + const vbool8 flags(0,0,0,0,1,1,1,1); + if (unlikely(intersector.intersect(ray,k,vtx0,vtx1,vtx2,mapUV,hit))) + { + /* correct U,V interpolation across the entire grid */ + const vfloat8 U = select(flags,hit.UVW - hit.V,hit.U); + const vfloat8 V = select(flags,hit.UVW - hit.U,hit.V); + hit.U = U; + hit.V = V; + hit.vNg *= select(flags,vfloat8(-1.0f),vfloat8(1.0f)); + interpolateUV<8>(hit,g,subgrid,vint<8>(0,1,1,0,0,1,1,0),vint<8>(0,0,1,1,0,0,1,1)); + if (unlikely(epilog(hit.valid,hit))) + return true; + } + return false; + } + + __forceinline bool intersect1(RayHitK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Intersect1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); + } + + __forceinline bool occluded1(RayK<K>& ray, size_t k, IntersectContext* context, + const Vec3vf4& v0, const Vec3vf4& v1, const Vec3vf4& v2, const Vec3vf4& v3, const GridMesh::Grid &g, const SubGrid &subgrid) const + { + return intersect1(ray,k,v0,v1,v2,v3,g,subgrid,Occluded1KEpilogMU<8,K,filter>(ray,k,context,subgrid.geomID(),subgrid.primID())); + } + }; +#endif + + + } +} diff --git a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h b/thirdparty/embree/kernels/geometry/subgrid_mb_intersector.h index 400a88b985..473d656e24 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/subgrid_mb_intersector.h +++ b/thirdparty/embree/kernels/geometry/subgrid_mb_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -45,13 +45,13 @@ namespace embree return PrimitivePointQuery1<Primitive>::pointQuery(query, context, subgrid); } - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { - vfloat<Nx> dist; + vfloat<N> dist; const float time = prim[i].adjustTime(ray.time()); assert(time <= 1.0f); @@ -68,15 +68,15 @@ namespace embree } } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { const float time = prim[i].adjustTime(ray.time()); assert(time <= 1.0f); - vfloat<Nx> dist; + vfloat<N> dist; size_t mask = isec1.intersect(&prim[i].qnode,tray,time,dist); while(mask != 0) { @@ -132,7 +132,7 @@ namespace embree const GridMesh::Grid &g = mesh->grid(subgrid.primID()); vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(ray.time(), ftime); + const vint<K> itime = mesh->timeSegment<K>(ray.time(), ftime); Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]); pre.intersect1(ray,k,context,v0,v1,v2,v3,g,subgrid); } @@ -144,7 +144,7 @@ namespace embree const GridMesh::Grid &g = mesh->grid(subgrid.primID()); vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(ray.time(), ftime); + const vint<K> itime = mesh->timeSegment<K>(ray.time(), ftime); Vec3vf4 v0,v1,v2,v3; subgrid.gatherMB(v0,v1,v2,v3,context->scene,itime[k],ftime[k]); return pre.occluded1(ray,k,context,v0,v1,v2,v3,g,subgrid); } @@ -156,7 +156,7 @@ namespace embree for (size_t j=0;j<num;j++) { size_t m_valid = movemask(prim[j].qnode.validMask()); - const vfloat<K> time = prim[j].adjustTime(ray.time()); + const vfloat<K> time = prim[j].template adjustTime<K>(ray.time()); vfloat<K> dist; while(m_valid) @@ -177,7 +177,7 @@ namespace embree for (size_t j=0;j<num;j++) { size_t m_valid = movemask(prim[j].qnode.validMask()); - const vfloat<K> time = prim[j].adjustTime(ray.time()); + const vfloat<K> time = prim[j].template adjustTime<K>(ray.time()); vfloat<K> dist; while(m_valid) { @@ -190,10 +190,10 @@ namespace embree return !valid0; } - template<int Nx, bool robust> - static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline void intersect(const Accel::Intersectors* This, Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { vfloat<N> dist; @@ -210,10 +210,10 @@ namespace embree } } - template<int Nx, bool robust> - static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,Nx,robust> &tray, size_t& lazy_node) + template<bool robust> + static __forceinline bool occluded(const Accel::Intersectors* This, Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive* prim, size_t num, const TravRay<N,robust> &tray, size_t& lazy_node) { - BVHNQuantizedBaseNodeIntersector1<N,Nx,robust> isec1; + BVHNQuantizedBaseNodeIntersector1<N,robust> isec1; for (size_t i=0;i<num;i++) { diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle.h b/thirdparty/embree/kernels/geometry/triangle.h index 0dedf6dc4c..24b758ae48 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle.h +++ b/thirdparty/embree/kernels/geometry/triangle.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h b/thirdparty/embree/kernels/geometry/triangle_intersector.h index 125a42c5fe..2cdff78ec8 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector.h +++ b/thirdparty/embree/kernels/geometry/triangle_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,24 +11,24 @@ namespace embree namespace isa { /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMIntersector1Moeller { typedef TriangleM<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; + typedef MoellerTrumboreIntersector1<M> Precalculations; /*! Intersect a ray with the M triangles and updates the hit. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleM<M>& tri) { STAT3(normal.trav_prims,1,1,1); - pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,UVIdentity<M>(),Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of M triangles. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleM<M>& tri) { STAT3(shadow.trav_prims,1,1,1); - return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersectEdge(ray,tri.v0,tri.e1,tri.e2,UVIdentity<M>(),Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -39,11 +39,11 @@ namespace embree }; /*! Intersects M triangles with K rays. */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMIntersectorKMoeller { typedef TriangleM<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + typedef MoellerTrumboreIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleM<M>& tri) @@ -56,7 +56,7 @@ namespace embree const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i); const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i); const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i); - pre.intersectEdgeK(valid_i,ray,p0,e1,e2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + pre.intersectEdgeK(valid_i,ray,p0,e1,e2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); } } @@ -72,7 +72,7 @@ namespace embree const Vec3vf<K> p0 = broadcast<vfloat<K>>(tri.v0,i); const Vec3vf<K> e1 = broadcast<vfloat<K>>(tri.e1,i); const Vec3vf<K> e2 = broadcast<vfloat<K>>(tri.e2,i); - pre.intersectEdgeK(valid0,ray,p0,e1,e2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + pre.intersectEdgeK(valid0,ray,p0,e1,e2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); if (none(valid0)) break; } return !valid0; @@ -82,14 +82,14 @@ namespace embree static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri) { STAT3(normal.trav_prims,1,1,1); - pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,UVIdentity<M>(),Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleM<M>& tri) { STAT3(shadow.trav_prims,1,1,1); - return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + return pre.intersectEdge(ray,k,tri.v0,tri.e1,tri.e2,UVIdentity<M>(),Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h b/thirdparty/embree/kernels/geometry/triangle_intersector_moeller.h index b5a8519236..0a42d8f08b 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_moeller.h +++ b/thirdparty/embree/kernels/geometry/triangle_intersector_moeller.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -18,13 +18,13 @@ namespace embree { namespace isa { - template<int M> + template<int M, typename UVMapper> struct MoellerTrumboreHitM { - __forceinline MoellerTrumboreHitM() {} + __forceinline MoellerTrumboreHitM(const UVMapper& mapUV) : mapUV(mapUV) {} - __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng) - : U(U), V(V), T(T), absDen(absDen), valid(valid), vNg(Ng) {} + __forceinline MoellerTrumboreHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& T, const vfloat<M>& absDen, const Vec3vf<M>& Ng, const UVMapper& mapUV) + : U(U), V(V), T(T), absDen(absDen), mapUV(mapUV), valid(valid), vNg(Ng) {} __forceinline void finalize() { @@ -32,8 +32,13 @@ namespace embree vt = T * rcpAbsDen; vu = U * rcpAbsDen; vv = V * rcpAbsDen; + mapUV(vu,vv,vNg); } + __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); } + __forceinline vfloat<M> t () const { return vt; } + __forceinline Vec3vf<M> Ng() const { return vNg; } + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } __forceinline float t (const size_t i) const { return vt[i]; } __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } @@ -43,6 +48,7 @@ namespace embree vfloat<M> V; vfloat<M> T; vfloat<M> absDen; + UVMapper mapUV; public: vbool<M> valid; @@ -52,20 +58,22 @@ namespace embree Vec3vf<M> vNg; }; - template<int M> + template<int M, bool early_out = true> struct MoellerTrumboreIntersector1 { __forceinline MoellerTrumboreIntersector1() {} __forceinline MoellerTrumboreIntersector1(const Ray& ray, const void* ptr) {} + template<typename UVMapper> __forceinline bool intersect(const vbool<M>& valid0, Ray& ray, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_e1, const Vec3vf<M>& tri_e2, const Vec3vf<M>& tri_Ng, - MoellerTrumboreHitM<M>& hit) const + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const { /* calculate denominator */ vbool<M> valid = valid0; @@ -88,122 +96,160 @@ namespace embree #else valid &= (den != vfloat<M>(zero)) & (U >= 0.0f) & (V >= 0.0f) & (U+V<=absDen); #endif - if (likely(none(valid))) return false; + if (likely(early_out && none(valid))) return false; /* perform depth test */ const vfloat<M> T = dot(Vec3vf<M>(tri_Ng),C) ^ sgnDen; valid &= (absDen*vfloat<M>(ray.tnear()) < T) & (T <= absDen*vfloat<M>(ray.tfar)); - if (likely(none(valid))) return false; - - + if (likely(early_out && none(valid))) return false; + /* update hit information */ - new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); + new (&hit) MoellerTrumboreHitM<M,UVMapper>(valid,U,V,T,absDen,tri_Ng,mapUV); return true; } + template<typename UVMapper> + __forceinline bool intersectEdge(const vbool<M>& valid, + Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_e1, + const Vec3vf<M>& tri_e2, + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const + { + const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1); + return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,mapUV,hit); + } + + template<typename UVMapper> __forceinline bool intersectEdge(Ray& ray, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_e1, const Vec3vf<M>& tri_e2, - MoellerTrumboreHitM<M>& hit) const + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const { vbool<M> valid = true; const Vec3<vfloat<M>> tri_Ng = cross(tri_e2,tri_e1); - return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,hit); + return intersect(valid,ray,tri_v0,tri_e1,tri_e2,tri_Ng,mapUV,hit); } - + + template<typename UVMapper> __forceinline bool intersect(Ray& ray, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, - MoellerTrumboreHitM<M>& hit) const + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const { const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,v0,e1,e2,hit); + return intersectEdge(ray,v0,e1,e2,mapUV,hit); } + template<typename UVMapper> __forceinline bool intersect(const vbool<M>& valid, Ray& ray, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, - MoellerTrumboreHitM<M>& hit) const + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const { const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v2-v0; - return intersectEdge(valid,ray,v0,e1,e2,hit); + return intersectEdge(valid,ray,v0,e1,e2,mapUV,hit); } - template<typename Epilog> + template<typename UVMapper, typename Epilog> __forceinline bool intersectEdge(Ray& ray, const Vec3vf<M>& v0, const Vec3vf<M>& e1, const Vec3vf<M>& e2, + const UVMapper& mapUV, const Epilog& epilog) const { - MoellerTrumboreHitM<M> hit; - if (likely(intersectEdge(ray,v0,e1,e2,hit))) return epilog(hit.valid,hit); + MoellerTrumboreHitM<M,UVMapper> hit(mapUV); + if (likely(intersectEdge(ray,v0,e1,e2,mapUV,hit))) return epilog(hit.valid,hit); return false; } - template<typename Epilog> + template<typename UVMapper, typename Epilog> __forceinline bool intersect(Ray& ray, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, + const UVMapper& mapUV, const Epilog& epilog) const { - MoellerTrumboreHitM<M> hit; - if (likely(intersect(ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); + MoellerTrumboreHitM<M,UVMapper> hit(mapUV); + if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit); return false; } template<typename Epilog> + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + auto mapUV = UVIdentity<M>(); + MoellerTrumboreHitM<M,UVIdentity<M>> hit(mapUV); + if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename UVMapper, typename Epilog> __forceinline bool intersect(const vbool<M>& valid, Ray& ray, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, + const UVMapper& mapUV, const Epilog& epilog) const { - MoellerTrumboreHitM<M> hit; - if (likely(intersect(valid,ray,v0,v1,v2,hit))) return epilog(hit.valid,hit); + MoellerTrumboreHitM<M,UVMapper> hit(mapUV); + if (likely(intersect(valid,ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit); return false; } }; - template<int K> + template<int K, typename UVMapper> struct MoellerTrumboreHitK { - __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng) - : U(U), V(V), T(T), absDen(absDen), Ng(Ng) {} + __forceinline MoellerTrumboreHitK(const UVMapper& mapUV) : mapUV(mapUV) {} + __forceinline MoellerTrumboreHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& T, const vfloat<K>& absDen, const Vec3vf<K>& Ng, const UVMapper& mapUV) + : U(U), V(V), T(T), absDen(absDen), Ng(Ng), mapUV(mapUV) {} __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const { const vfloat<K> rcpAbsDen = rcp(absDen); const vfloat<K> t = T * rcpAbsDen; - const vfloat<K> u = U * rcpAbsDen; - const vfloat<K> v = V * rcpAbsDen; - return std::make_tuple(u,v,t,Ng); + vfloat<K> u = U * rcpAbsDen; + vfloat<K> v = V * rcpAbsDen; + Vec3vf<K> vNg = Ng; + mapUV(u,v,vNg); + return std::make_tuple(u,v,t,vNg); } - private: - const vfloat<K> U; - const vfloat<K> V; + vfloat<K> U; + vfloat<K> V; const vfloat<K> T; const vfloat<K> absDen; const Vec3vf<K> Ng; + const UVMapper& mapUV; }; template<int M, int K> struct MoellerTrumboreIntersectorK { + __forceinline MoellerTrumboreIntersectorK() {} __forceinline MoellerTrumboreIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> + template<typename UVMapper> __forceinline vbool<K> intersectK(const vbool<K>& valid0, //RayK<K>& ray, const Vec3vf<K>& ray_org, @@ -214,7 +260,8 @@ namespace embree const Vec3vf<K>& tri_e1, const Vec3vf<K>& tri_e2, const Vec3vf<K>& tri_Ng, - const Epilog& epilog) const + const UVMapper& mapUV, + MoellerTrumboreHitK<K,UVMapper> &hit) const { /* calculate denominator */ vbool<K> valid = valid0; @@ -254,11 +301,47 @@ namespace embree #endif /* calculate hit information */ - MoellerTrumboreHitK<K> hit(U,V,T,absDen,tri_Ng); - return epilog(valid,hit); + new (&hit) MoellerTrumboreHitK<K,UVMapper>(U,V,T,absDen,tri_Ng,mapUV); + return valid; + } + + /*! Intersects K rays with one of M triangles. */ + template<typename UVMapper> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const UVMapper& mapUV, + MoellerTrumboreHitK<K,UVMapper> &hit) const + { + const Vec3vf<K> e1 = tri_v0-tri_v1; + const Vec3vf<K> e2 = tri_v2-tri_v0; + const Vec3vf<K> Ng = cross(e2,e1); + return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,mapUV,hit); } + /*! Intersects K rays with one of M triangles. */ + template<typename UVMapper, typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV); + const Vec3vf<K> e1 = tri_v0-tri_v1; + const Vec3vf<K> e2 = tri_v2-tri_v0; + const Vec3vf<K> Ng = cross(e2,e1); + const vbool<K> valid = intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,mapUV,hit); + return epilog(valid,hit); + } + + + template<typename Epilog> __forceinline vbool<K> intersectK(const vbool<K>& valid0, RayK<K>& ray, @@ -267,32 +350,40 @@ namespace embree const Vec3vf<K>& tri_v2, const Epilog& epilog) const { + UVIdentity<K> mapUV; + MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV); const Vec3vf<K> e1 = tri_v0-tri_v1; const Vec3vf<K> e2 = tri_v2-tri_v0; const Vec3vf<K> Ng = cross(e2,e1); - return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,epilog); + const vbool<K> valid = intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,e1,e2,Ng,mapUV,hit); + return epilog(valid,hit); } /*! Intersects K rays with one of M triangles. */ - template<typename Epilog> + template<typename UVMapper, typename Epilog> __forceinline vbool<K> intersectEdgeK(const vbool<K>& valid0, RayK<K>& ray, const Vec3vf<K>& tri_v0, const Vec3vf<K>& tri_e1, - const Vec3vf<K>& tri_e2, + const Vec3vf<K>& tri_e2, + const UVMapper& mapUV, const Epilog& epilog) const { + MoellerTrumboreHitK<K,UVIdentity<K>> hit(mapUV); const Vec3vf<K> tri_Ng = cross(tri_e2,tri_e1); - return intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,epilog); + const vbool<K> valid = intersectK(valid0,ray.org,ray.dir,ray.tnear(),ray.tfar,tri_v0,tri_e1,tri_e2,tri_Ng,mapUV,hit); + return epilog(valid,hit); } /*! Intersect k'th ray from ray packet of size K with M triangles. */ + template<typename UVMapper> __forceinline bool intersectEdge(RayK<K>& ray, size_t k, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_e1, const Vec3vf<M>& tri_e2, - MoellerTrumboreHitM<M>& hit) const + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const { /* calculate denominator */ typedef Vec3vf<M> Vec3vfM; @@ -324,19 +415,21 @@ namespace embree if (likely(none(valid))) return false; /* calculate hit information */ - new (&hit) MoellerTrumboreHitM<M>(valid,U,V,T,absDen,tri_Ng); + new (&hit) MoellerTrumboreHitM<M,UVMapper>(valid,U,V,T,absDen,tri_Ng,mapUV); return true; } + template<typename UVMapper> __forceinline bool intersectEdge(RayK<K>& ray, size_t k, const BBox<vfloat<M>>& time_range, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, - MoellerTrumboreHitM<M>& hit) const + const Vec3vf<M>& tri_e2, + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const { - if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,hit))) + if (likely(intersect(ray,k,tri_v0,tri_e1,tri_e2,mapUV,hit))) { hit.valid &= time_range.lower <= vfloat<M>(ray.time[k]); hit.valid &= vfloat<M>(ray.time[k]) < time_range.upper; @@ -345,58 +438,87 @@ namespace embree return false; } - template<typename Epilog> + template<typename UVMapper> + __forceinline bool intersect(RayK<K>& ray, + size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const UVMapper& mapUV, + MoellerTrumboreHitM<M,UVMapper>& hit) const + { + const Vec3vf<M> e1 = v0-v1; + const Vec3vf<M> e2 = v2-v0; + return intersectEdge(ray,k,v0,e1,e2,mapUV,hit); + } + + template<typename UVMapper, typename Epilog> __forceinline bool intersectEdge(RayK<K>& ray, size_t k, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, + const Vec3vf<M>& tri_e2, + const UVMapper& mapUV, const Epilog& epilog) const { - MoellerTrumboreHitM<M> hit; - if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + MoellerTrumboreHitM<M,UVMapper> hit(mapUV); + if (likely(intersectEdge(ray,k,tri_v0,tri_e1,tri_e2,mapUV,hit))) return epilog(hit.valid,hit); return false; } - template<typename Epilog> + template<typename UVMapper, typename Epilog> __forceinline bool intersectEdge(RayK<K>& ray, size_t k, const BBox<vfloat<M>>& time_range, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_e1, - const Vec3vf<M>& tri_e2, + const Vec3vf<M>& tri_e2, + const UVMapper& mapUV, const Epilog& epilog) const { - MoellerTrumboreHitM<M> hit; - if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,hit))) return epilog(hit.valid,hit); + MoellerTrumboreHitM<M,UVMapper> hit(mapUV); + if (likely(intersectEdge(ray,k,time_range,tri_v0,tri_e1,tri_e2,mapUV,hit))) return epilog(hit.valid,hit); return false; } - template<typename Epilog> + template<typename UVMapper, typename Epilog> __forceinline bool intersect(RayK<K>& ray, size_t k, const Vec3vf<M>& v0, const Vec3vf<M>& v1, - const Vec3vf<M>& v2, + const Vec3vf<M>& v2, + const UVMapper& mapUV, const Epilog& epilog) const { const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,k,v0,e1,e2,epilog); + return intersectEdge(ray,k,v0,e1,e2,mapUV,epilog); } template<typename Epilog> __forceinline bool intersect(RayK<K>& ray, size_t k, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + return intersect(ray,k,v0,v1,v2,UVIdentity<M>(),epilog); + } + + template<typename UVMapper, typename Epilog> + __forceinline bool intersect(RayK<K>& ray, + size_t k, const BBox<vfloat<M>>& time_range, const Vec3vf<M>& v0, const Vec3vf<M>& v1, const Vec3vf<M>& v2, + const UVMapper& mapUV, const Epilog& epilog) const { const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v2-v0; - return intersectEdge(ray,k,time_range,v0,e1,e2,epilog); + return intersectEdge(ray,k,time_range,v0,e1,e2,mapUV,epilog); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h b/thirdparty/embree/kernels/geometry/triangle_intersector_pluecker.h index f1de99d208..8fbefcea88 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_pluecker.h +++ b/thirdparty/embree/kernels/geometry/triangle_intersector_pluecker.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -22,50 +22,60 @@ namespace embree template<int M, typename UVMapper> struct PlueckerHitM { - __forceinline PlueckerHitM(const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV) - : U(U), V(V), UVW(UVW), mapUV(mapUV), vt(t), vNg(Ng) {} + __forceinline PlueckerHitM(const UVMapper& mapUV) : mapUV(mapUV) {} + + __forceinline PlueckerHitM(const vbool<M>& valid, const vfloat<M>& U, const vfloat<M>& V, const vfloat<M>& UVW, const vfloat<M>& t, const Vec3vf<M>& Ng, const UVMapper& mapUV) + : U(U), V(V), UVW(UVW), mapUV(mapUV), valid(valid), vt(t), vNg(Ng) {} __forceinline void finalize() { const vbool<M> invalid = abs(UVW) < min_rcp_input; const vfloat<M> rcpUVW = select(invalid,vfloat<M>(0.0f),rcp(UVW)); - vu = U * rcpUVW; - vv = V * rcpUVW; - mapUV(vu,vv); + vu = min(U * rcpUVW,1.0f); + vv = min(V * rcpUVW,1.0f); + mapUV(vu,vv,vNg); } - + + __forceinline Vec2vf<M> uv() const { return Vec2vf<M>(vu,vv); } + __forceinline vfloat<M> t () const { return vt; } + __forceinline Vec3vf<M> Ng() const { return vNg; } + __forceinline Vec2f uv (const size_t i) const { return Vec2f(vu[i],vv[i]); } __forceinline float t (const size_t i) const { return vt[i]; } __forceinline Vec3fa Ng(const size_t i) const { return Vec3fa(vNg.x[i],vNg.y[i],vNg.z[i]); } - private: - const vfloat<M> U; - const vfloat<M> V; - const vfloat<M> UVW; + public: + vfloat<M> U; + vfloat<M> V; + vfloat<M> UVW; const UVMapper& mapUV; public: + vbool<M> valid; vfloat<M> vu; vfloat<M> vv; vfloat<M> vt; Vec3vf<M> vNg; }; - template<int M> + template<int M, bool early_out = true> struct PlueckerIntersector1 { __forceinline PlueckerIntersector1() {} __forceinline PlueckerIntersector1(const Ray& ray, const void* ptr) {} - template<typename UVMapper, typename Epilog> - __forceinline bool intersect(Ray& ray, + template<typename UVMapper> + __forceinline bool intersect(const vbool<M>& valid0, + Ray& ray, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_v1, const Vec3vf<M>& tri_v2, const UVMapper& mapUV, - const Epilog& epilog) const + PlueckerHitM<M,UVMapper>& hit) const { + vbool<M> valid = valid0; + /* calculate vertices relative to ray origin */ const Vec3vf<M> O = Vec3vf<M>((Vec3fa)ray.org); const Vec3vf<M> D = Vec3vf<M>((Vec3fa)ray.dir); @@ -85,11 +95,11 @@ namespace embree const vfloat<M> UVW = U+V+W; const vfloat<M> eps = float(ulp)*abs(UVW); #if defined(EMBREE_BACKFACE_CULLING) - vbool<M> valid = max(U,V,W) <= eps; + valid &= max(U,V,W) <= eps; #else - vbool<M> valid = (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); + valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); #endif - if (unlikely(none(valid))) return false; + if (unlikely(early_out && none(valid))) return false; /* calculate geometry normal and denominator */ const Vec3vf<M> Ng = stable_triangle_normal(e0,e1,e2); @@ -100,33 +110,123 @@ namespace embree const vfloat<M> t = rcp(den)*T; valid &= vfloat<M>(ray.tnear()) <= t & t <= vfloat<M>(ray.tfar); valid &= den != vfloat<M>(zero); - if (unlikely(none(valid))) return false; + if (unlikely(early_out && none(valid))) return false; /* update hit information */ - PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV); - return epilog(valid,hit); + new (&hit) PlueckerHitM<M,UVMapper>(valid,U,V,UVW,t,Ng,mapUV); + return true; + } + + template<typename UVMapper> + __forceinline bool intersectEdge(const vbool<M>& valid, + Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const UVMapper& mapUV, + PlueckerHitM<M,UVMapper>& hit) const + { + return intersect(valid,ray,tri_v0,tri_v1,tri_v2,mapUV,hit); + } + + template<typename UVMapper> + __forceinline bool intersectEdge(Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const UVMapper& mapUV, + PlueckerHitM<M,UVMapper>& hit) const + { + vbool<M> valid = true; + return intersect(valid,ray,tri_v0,tri_v1,tri_v2,mapUV,hit); + } + + template<typename UVMapper> + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const UVMapper& mapUV, + PlueckerHitM<M,UVMapper>& hit) const + { + return intersectEdge(ray,tri_v0,tri_v1,tri_v2,mapUV,hit); + } + + template<typename UVMapper, typename Epilog> + __forceinline bool intersectEdge(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& e1, + const Vec3vf<M>& e2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + PlueckerHitM<M,UVMapper> hit(mapUV); + if (likely(intersectEdge(ray,v0,e1,e2,mapUV,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename UVMapper, typename Epilog> + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + PlueckerHitM<M,UVMapper> hit(mapUV); + if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit); + return false; } + + template<typename Epilog> + __forceinline bool intersect(Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const Epilog& epilog) const + { + auto mapUV = UVIdentity<M>(); + PlueckerHitM<M,UVIdentity<M>> hit(mapUV); + if (likely(intersect(ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit); + return false; + } + + template<typename UVMapper, typename Epilog> + __forceinline bool intersect(const vbool<M>& valid, + Ray& ray, + const Vec3vf<M>& v0, + const Vec3vf<M>& v1, + const Vec3vf<M>& v2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + PlueckerHitM<M,UVMapper> hit(mapUV); + if (likely(intersect(valid,ray,v0,v1,v2,mapUV,hit))) return epilog(hit.valid,hit); + return false; + } + }; template<int K, typename UVMapper> struct PlueckerHitK { + __forceinline PlueckerHitK(const UVMapper& mapUV) : mapUV(mapUV) {} + __forceinline PlueckerHitK(const vfloat<K>& U, const vfloat<K>& V, const vfloat<K>& UVW, const vfloat<K>& t, const Vec3vf<K>& Ng, const UVMapper& mapUV) - : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {} + : U(U), V(V), UVW(UVW), t(t), Ng(Ng), mapUV(mapUV) {} __forceinline std::tuple<vfloat<K>,vfloat<K>,vfloat<K>,Vec3vf<K>> operator() () const { const vbool<K> invalid = abs(UVW) < min_rcp_input; const vfloat<K> rcpUVW = select(invalid,vfloat<K>(0.0f),rcp(UVW)); - vfloat<K> u = U * rcpUVW; - vfloat<K> v = V * rcpUVW; - mapUV(u,v); - return std::make_tuple(u,v,t,Ng); + vfloat<K> u = min(U * rcpUVW,1.0f); + vfloat<K> v = min(V * rcpUVW,1.0f); + Vec3vf<K> vNg = Ng; + mapUV(u,v,vNg); + return std::make_tuple(u,v,t,vNg); } - - private: - const vfloat<K> U; - const vfloat<K> V; + vfloat<K> U; + vfloat<K> V; const vfloat<K> UVW; const vfloat<K> t; const Vec3vf<K> Ng; @@ -136,17 +236,18 @@ namespace embree template<int M, int K> struct PlueckerIntersectorK { + __forceinline PlueckerIntersectorK() {} __forceinline PlueckerIntersectorK(const vbool<K>& valid, const RayK<K>& ray) {} /*! Intersects K rays with one of M triangles. */ - template<typename UVMapper, typename Epilog> + template<typename UVMapper> __forceinline vbool<K> intersectK(const vbool<K>& valid0, - RayK<K>& ray, - const Vec3vf<K>& tri_v0, - const Vec3vf<K>& tri_v1, - const Vec3vf<K>& tri_v2, - const UVMapper& mapUV, - const Epilog& epilog) const + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const UVMapper& mapUV, + PlueckerHitK<K,UVMapper> &hit) const { /* calculate vertices relative to ray origin */ vbool<K> valid = valid0; @@ -172,7 +273,7 @@ namespace embree #else valid &= (min(U,V,W) >= -eps) | (max(U,V,W) <= eps); #endif - if (unlikely(none(valid))) return false; + if (unlikely(none(valid))) return valid; /* calculate geometry normal and denominator */ const Vec3vf<K> Ng = stable_triangle_normal(e0,e1,e2); @@ -183,21 +284,49 @@ namespace embree const vfloat<K> t = rcp(den)*T; valid &= ray.tnear() <= t & t <= ray.tfar; valid &= den != vfloat<K>(zero); - if (unlikely(none(valid))) return false; + if (unlikely(none(valid))) return valid; /* calculate hit information */ - PlueckerHitK<K,UVMapper> hit(U,V,UVW,t,Ng,mapUV); - return epilog(valid,hit); + new (&hit) PlueckerHitK<K,UVMapper>(U,V,UVW,t,Ng,mapUV); + return valid; + } + + template<typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const Epilog& epilog) const + { + UVIdentity<K> mapUV; + PlueckerHitK<K,UVIdentity<K>> hit(mapUV); + const vbool<K> valid = intersectK(valid0,ray,tri_v0,tri_v1,tri_v2,mapUV,hit); + return epilog(valid,hit); } - /*! Intersect k'th ray from ray packet of size K with M triangles. */ template<typename UVMapper, typename Epilog> + __forceinline vbool<K> intersectK(const vbool<K>& valid0, + RayK<K>& ray, + const Vec3vf<K>& tri_v0, + const Vec3vf<K>& tri_v1, + const Vec3vf<K>& tri_v2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + PlueckerHitK<K,UVMapper> hit(mapUV); + const vbool<K> valid = intersectK(valid0,ray,tri_v0,tri_v1,tri_v2,mapUV,hit); + return epilog(valid,hit); + } + + /*! Intersect k'th ray from ray packet of size K with M triangles. */ + template<typename UVMapper> __forceinline bool intersect(RayK<K>& ray, size_t k, const Vec3vf<M>& tri_v0, const Vec3vf<M>& tri_v1, const Vec3vf<M>& tri_v2, const UVMapper& mapUV, - const Epilog& epilog) const + PlueckerHitM<M,UVMapper> &hit) const { /* calculate vertices relative to ray origin */ const Vec3vf<M> O = broadcast<vfloat<M>>(ray.org,k); @@ -211,10 +340,12 @@ namespace embree const Vec3vf<M> e1 = v0-v1; const Vec3vf<M> e2 = v1-v2; + /* perform edge tests */ const vfloat<M> U = dot(cross(e0,v2+v0),D); const vfloat<M> V = dot(cross(e1,v0+v1),D); const vfloat<M> W = dot(cross(e2,v1+v2),D); + const vfloat<M> UVW = U+V+W; const vfloat<M> eps = float(ulp)*abs(UVW); #if defined(EMBREE_BACKFACE_CULLING) @@ -239,9 +370,38 @@ namespace embree if (unlikely(none(valid))) return false; /* update hit information */ - PlueckerHitM<M,UVMapper> hit(U,V,UVW,t,Ng,mapUV); - return epilog(valid,hit); + new (&hit) PlueckerHitM<M,UVMapper>(valid,U,V,UVW,t,Ng,mapUV); + return true; } + + template<typename UVMapper, typename Epilog> + __forceinline bool intersect(RayK<K>& ray, size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const UVMapper& mapUV, + const Epilog& epilog) const + { + PlueckerHitM<M,UVMapper> hit(mapUV); + if (intersect(ray,k,tri_v0,tri_v1,tri_v2,mapUV,hit)) + return epilog(hit.valid,hit); + return false; + } + + template<typename Epilog> + __forceinline bool intersect(RayK<K>& ray, size_t k, + const Vec3vf<M>& tri_v0, + const Vec3vf<M>& tri_v1, + const Vec3vf<M>& tri_v2, + const Epilog& epilog) const + { + UVIdentity<M> mapUV; + PlueckerHitM<M,UVIdentity<M>> hit(mapUV); + if (intersect(ray,k,tri_v0,tri_v1,tri_v2,mapUV,hit)) + return epilog(hit.valid,hit); + return false; + } + }; } } diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h b/thirdparty/embree/kernels/geometry/triangle_intersector_woop.h index 63e649d8fb..f05dcc4537 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_intersector_woop.h +++ b/thirdparty/embree/kernels/geometry/triangle_intersector_woop.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h b/thirdparty/embree/kernels/geometry/triangle_triangle_intersector.h index 91b35c36f3..50106bcc16 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/triangle_triangle_intersector.h +++ b/thirdparty/embree/kernels/geometry/triangle_triangle_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #include "primitive.h" diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h b/thirdparty/embree/kernels/geometry/trianglei.h index 4f3118cc0c..6aad48a5ef 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglei.h +++ b/thirdparty/embree/kernels/geometry/trianglei.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -343,7 +343,7 @@ namespace embree const TriangleMesh* mesh = scene->get<TriangleMesh>(geomID(index)); vfloat<K> ftime; - const vint<K> itime = mesh->timeSegment(time, ftime); + const vint<K> itime = mesh->timeSegment<K>(time, ftime); const size_t first = bsf(movemask(valid)); if (likely(all(valid,itime[first] == itime))) @@ -352,9 +352,9 @@ namespace embree p1 = getVertex<1>(index, scene, itime[first], ftime); p2 = getVertex<2>(index, scene, itime[first], ftime); } else { - p0 = getVertex<0>(valid, index, scene, itime, ftime); - p1 = getVertex<1>(valid, index, scene, itime, ftime); - p2 = getVertex<2>(valid, index, scene, itime, ftime); + p0 = getVertex<0,K>(valid, index, scene, itime, ftime); + p1 = getVertex<1,K>(valid, index, scene, itime, ftime); + p2 = getVertex<2,K>(valid, index, scene, itime, ftime); } } diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h b/thirdparty/embree/kernels/geometry/trianglei_intersector.h index e2f106a62c..f7deb9e72d 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglei_intersector.h +++ b/thirdparty/embree/kernels/geometry/trianglei_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -12,24 +12,24 @@ namespace embree namespace isa { /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMiIntersector1Moeller { typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; + typedef MoellerTrumboreIntersector1<M> Precalculations; static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -39,11 +39,11 @@ namespace embree }; /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMiIntersectorKMoeller { typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + typedef MoellerTrumboreIntersectorK<M,K> Precalculations; static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) { @@ -55,7 +55,7 @@ namespace embree const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); } } @@ -71,7 +71,7 @@ namespace embree const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); if (none(valid0)) break; } return !valid0; @@ -81,36 +81,36 @@ namespace embree { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMiIntersector1Pluecker { typedef TriangleMi<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; + typedef PlueckerIntersector1<M> Precalculations; static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -120,11 +120,11 @@ namespace embree }; /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMiIntersectorKPluecker { typedef TriangleMi<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; + typedef PlueckerIntersectorK<M,K> Precalculations; static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) { @@ -136,7 +136,7 @@ namespace embree const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); } } @@ -152,7 +152,7 @@ namespace embree const Vec3vf<K> v0 = tri.template getVertex<0>(i,scene); const Vec3vf<K> v1 = tri.template getVertex<1>(i,scene); const Vec3vf<K> v2 = tri.template getVertex<2>(i,scene); - pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); if (none(valid0)) break; } return !valid0; @@ -162,30 +162,30 @@ namespace embree { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0, v1, v2; tri.gather(v0,v1,v2,context->scene); - return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMiMBIntersector1Moeller { typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; + typedef MoellerTrumboreIntersector1<M> Precalculations; /*! Intersect a ray with the M triangles and updates the hit. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of M triangles. */ @@ -193,7 +193,7 @@ namespace embree { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - return pre.intersect(ray,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -203,11 +203,11 @@ namespace embree }; /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMiMBIntersectorKMoeller { typedef TriangleMi<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + typedef MoellerTrumboreIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) @@ -216,8 +216,8 @@ namespace embree { if (!tri.valid(i)) break; STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid_i,ray,v0,v1,v2,/*UVIdentity<K>(),*/IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + Vec3vf<K> v0,v1,v2; tri.template gather<K>(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); } } @@ -229,8 +229,8 @@ namespace embree { if (!tri.valid(i)) break; STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid0,ray,v0,v1,v2,/*UVIdentity<K>(),*/OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + Vec3vf<K> v0,v1,v2; tri.template gather<K>(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); if (none(valid0)) break; } return !valid0; @@ -241,7 +241,7 @@ namespace embree { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ @@ -249,23 +249,23 @@ namespace embree { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - return pre.intersect(ray,k,v0,v1,v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMiMBIntersector1Pluecker { typedef TriangleMi<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; + typedef PlueckerIntersector1<M> Precalculations; /*! Intersect a ray with the M triangles and updates the hit. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of M triangles. */ @@ -273,7 +273,7 @@ namespace embree { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()); - return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -283,11 +283,11 @@ namespace embree }; /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMiMBIntersectorKPluecker { typedef TriangleMi<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; + typedef PlueckerIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMi<M>& tri) @@ -296,8 +296,8 @@ namespace embree { if (!tri.valid(i)) break; STAT3(normal.trav_prims,1,popcnt(valid_i),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid_i,ray,v0,v1,v2,UVIdentity<K>(),IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); + Vec3vf<K> v0,v1,v2; tri.template gather<K>(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid_i,ray,v0,v1,v2,IntersectKEpilogM<M,K,filter>(ray,context,tri.geomID(),tri.primID(),i)); } } @@ -309,8 +309,8 @@ namespace embree { if (!tri.valid(i)) break; STAT3(shadow.trav_prims,1,popcnt(valid0),K); - Vec3vf<K> v0,v1,v2; tri.gather(valid_i,v0,v1,v2,i,context->scene,ray.time()); - pre.intersectK(valid0,ray,v0,v1,v2,UVIdentity<K>(),OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); + Vec3vf<K> v0,v1,v2; tri.template gather<K>(valid_i,v0,v1,v2,i,context->scene,ray.time()); + pre.intersectK(valid0,ray,v0,v1,v2,OccludedKEpilogM<M,K,filter>(valid0,ray,context,tri.geomID(),tri.primID(),i)); if (none(valid0)) break; } return !valid0; @@ -321,7 +321,7 @@ namespace embree { STAT3(normal.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ @@ -329,7 +329,7 @@ namespace embree { STAT3(shadow.trav_prims,1,1,1); Vec3vf<M> v0,v1,v2; tri.gather(v0,v1,v2,context->scene,ray.time()[k]); - return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h b/thirdparty/embree/kernels/geometry/trianglev.h index 19af389e73..cd94756b9e 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev.h +++ b/thirdparty/embree/kernels/geometry/trianglev.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h b/thirdparty/embree/kernels/geometry/trianglev_intersector.h index 6af0d5a11c..3abb7f8e32 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_intersector.h +++ b/thirdparty/embree/kernels/geometry/trianglev_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -13,24 +13,24 @@ namespace embree namespace isa { /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMvIntersector1Moeller { typedef TriangleMv<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; + typedef MoellerTrumboreIntersector1<M> Precalculations; /*! Intersect a ray with M triangles and updates the hit. */ static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<M>(),*/Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,tri.v0,tri.v1,tri.v2,/*UVIdentity<M>(),*/Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -40,25 +40,25 @@ namespace embree }; - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMvIntersector1Woop { typedef TriangleMv<M> Primitive; - typedef WoopIntersector1<Mx> intersec; + typedef WoopIntersector1<M> intersec; typedef WoopPrecalculations1<M> Precalculations; /*! Intersect a ray with M triangles and updates the hit. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); - intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); - return intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return intersec::intersect(ray,pre,tri.v0,tri.v1,tri.v2,Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -69,11 +69,11 @@ namespace embree /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMvIntersectorKMoeller { typedef TriangleMv<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + typedef MoellerTrumboreIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) @@ -111,36 +111,36 @@ namespace embree static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<M>(),*/Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<Mx>(),*/Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,/*UVIdentity<M>(),*/Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M } }; /*! Intersects M triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMvIntersector1Pluecker { typedef TriangleMv<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; + typedef PlueckerIntersector1<M> Precalculations; /*! Intersect a ray with M triangles and updates the hit. */ static __forceinline void intersect(Precalculations& pre, RayHit& ray, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<M>(),Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + return pre.intersect(ray,tri.v0,tri.v1,tri.v2,UVIdentity<M>(),Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -150,11 +150,11 @@ namespace embree }; /*! Intersects M triangles with K rays */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMvIntersectorKPluecker { typedef TriangleMv<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; + typedef PlueckerIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const Primitive& tri) @@ -192,14 +192,14 @@ namespace embree static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) { STAT3(normal.trav_prims,1,1,1); - pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<M>(),Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const Primitive& tri) { STAT3(shadow.trav_prims,1,1,1); - return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); //FIXME: M,Mx + return pre.intersect(ray,k,tri.v0,tri.v1,tri.v2,UVIdentity<M>(),Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h b/thirdparty/embree/kernels/geometry/trianglev_mb.h index 63137aee16..b550a29fd5 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb.h +++ b/thirdparty/embree/kernels/geometry/trianglev_mb.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h b/thirdparty/embree/kernels/geometry/trianglev_mb_intersector.h index 35a260d826..38cd52e85d 100644 --- a/thirdparty/embree-aarch64/kernels/geometry/trianglev_mb_intersector.h +++ b/thirdparty/embree/kernels/geometry/trianglev_mb_intersector.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -11,32 +11,32 @@ namespace embree namespace isa { /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMvMBIntersector1Moeller { typedef TriangleMvMB<M> Primitive; - typedef MoellerTrumboreIntersector1<Mx> Precalculations; + typedef MoellerTrumboreIntersector1<M> Precalculations; /*! Intersect a ray with the M triangles and updates the hit. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + pre.intersect(ray,v0,v1,v2,Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of M triangles. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + return pre.intersect(ray,v0,v1,v2,Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -46,11 +46,11 @@ namespace embree }; /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMvMBIntersectorKMoeller { typedef TriangleMvMB<M> Primitive; - typedef MoellerTrumboreIntersectorK<Mx,K> Precalculations; + typedef MoellerTrumboreIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) @@ -90,52 +90,52 @@ namespace embree static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()[k]); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + pre.intersect(ray,k,v0,v1,v2,Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()[k]); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + return pre.intersect(ray,k,v0,v1,v2,Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; /*! Intersects M motion blur triangles with 1 ray */ - template<int M, int Mx, bool filter> + template<int M, bool filter> struct TriangleMvMBIntersector1Pluecker { typedef TriangleMvMB<M> Primitive; - typedef PlueckerIntersector1<Mx> Precalculations; + typedef PlueckerIntersector1<M> Precalculations; /*! Intersect a ray with the M triangles and updates the hit. */ static __forceinline void intersect(const Precalculations& pre, RayHit& ray, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Intersect1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + pre.intersect(ray,v0,v1,v2,UVIdentity<M>(),Intersect1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of M triangles. */ static __forceinline bool occluded(const Precalculations& pre, Ray& ray, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,v0,v1,v2,UVIdentity<Mx>(),Occluded1EpilogM<M,Mx,filter>(ray,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + return pre.intersect(ray,v0,v1,v2,UVIdentity<M>(),Occluded1EpilogM<M,filter>(ray,context,tri.geomID(),tri.primID())); } static __forceinline bool pointQuery(PointQuery* query, PointQueryContext* context, const Primitive& tri) @@ -145,11 +145,11 @@ namespace embree }; /*! Intersects M motion blur triangles with K rays. */ - template<int M, int Mx, int K, bool filter> + template<int M, int K, bool filter> struct TriangleMvMBIntersectorKPluecker { typedef TriangleMvMB<M> Primitive; - typedef PlueckerIntersectorK<Mx,K> Precalculations; + typedef PlueckerIntersectorK<M,K> Precalculations; /*! Intersects K rays with M triangles. */ static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, IntersectContext* context, const TriangleMvMB<M>& tri) @@ -189,22 +189,22 @@ namespace embree static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(normal.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Intersect1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()[k]); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + pre.intersect(ray,k,v0,v1,v2,UVIdentity<M>(),Intersect1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } /*! Test if the ray is occluded by one of the M triangles. */ static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, IntersectContext* context, const TriangleMvMB<M>& tri) { STAT3(shadow.trav_prims,1,1,1); - const Vec3vf<Mx> time(ray.time()[k]); - const Vec3vf<Mx> v0 = madd(time,Vec3vf<Mx>(tri.dv0),Vec3vf<Mx>(tri.v0)); - const Vec3vf<Mx> v1 = madd(time,Vec3vf<Mx>(tri.dv1),Vec3vf<Mx>(tri.v1)); - const Vec3vf<Mx> v2 = madd(time,Vec3vf<Mx>(tri.dv2),Vec3vf<Mx>(tri.v2)); - return pre.intersect(ray,k,v0,v1,v2,UVIdentity<Mx>(),Occluded1KEpilogM<M,Mx,K,filter>(ray,k,context,tri.geomID(),tri.primID())); + const Vec3vf<M> time(ray.time()[k]); + const Vec3vf<M> v0 = madd(time,Vec3vf<M>(tri.dv0),Vec3vf<M>(tri.v0)); + const Vec3vf<M> v1 = madd(time,Vec3vf<M>(tri.dv1),Vec3vf<M>(tri.v1)); + const Vec3vf<M> v2 = madd(time,Vec3vf<M>(tri.dv2),Vec3vf<M>(tri.v2)); + return pre.intersect(ray,k,v0,v1,v2,UVIdentity<M>(),Occluded1KEpilogM<M,K,filter>(ray,k,context,tri.geomID(),tri.primID())); } }; } diff --git a/thirdparty/embree-aarch64/kernels/hash.h b/thirdparty/embree/kernels/hash.h index 4abbe203d6..10f315cee7 100644 --- a/thirdparty/embree-aarch64/kernels/hash.h +++ b/thirdparty/embree/kernels/hash.h @@ -2,4 +2,4 @@ // Copyright 2009-2020 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -#define RTC_HASH "6ef362f99af80c9dfe8dd2bfc582d9067897edc6" +#define RTC_HASH "7c53133eb21424f7f0ae1e25bf357e358feaf6ab" diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h b/thirdparty/embree/kernels/subdiv/bezier_curve.h index c0e78820f8..a5adad5cc9 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/bezier_curve.h +++ b/thirdparty/embree/kernels/subdiv/bezier_curve.h @@ -1,10 +1,11 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once #include "../common/default.h" -#include "../common/scene_curves.h" +//#include "../common/scene_curves.h" +#include "../common/context.h" namespace embree { @@ -659,6 +660,7 @@ namespace embree return numRoots(v0,v1) + numRoots(v1,v2) + numRoots(v2,v3); } + template<typename CurveGeometry> __forceinline CubicBezierCurve<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CubicBezierCurve<Vec3ff>& curve) { return CubicBezierCurve<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0), diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h b/thirdparty/embree/kernels/subdiv/bezier_patch.h index d87ed41ccb..2ff03902a7 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/bezier_patch.h +++ b/thirdparty/embree/kernels/subdiv/bezier_patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h b/thirdparty/embree/kernels/subdiv/bilinear_patch.h index 35748754bd..cade104a6c 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/bilinear_patch.h +++ b/thirdparty/embree/kernels/subdiv/bilinear_patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h b/thirdparty/embree/kernels/subdiv/bspline_curve.h index a325667328..51489ef37c 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/bspline_curve.h +++ b/thirdparty/embree/kernels/subdiv/bspline_curve.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -161,8 +161,8 @@ namespace embree template<int M> __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const { - p = veval(t); - dp = veval_du(t); + p = veval<M>(t); + dp = veval_du<M>(t); } template<int M> @@ -306,6 +306,7 @@ namespace embree ocurve = BezierCurveT<Vertex>(v0,v1,v2,v3); } + template<typename CurveGeometry> __forceinline BSplineCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const BSplineCurveT<Vec3ff>& curve) { return BSplineCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0), diff --git a/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h b/thirdparty/embree/kernels/subdiv/bspline_patch.h index 9769bc17bd..ff47f01c7a 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/bspline_patch.h +++ b/thirdparty/embree/kernels/subdiv/bspline_patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h b/thirdparty/embree/kernels/subdiv/catmullclark_coefficients.h index 05031cf6b9..46959797bf 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_coefficients.h +++ b/thirdparty/embree/kernels/subdiv/catmullclark_coefficients.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h b/thirdparty/embree/kernels/subdiv/catmullclark_patch.h index ab1d63594a..91772d94ed 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_patch.h +++ b/thirdparty/embree/kernels/subdiv/catmullclark_patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h b/thirdparty/embree/kernels/subdiv/catmullclark_ring.h index 73b41fd4ff..e5ad5dadfe 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/catmullclark_ring.h +++ b/thirdparty/embree/kernels/subdiv/catmullclark_ring.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h b/thirdparty/embree/kernels/subdiv/catmullrom_curve.h index b244af481c..74fc4c1230 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/catmullrom_curve.h +++ b/thirdparty/embree/kernels/subdiv/catmullrom_curve.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -168,8 +168,8 @@ namespace embree template<int M> __forceinline void veval(const vfloat<M>& t, Vec4vf<M>& p, Vec4vf<M>& dp) const { - p = veval(t); - dp = veval_du(t); + p = veval<M>(t); + dp = veval_du<M>(t); } template<int M> @@ -283,6 +283,7 @@ namespace embree } }; + template<typename CurveGeometry> __forceinline CatmullRomCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const CatmullRomCurveT<Vec3ff>& curve) { return CatmullRomCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,curve.v0), diff --git a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h b/thirdparty/embree/kernels/subdiv/feature_adaptive_eval.h index 23f24c360c..58c0b63e62 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval.h +++ b/thirdparty/embree/kernels/subdiv/feature_adaptive_eval.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h b/thirdparty/embree/kernels/subdiv/feature_adaptive_eval_grid.h index 76583b2e5d..4755aba28d 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_grid.h +++ b/thirdparty/embree/kernels/subdiv/feature_adaptive_eval_grid.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h b/thirdparty/embree/kernels/subdiv/feature_adaptive_eval_simd.h index fa3216730f..edab0db12f 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/feature_adaptive_eval_simd.h +++ b/thirdparty/embree/kernels/subdiv/feature_adaptive_eval_simd.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h b/thirdparty/embree/kernels/subdiv/gregory_patch.h index 2a7c4b1f2c..9026d5c407 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch.h +++ b/thirdparty/embree/kernels/subdiv/gregory_patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h b/thirdparty/embree/kernels/subdiv/gregory_patch_dense.h index 85effd02cf..4cf9a7e98f 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/gregory_patch_dense.h +++ b/thirdparty/embree/kernels/subdiv/gregory_patch_dense.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/gridrange.h b/thirdparty/embree/kernels/subdiv/gridrange.h index 4fd741c879..4f2b90d7bd 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/gridrange.h +++ b/thirdparty/embree/kernels/subdiv/gridrange.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/half_edge.h b/thirdparty/embree/kernels/subdiv/half_edge.h index fb350ca71f..baf019cd79 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/half_edge.h +++ b/thirdparty/embree/kernels/subdiv/half_edge.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -186,7 +186,7 @@ namespace embree { const HalfEdge* p = this; do { - if (p->vertexHasBorder()) return true; + if (p->vertexHasBorder() && (p->vertex_type != HalfEdge::NON_MANIFOLD_EDGE_VERTEX)) return true; p = p->next(); } while (p != this); return false; diff --git a/thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h b/thirdparty/embree/kernels/subdiv/hermite_curve.h index 9fab79cf0c..ffef5a4315 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/hermite_curve.h +++ b/thirdparty/embree/kernels/subdiv/hermite_curve.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -29,6 +29,7 @@ namespace embree } }; + template<typename CurveGeometry> __forceinline HermiteCurveT<Vec3ff> enlargeRadiusToMinWidth(const IntersectContext* context, const CurveGeometry* geom, const Vec3fa& ray_org, const HermiteCurveT<Vec3ff>& curve) { return HermiteCurveT<Vec3ff>(enlargeRadiusToMinWidth(context,geom,ray_org,BezierCurveT<Vec3ff>(curve))); } diff --git a/thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h b/thirdparty/embree/kernels/subdiv/linear_bezier_patch.h index f4a854af7f..f8e8a25f35 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/linear_bezier_patch.h +++ b/thirdparty/embree/kernels/subdiv/linear_bezier_patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch.h b/thirdparty/embree/kernels/subdiv/patch.h index d58241b96d..c4340ea9b6 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/patch.h +++ b/thirdparty/embree/kernels/subdiv/patch.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h b/thirdparty/embree/kernels/subdiv/patch_eval.h index 482d015fa3..a3fafa72f4 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval.h +++ b/thirdparty/embree/kernels/subdiv/patch_eval.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h b/thirdparty/embree/kernels/subdiv/patch_eval_grid.h index c05db55f4c..167e1ebe1c 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_grid.h +++ b/thirdparty/embree/kernels/subdiv/patch_eval_grid.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h b/thirdparty/embree/kernels/subdiv/patch_eval_simd.h index 28016d9e20..fef88a4492 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/patch_eval_simd.h +++ b/thirdparty/embree/kernels/subdiv/patch_eval_simd.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h b/thirdparty/embree/kernels/subdiv/subdivpatch1base.h index d5bc403cca..c3069dadee 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/subdivpatch1base.h +++ b/thirdparty/embree/kernels/subdiv/subdivpatch1base.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/tessellation.h b/thirdparty/embree/kernels/subdiv/tessellation.h index bda1e2d559..abde4f2bde 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/tessellation.h +++ b/thirdparty/embree/kernels/subdiv/tessellation.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once diff --git a/thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h b/thirdparty/embree/kernels/subdiv/tessellation_cache.h index 5c215288b6..99edf49be4 100644 --- a/thirdparty/embree-aarch64/kernels/subdiv/tessellation_cache.h +++ b/thirdparty/embree/kernels/subdiv/tessellation_cache.h @@ -1,4 +1,4 @@ -// Copyright 2009-2020 Intel Corporation +// Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 #pragma once @@ -63,7 +63,7 @@ namespace embree static const size_t NUM_CACHE_SEGMENTS = 8; static const size_t NUM_PREALLOC_THREAD_WORK_STATES = 512; static const size_t COMMIT_INDEX_SHIFT = 32+8; -#if defined(__X86_64__) || defined(__aarch64__) +#if defined(__64BIT__) static const size_t REF_TAG_MASK = 0xffffffffff; #else static const size_t REF_TAG_MASK = 0x7FFFFFFF; diff --git a/thirdparty/embree/patches/godot-changes-android.patch b/thirdparty/embree/patches/godot-changes-android.patch new file mode 100644 index 0000000000..a27f924bde --- /dev/null +++ b/thirdparty/embree/patches/godot-changes-android.patch @@ -0,0 +1,103 @@ +diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp +index ba97dc227b..1679599608 100644 +--- a/thirdparty/embree/common/sys/sysinfo.cpp ++++ b/thirdparty/embree/common/sys/sysinfo.cpp +@@ -618,7 +618,10 @@ namespace embree + static int nThreads = -1; + if (nThreads != -1) return nThreads; + +-#if defined(__MACOSX__) ++// -- GODOT start -- ++// #if defined(__MACOSX__) ++#if defined(__MACOSX__) || defined(__ANDROID__) ++// -- GODOT end -- + nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container + assert(nThreads); + #else +diff --git a/thirdparty/embree/common/sys/thread.cpp b/thirdparty/embree/common/sys/thread.cpp +index a7827e18f7..f4014be89b 100644 +--- a/thirdparty/embree/common/sys/thread.cpp ++++ b/thirdparty/embree/common/sys/thread.cpp +@@ -158,7 +158,9 @@ namespace embree + /// Linux Platform + //////////////////////////////////////////////////////////////////////////////// + +-#if defined(__LINUX__) ++// -- GODOT start -- ++#if defined(__LINUX__) && !defined(__ANDROID__) ++// -- GODOT end -- + + #include <fstream> + #include <sstream> +@@ -247,6 +249,28 @@ namespace embree + } + #endif + ++// -- GODOT start -- ++//////////////////////////////////////////////////////////////////////////////// ++/// Android Platform ++//////////////////////////////////////////////////////////////////////////////// ++ ++#if defined(__ANDROID__) ++ ++namespace embree ++{ ++ /*! set affinity of the calling thread */ ++ void setAffinity(ssize_t affinity) ++ { ++ cpu_set_t cset; ++ CPU_ZERO(&cset); ++ CPU_SET(affinity, &cset); ++ ++ sched_setaffinity(0, sizeof(cset), &cset); ++ } ++} ++#endif ++// -- GODOT end -- ++ + //////////////////////////////////////////////////////////////////////////////// + /// FreeBSD Platform + //////////////////////////////////////////////////////////////////////////////// +@@ -355,7 +379,9 @@ namespace embree + pthread_attr_destroy(&attr); + + /* set affinity */ +-#if defined(__LINUX__) ++// -- GODOT start -- ++#if defined(__LINUX__) && !defined(__ANDROID__) ++// -- GODOT end -- + if (threadID >= 0) { + cpu_set_t cset; + CPU_ZERO(&cset); +@@ -370,7 +396,16 @@ namespace embree + CPU_SET(threadID, &cset); + pthread_setaffinity_np(*tid, sizeof(cset), &cset); + } ++// -- GODOT start -- ++#elif defined(__ANDROID__) ++ if (threadID >= 0) { ++ cpu_set_t cset; ++ CPU_ZERO(&cset); ++ CPU_SET(threadID, &cset); ++ sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset); ++ } + #endif ++// -- GODOT end -- + + return thread_t(tid); + } +@@ -389,8 +424,14 @@ namespace embree + + /*! destroy a hardware thread by its handle */ + void destroyThread(thread_t tid) { ++// -- GODOT start -- ++#if defined(__ANDROID__) ++ FATAL("Can't destroy threads on Android."); ++#else + pthread_cancel(*(pthread_t*)tid); + delete (pthread_t*)tid; ++#endif ++// -- GODOT end -- + } + + /*! creates thread local storage */ diff --git a/thirdparty/embree/patches/godot-changes-misc.patch b/thirdparty/embree/patches/godot-changes-misc.patch new file mode 100644 index 0000000000..8bf0d9fa97 --- /dev/null +++ b/thirdparty/embree/patches/godot-changes-misc.patch @@ -0,0 +1,105 @@ +diff --git a/thirdparty/embree/common/sys/intrinsics.h b/thirdparty/embree/common/sys/intrinsics.h +index 79729c87ab..ed8dd7d40a 100644 +--- a/thirdparty/embree/common/sys/intrinsics.h ++++ b/thirdparty/embree/common/sys/intrinsics.h +@@ -34,8 +34,14 @@ + #endif + + #if defined(__WIN32__) +-# define NOMINMAX +-# include <windows.h> ++// -- GODOT start -- ++#if !defined(NOMINMAX) ++// -- GODOT end -- ++#define NOMINMAX ++// -- GODOT start -- ++#endif ++#include "windows.h" ++// -- GODOT end -- + #endif + + /* normally defined in pmmintrin.h, but we always need this */ +diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h +index 3fc5e99b8d..697e07bb86 100644 +--- a/thirdparty/embree/common/sys/platform.h ++++ b/thirdparty/embree/common/sys/platform.h +@@ -99,7 +99,9 @@ + #define dll_import + #endif + +-#ifdef __WIN32__ ++// -- GODOT start -- ++#if defined(__WIN32__) && !defined(__MINGW32__) ++// -- GODOT end -- + #if !defined(__noinline) + #define __noinline __declspec(noinline) + #endif +@@ -149,6 +151,9 @@ + #define DELETED = delete + #endif + ++// -- GODOT start -- ++#if !defined(likely) ++// -- GODOT end -- + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #define likely(expr) (expr) + #define unlikely(expr) (expr) +@@ -156,6 +161,9 @@ + #define likely(expr) __builtin_expect((bool)(expr),true ) + #define unlikely(expr) __builtin_expect((bool)(expr),false) + #endif ++// -- GODOT start -- ++#endif ++// -- GODOT end -- + + //////////////////////////////////////////////////////////////////////////////// + /// Error handling and debugging +diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp +index ba97dc227b..f1a59e511e 100644 +--- a/thirdparty/embree/common/sys/sysinfo.cpp ++++ b/thirdparty/embree/common/sys/sysinfo.cpp +@@ -248,7 +248,9 @@ namespace embree + #if defined(__X86_ASM__) + __noinline int64_t get_xcr0() + { +-#if defined (__WIN32__) ++// -- GODOT start -- ++#if defined (__WIN32__) && !defined (__MINGW32__) ++// -- GODOT end -- + int64_t xcr0 = 0; // int64_t is workaround for compiler bug under VS2013, Win32 + xcr0 = _xgetbv(0); + return xcr0; +diff --git a/thirdparty/embree/include/embree3/rtcore_common.h b/thirdparty/embree/include/embree3/rtcore_common.h +index 9c14b28745..4857e1e05e 100644 +--- a/thirdparty/embree/include/embree3/rtcore_common.h ++++ b/thirdparty/embree/include/embree3/rtcore_common.h +@@ -19,7 +19,9 @@ typedef int ssize_t; + #endif + #endif + +-#ifdef _WIN32 ++// -- GODOT start -- ++#if defined(_WIN32) && defined(_MSC_VER) ++// -- GODOT end -- + # define RTC_ALIGN(...) __declspec(align(__VA_ARGS__)) + #else + # define RTC_ALIGN(...) __attribute__((aligned(__VA_ARGS__))) +diff --git a/thirdparty/embree/common/tasking/taskschedulertbb.h b/thirdparty/embree/common/tasking/taskschedulertbb.h +index 3fd15816e9..35bd49849f 100644 +--- a/thirdparty/embree/common/tasking/taskschedulertbb.h ++++ b/thirdparty/embree/common/tasking/taskschedulertbb.h +@@ -12,7 +12,13 @@ + #include "../sys/ref.h" + + #if defined(__WIN32__) ++// -- GODOT start -- ++#if !defined(NOMINMAX) ++// -- GODOT end -- + # define NOMINMAX ++// -- GODOT start -- ++#endif ++// -- GODOT end -- + #endif + + // We need to define these to avoid implicit linkage against +
\ No newline at end of file diff --git a/thirdparty/embree-aarch64/patches/godot-changes.patch b/thirdparty/embree/patches/godot-changes-noexcept.patch index 86fbf226d2..c587a0e2be 100644 --- a/thirdparty/embree-aarch64/patches/godot-changes.patch +++ b/thirdparty/embree/patches/godot-changes-noexcept.patch @@ -1,27 +1,27 @@ -diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_for.h b/thirdparty/embree-aarch64/common/algorithms/parallel_for.h -index 76c6b740aa..51d296fb16 100644 ---- a/thirdparty/embree-aarch64/common/algorithms/parallel_for.h -+++ b/thirdparty/embree-aarch64/common/algorithms/parallel_for.h -@@ -27,7 +27,10 @@ namespace embree +diff --git a/thirdparty/embree/common/algorithms/parallel_for.h b/thirdparty/embree/common/algorithms/parallel_for.h +index f052d8b468..645681ac63 100644 +--- a/thirdparty/embree/common/algorithms/parallel_for.h ++++ b/thirdparty/embree/common/algorithms/parallel_for.h +@@ -21,7 +21,10 @@ namespace embree func(r.begin()); }); if (!TaskScheduler::wait()) - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- } - #elif defined(TASKING_GCD) && defined(BUILD_IOS) - -@@ -55,13 +58,19 @@ namespace embree + + #elif defined(TASKING_TBB) +@@ -31,13 +34,19 @@ namespace embree func(i); },context); if (context.is_group_execution_cancelled()) - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { @@ -31,31 +31,31 @@ index 76c6b740aa..51d296fb16 100644 - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #endif #elif defined(TASKING_PPL) -@@ -81,7 +90,10 @@ namespace embree +@@ -57,7 +66,10 @@ namespace embree #if defined(TASKING_INTERNAL) TaskScheduler::spawn(first,last,minStepSize,func); if (!TaskScheduler::wait()) - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- - #elif defined(TASKING_GCD) && defined(BUILD_IOS) - -@@ -109,13 +121,19 @@ namespace embree + #elif defined(TASKING_TBB) + #if TBB_INTERFACE_VERSION >= 12002 +@@ -66,13 +78,19 @@ namespace embree func(range<Index>(r.begin(),r.end())); },context); if (context.is_group_execution_cancelled()) - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #else tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) { @@ -65,19 +65,19 @@ index 76c6b740aa..51d296fb16 100644 - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #endif #elif defined(TASKING_PPL) -@@ -147,13 +165,19 @@ namespace embree +@@ -104,13 +122,19 @@ namespace embree func(i); },tbb::simple_partitioner(),context); if (context.is_group_execution_cancelled()) - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { @@ -87,20 +87,20 @@ index 76c6b740aa..51d296fb16 100644 - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #endif } -@@ -168,13 +192,19 @@ namespace embree +@@ -125,13 +149,19 @@ namespace embree func(i); },ap,context); if (context.is_group_execution_cancelled()) - throw std::runtime_error("task cancelled"); -+ // -- GODOT start -- -+ // throw std::runtime_error("task cancelled"); -+ abort(); -+ // -- GODOT end -- ++ // -- GODOT start -- ++ // throw std::runtime_error("task cancelled"); ++ abort(); ++ // -- GODOT end -- #else tbb::parallel_for(Index(0),N,Index(1),[&](Index i) { func(i); @@ -109,15 +109,15 @@ index 76c6b740aa..51d296fb16 100644 - throw std::runtime_error("task cancelled"); + // -- GODOT start -- + // throw std::runtime_error("task cancelled"); -+ abort(); ++ abort(); + // -- GODOT end -- #endif } -diff --git a/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h b/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h -index d444b6a2e4..0daf94e50e 100644 ---- a/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h -+++ b/thirdparty/embree-aarch64/common/algorithms/parallel_reduce.h +diff --git a/thirdparty/embree/common/algorithms/parallel_reduce.h b/thirdparty/embree/common/algorithms/parallel_reduce.h +index f42ae2ec50..8271372ea4 100644 +--- a/thirdparty/embree/common/algorithms/parallel_reduce.h ++++ b/thirdparty/embree/common/algorithms/parallel_reduce.h @@ -58,15 +58,19 @@ namespace embree const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity, [&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); }, @@ -142,10 +142,10 @@ index d444b6a2e4..0daf94e50e 100644 return v; #endif #else // TASKING_PPL -diff --git a/thirdparty/embree-aarch64/common/lexers/stringstream.cpp b/thirdparty/embree-aarch64/common/lexers/stringstream.cpp -index 7e7b9faef8..98dc80ad59 100644 ---- a/thirdparty/embree-aarch64/common/lexers/stringstream.cpp -+++ b/thirdparty/embree-aarch64/common/lexers/stringstream.cpp +diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp +index 42ffb10176..a037869506 100644 +--- a/thirdparty/embree/common/lexers/stringstream.cpp ++++ b/thirdparty/embree/common/lexers/stringstream.cpp @@ -39,7 +39,10 @@ namespace embree std::vector<char> str; str.reserve(64); while (cin->peek() != EOF && !isSeparator(cin->peek())) { @@ -158,10 +158,10 @@ index 7e7b9faef8..98dc80ad59 100644 str.push_back((char)c); } str.push_back(0); -diff --git a/thirdparty/embree-aarch64/common/sys/alloc.cpp b/thirdparty/embree-aarch64/common/sys/alloc.cpp -index 4e8928242e..12f143f131 100644 ---- a/thirdparty/embree-aarch64/common/sys/alloc.cpp -+++ b/thirdparty/embree-aarch64/common/sys/alloc.cpp +diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp +index 1bc30fe9a5..abdd269069 100644 +--- a/thirdparty/embree/common/sys/alloc.cpp ++++ b/thirdparty/embree/common/sys/alloc.cpp @@ -21,7 +21,10 @@ namespace embree void* ptr = _mm_malloc(size,align); @@ -169,7 +169,7 @@ index 4e8928242e..12f143f131 100644 - throw std::bad_alloc(); + // -- GODOT start -- + // throw std::bad_alloc(); -+ abort(); ++ abort(); + // -- GODOT end -- return ptr; @@ -246,11 +246,11 @@ index 4e8928242e..12f143f131 100644 } /* hint for transparent huge pages (THP) */ -diff --git a/thirdparty/embree-aarch64/common/sys/platform.h b/thirdparty/embree-aarch64/common/sys/platform.h -index 7914eb7a52..737f14aa6e 100644 ---- a/thirdparty/embree-aarch64/common/sys/platform.h -+++ b/thirdparty/embree-aarch64/common/sys/platform.h -@@ -174,11 +174,19 @@ +diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h +index 8a6d9fa0a9..697e07bb86 100644 +--- a/thirdparty/embree/common/sys/platform.h ++++ b/thirdparty/embree/common/sys/platform.h +@@ -179,11 +179,19 @@ #define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl #if defined(DEBUG) // only report file and line in debug mode @@ -272,10 +272,10 @@ index 7914eb7a52..737f14aa6e 100644 #endif #define FATAL(x) THROW_RUNTIME_ERROR(x) -diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp -index 98d7fb9249..ebf656d1a0 100644 ---- a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp -+++ b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.cpp +diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.cpp b/thirdparty/embree/common/tasking/taskschedulerinternal.cpp +index dca835a716..ad438588a3 100644 +--- a/thirdparty/embree/common/tasking/taskschedulerinternal.cpp ++++ b/thirdparty/embree/common/tasking/taskschedulerinternal.cpp @@ -48,13 +48,15 @@ namespace embree { Task* prevTask = thread.task; @@ -298,7 +298,7 @@ index 98d7fb9249..ebf656d1a0 100644 thread.task = prevTask; add_dependencies(-1); } -@@ -297,8 +299,11 @@ namespace embree +@@ -291,8 +293,11 @@ namespace embree size_t threadIndex = allocThreadIndex(); condition.wait(mutex, [&] () { return hasRootTask.load(); }); mutex.unlock(); @@ -312,7 +312,7 @@ index 98d7fb9249..ebf656d1a0 100644 } void TaskScheduler::reset() { -@@ -330,7 +335,10 @@ namespace embree +@@ -324,7 +329,10 @@ namespace embree return thread->scheduler->cancellingException == nullptr; } @@ -324,7 +324,7 @@ index 98d7fb9249..ebf656d1a0 100644 { /* allocate thread structure */ std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation -@@ -353,9 +361,10 @@ namespace embree +@@ -347,9 +355,10 @@ namespace embree swapThread(oldThread); /* remember exception to throw */ @@ -338,22 +338,22 @@ index 98d7fb9249..ebf656d1a0 100644 /* wait for all threads to terminate */ threadCounter--; #if defined(__WIN32__) -@@ -373,7 +382,10 @@ namespace embree +@@ -367,7 +376,10 @@ namespace embree yield(); #endif } - return except; -+ // -- GODOT start -- -+ // return except; -+ return; -+ // -- GODOT end -- ++ // -- GODOT start -- ++ // return except; ++ return; ++ // -- GODOT end -- } bool TaskScheduler::steal_from_other_threads(Thread& thread) -diff --git a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h -index c2a9391aea..8bd70b2b8c 100644 ---- a/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h -+++ b/thirdparty/embree-aarch64/common/tasking/taskschedulerinternal.h +diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h +index c766a0bb6a..8fa6bb12fa 100644 +--- a/thirdparty/embree/common/tasking/taskschedulerinternal.h ++++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h @@ -123,7 +123,10 @@ namespace embree { size_t ofs = bytes + ((align - stackPtr) & (align-1)); @@ -371,14 +371,14 @@ index c2a9391aea..8bd70b2b8c 100644 { if (right >= TASK_STACK_SIZE) - throw std::runtime_error("task stack overflow"); -+ // -- GODOT start -- -+ // throw std::runtime_error("task stack overflow"); -+ abort(); -+ // -- GODOT end -- ++ // -- GODOT start -- ++ // throw std::runtime_error("task stack overflow"); ++ abort(); ++ // -- GODOT end -- /* allocate new task on right side of stack */ size_t oldStackPtr = stackPtr; -@@ -239,7 +245,10 @@ namespace embree +@@ -238,7 +244,10 @@ namespace embree void wait_for_threads(size_t threadCount); /*! thread loop for all worker threads */ @@ -390,10 +390,10 @@ index c2a9391aea..8bd70b2b8c 100644 /*! steals a task from a different thread */ bool steal_from_other_threads(Thread& thread); -diff --git a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp -index 20cdd2d320..aa56035026 100644 ---- a/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp -+++ b/thirdparty/embree-aarch64/kernels/bvh/bvh_statistics.cpp +diff --git a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp +index d8da78eed7..d857ff7d95 100644 +--- a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp ++++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp @@ -150,7 +150,10 @@ namespace embree } } @@ -406,11 +406,11 @@ index 20cdd2d320..aa56035026 100644 } return s; } -diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp b/thirdparty/embree-aarch64/kernels/common/rtcore.cpp -index ee5c37b238..625fbf6d4f 100644 ---- a/thirdparty/embree-aarch64/kernels/common/rtcore.cpp -+++ b/thirdparty/embree-aarch64/kernels/common/rtcore.cpp -@@ -230,7 +230,10 @@ RTC_NAMESPACE_BEGIN; +diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp +index 74e9fb335c..94b3819e42 100644 +--- a/thirdparty/embree/kernels/common/rtcore.cpp ++++ b/thirdparty/embree/kernels/common/rtcore.cpp +@@ -197,7 +197,10 @@ RTC_NAMESPACE_BEGIN; if (quality != RTC_BUILD_QUALITY_LOW && quality != RTC_BUILD_QUALITY_MEDIUM && quality != RTC_BUILD_QUALITY_HIGH) @@ -422,7 +422,7 @@ index ee5c37b238..625fbf6d4f 100644 scene->setBuildQuality(quality); RTC_CATCH_END2(scene); } -@@ -1383,7 +1386,10 @@ RTC_NAMESPACE_BEGIN; +@@ -1350,7 +1353,10 @@ RTC_NAMESPACE_BEGIN; quality != RTC_BUILD_QUALITY_MEDIUM && quality != RTC_BUILD_QUALITY_HIGH && quality != RTC_BUILD_QUALITY_REFIT) @@ -434,10 +434,10 @@ index ee5c37b238..625fbf6d4f 100644 geometry->setBuildQuality(quality); RTC_CATCH_END2(geometry); } -diff --git a/thirdparty/embree-aarch64/kernels/common/rtcore.h b/thirdparty/embree-aarch64/kernels/common/rtcore.h -index 6583d12d57..4b070e122b 100644 ---- a/thirdparty/embree-aarch64/kernels/common/rtcore.h -+++ b/thirdparty/embree-aarch64/kernels/common/rtcore.h +diff --git a/thirdparty/embree/kernels/common/rtcore.h b/thirdparty/embree/kernels/common/rtcore.h +index 4e4b24e9c2..373e49a689 100644 +--- a/thirdparty/embree/kernels/common/rtcore.h ++++ b/thirdparty/embree/kernels/common/rtcore.h @@ -25,52 +25,58 @@ namespace embree #endif @@ -596,11 +596,11 @@ index 6583d12d57..4b070e122b 100644 #endif #define RTC_BUILD_ARGUMENTS_HAS(settings,member) \ -diff --git a/thirdparty/embree-aarch64/kernels/common/scene.cpp b/thirdparty/embree-aarch64/kernels/common/scene.cpp -index e75aa968f9..1e23aeb415 100644 ---- a/thirdparty/embree-aarch64/kernels/common/scene.cpp -+++ b/thirdparty/embree-aarch64/kernels/common/scene.cpp -@@ -800,16 +800,18 @@ namespace embree +diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp +index 0149055f2c..408d7eae6f 100644 +--- a/thirdparty/embree/kernels/common/scene.cpp ++++ b/thirdparty/embree/kernels/common/scene.cpp +@@ -792,16 +792,18 @@ namespace embree } /* initiate build */ diff --git a/thirdparty/embree/patches/godot-changes-ubsan.patch b/thirdparty/embree/patches/godot-changes-ubsan.patch new file mode 100644 index 0000000000..1336246f0d --- /dev/null +++ b/thirdparty/embree/patches/godot-changes-ubsan.patch @@ -0,0 +1,24 @@ +diff --git a/thirdparty/embree/kernels/builders/primrefgen.cpp b/thirdparty/embree/kernels/builders/primrefgen.cpp +index bb4fc81dfe..d279dc4993 100644 +--- a/thirdparty/embree/kernels/builders/primrefgen.cpp ++++ b/thirdparty/embree/kernels/builders/primrefgen.cpp +@@ -184,6 +184,9 @@ namespace embree + + // special variants for grid meshes + ++// -- GODOT start -- ++#if defined(EMBREE_GEOMETRY_GRID) ++// -- GODOT end -- + PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids) + { + PrimInfo pinfo(empty); +@@ -293,6 +296,9 @@ namespace embree + + return pinfo; + } ++// -- GODOT start -- ++#endif ++// -- GODOT end -- + + // ==================================================================================================== + // ==================================================================================================== diff --git a/thirdparty/enet/godot.cpp b/thirdparty/enet/godot.cpp index 189de6cc1f..fd7968204b 100644 --- a/thirdparty/enet/godot.cpp +++ b/thirdparty/enet/godot.cpp @@ -45,10 +45,10 @@ /// Abstract ENet interface for UDP/DTLS. class ENetGodotSocket { public: - virtual Error bind(IP_Address p_ip, uint16_t p_port) = 0; - virtual Error get_socket_address(IP_Address *r_ip, uint16_t *r_port) = 0; - virtual Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IP_Address p_ip, uint16_t p_port) = 0; - virtual Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IP_Address &r_ip, uint16_t &r_port) = 0; + virtual Error bind(IPAddress p_ip, uint16_t p_port) = 0; + virtual Error get_socket_address(IPAddress *r_ip, uint16_t *r_port) = 0; + virtual Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IPAddress p_ip, uint16_t p_port) = 0; + virtual Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IPAddress &r_ip, uint16_t &r_port) = 0; virtual int set_option(ENetSocketOption p_option, int p_value) = 0; virtual void close() = 0; virtual void set_refuse_new_connections(bool p_enable) {} /* Only used by dtls server */ @@ -65,7 +65,7 @@ class ENetUDP : public ENetGodotSocket { private: Ref<NetSocket> sock; - IP_Address local_address; + IPAddress local_address; bool bound = false; public: @@ -79,21 +79,21 @@ public: sock->close(); } - Error bind(IP_Address p_ip, uint16_t p_port) { + Error bind(IPAddress p_ip, uint16_t p_port) { local_address = p_ip; bound = true; return sock->bind(p_ip, p_port); } - Error get_socket_address(IP_Address *r_ip, uint16_t *r_port) { + Error get_socket_address(IPAddress *r_ip, uint16_t *r_port) { return sock->get_socket_address(r_ip, r_port); } - Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IP_Address p_ip, uint16_t p_port) { + Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IPAddress p_ip, uint16_t p_port) { return sock->sendto(p_buffer, p_len, r_sent, p_ip, p_port); } - Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IP_Address &r_ip, uint16_t &r_port) { + Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IPAddress &r_ip, uint16_t &r_port) { Error err = sock->poll(NetSocket::POLL_TYPE_IN, 0); if (err != OK) { return err; @@ -157,7 +157,7 @@ class ENetDTLSClient : public ENetGodotSocket { bool verify = false; String for_hostname; Ref<X509Certificate> cert; - IP_Address local_address; + IPAddress local_address; public: ENetDTLSClient(ENetUDP *p_base, Ref<X509Certificate> p_cert, bool p_verify, String p_for_hostname) { @@ -178,12 +178,12 @@ public: close(); } - Error bind(IP_Address p_ip, uint16_t p_port) { + Error bind(IPAddress p_ip, uint16_t p_port) { local_address = p_ip; return udp->bind(p_port, p_ip); } - Error get_socket_address(IP_Address *r_ip, uint16_t *r_port) { + Error get_socket_address(IPAddress *r_ip, uint16_t *r_port) { if (!udp->is_bound()) { return ERR_UNCONFIGURED; } @@ -192,7 +192,7 @@ public: return OK; } - Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IP_Address p_ip, uint16_t p_port) { + Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IPAddress p_ip, uint16_t p_port) { if (!connected) { udp->connect_to_host(p_ip, p_port); dtls->connect_to_peer(udp, verify, for_hostname, cert); @@ -208,7 +208,7 @@ public: return dtls->put_packet(p_buffer, p_len); } - Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IP_Address &r_ip, uint16_t &r_port) { + Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IPAddress &r_ip, uint16_t &r_port) { dtls->poll(); if (dtls->get_status() == PacketPeerDTLS::STATUS_HANDSHAKING) { return ERR_BUSY; @@ -250,7 +250,7 @@ class ENetDTLSServer : public ENetGodotSocket { Ref<UDPServer> udp_server; Map<String, Ref<PacketPeerDTLS>> peers; int last_service = 0; - IP_Address local_address; + IPAddress local_address; public: ENetDTLSServer(ENetUDP *p_base, Ref<CryptoKey> p_key, Ref<X509Certificate> p_cert) { @@ -273,12 +273,12 @@ public: udp_server->set_max_pending_connections(p_refuse ? 0 : 16); } - Error bind(IP_Address p_ip, uint16_t p_port) { + Error bind(IPAddress p_ip, uint16_t p_port) { local_address = p_ip; return udp_server->listen(p_port, p_ip); } - Error get_socket_address(IP_Address *r_ip, uint16_t *r_port) { + Error get_socket_address(IPAddress *r_ip, uint16_t *r_port) { if (!udp_server->is_listening()) { return ERR_UNCONFIGURED; } @@ -287,7 +287,7 @@ public: return OK; } - Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IP_Address p_ip, uint16_t p_port) { + Error sendto(const uint8_t *p_buffer, int p_len, int &r_sent, IPAddress p_ip, uint16_t p_port) { String key = String(p_ip) + ":" + itos(p_port); ERR_FAIL_COND_V(!peers.has(key), ERR_UNAVAILABLE); Ref<PacketPeerDTLS> peer = peers[key]; @@ -302,12 +302,12 @@ public: return err; } - Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IP_Address &r_ip, uint16_t &r_port) { + Error recvfrom(uint8_t *p_buffer, int p_len, int &r_read, IPAddress &r_ip, uint16_t &r_port) { udp_server->poll(); // TODO limits? Maybe we can better enforce allowed connections! if (udp_server->is_connection_available()) { Ref<PacketPeerUDP> udp = udp_server->take_connection(); - IP_Address peer_ip = udp->get_packet_address(); + IPAddress peer_ip = udp->get_packet_address(); int peer_port = udp->get_packet_port(); Ref<PacketPeerDTLS> peer = server->take_connection(udp); PacketPeerDTLS::Status status = peer->get_status(); @@ -397,7 +397,7 @@ void enet_time_set(enet_uint32 newTimeBase) { } int enet_address_set_host(ENetAddress *address, const char *name) { - IP_Address ip = IP::get_singleton()->resolve_hostname(name); + IPAddress ip = IP::get_singleton()->resolve_hostname(name); ERR_FAIL_COND_V(!ip.is_valid(), -1); enet_address_set_ip(address, ip.get_ipv6(), 16); @@ -442,9 +442,9 @@ void enet_host_refuse_new_connections(ENetHost *host, int p_refuse) { } int enet_socket_bind(ENetSocket socket, const ENetAddress *address) { - IP_Address ip; + IPAddress ip; if (address->wildcard) { - ip = IP_Address("*"); + ip = IPAddress("*"); } else { ip.set_ipv6(address->host); } @@ -466,7 +466,7 @@ int enet_socket_send(ENetSocket socket, const ENetAddress *address, const ENetBu ERR_FAIL_COND_V(address == nullptr, -1); ENetGodotSocket *sock = (ENetGodotSocket *)socket; - IP_Address dest; + IPAddress dest; Error err; size_t i = 0; @@ -508,7 +508,7 @@ int enet_socket_receive(ENetSocket socket, ENetAddress *address, ENetBuffer *buf ENetGodotSocket *sock = (ENetGodotSocket *)socket; int read; - IP_Address ip; + IPAddress ip; Error err = sock->recvfrom((uint8_t *)buffers[0].data, buffers[0].dataLength, read, ip, address->port); if (err == ERR_BUSY) { @@ -525,7 +525,7 @@ int enet_socket_receive(ENetSocket socket, ENetAddress *address, ENetBuffer *buf } int enet_socket_get_address (ENetSocket socket, ENetAddress * address) { - IP_Address ip; + IPAddress ip; uint16_t port; ENetGodotSocket *sock = (ENetGodotSocket *)socket; diff --git a/thirdparty/fonts/OpenSans_SemiBold.ttf b/thirdparty/fonts/OpenSans_SemiBold.ttf Binary files differnew file mode 100644 index 0000000000..54e7059cf3 --- /dev/null +++ b/thirdparty/fonts/OpenSans_SemiBold.ttf diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index fe8d349731..e44b99ce52 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -299,6 +299,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); /** + * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); + +/** * Experimental: Mesh simplifier (sloppy) * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error. diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch new file mode 100644 index 0000000000..cf648b0da3 --- /dev/null +++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch @@ -0,0 +1,262 @@ +diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h +index fe8d349731..e44b99ce52 100644 +--- a/thirdparty/meshoptimizer/meshoptimizer.h ++++ b/thirdparty/meshoptimizer/meshoptimizer.h +@@ -298,6 +298,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver + */ + MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); + ++/** ++ * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) ++ */ ++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); ++ + /** + * Experimental: Mesh simplifier (sloppy) + * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance +diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp +index b2cb589462..059cabb055 100644 +--- a/thirdparty/meshoptimizer/simplifier.cpp ++++ b/thirdparty/meshoptimizer/simplifier.cpp +@@ -20,6 +20,8 @@ + #define TRACESTATS(i) (void)0 + #endif + ++#define ATTRIBUTES 8 ++ + // This work is based on: + // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 + // Michael Garland. Quadric-based polygonal surface simplification. 1999 +@@ -358,6 +360,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned + struct Vector3 + { + float x, y, z; ++ ++#if ATTRIBUTES ++ float a[ATTRIBUTES]; ++#endif + }; + + static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +@@ -414,6 +420,13 @@ struct Quadric + float a10, a20, a21; + float b0, b1, b2, c; + float w; ++ ++#if ATTRIBUTES ++ float gx[ATTRIBUTES]; ++ float gy[ATTRIBUTES]; ++ float gz[ATTRIBUTES]; ++ float gw[ATTRIBUTES]; ++#endif + }; + + struct Collapse +@@ -456,6 +469,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R) + Q.b2 += R.b2; + Q.c += R.c; + Q.w += R.w; ++ ++#if ATTRIBUTES ++ for (int k = 0; k < ATTRIBUTES; ++k) ++ { ++ Q.gx[k] += R.gx[k]; ++ Q.gy[k] += R.gy[k]; ++ Q.gz[k] += R.gz[k]; ++ Q.gw[k] += R.gw[k]; ++ } ++#endif + } + + static float quadricError(const Quadric& Q, const Vector3& v) +@@ -481,6 +504,17 @@ static float quadricError(const Quadric& Q, const Vector3& v) + r += ry * v.y; + r += rz * v.z; + ++#if ATTRIBUTES ++ // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr ++ for (int k = 0; k < ATTRIBUTES; ++k) ++ { ++ float a = v.a[k]; ++ ++ r += a * a * Q.w; ++ r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]); ++ } ++#endif ++ + float s = Q.w == 0.f ? 0.f : 1.f / Q.w; + + return fabsf(r) * s; +@@ -504,6 +538,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo + Q.b2 = c * dw; + Q.c = d * dw; + Q.w = w; ++ ++#if ATTRIBUTES ++ memset(Q.gx, 0, sizeof(Q.gx)); ++ memset(Q.gy, 0, sizeof(Q.gy)); ++ memset(Q.gz, 0, sizeof(Q.gz)); ++ memset(Q.gw, 0, sizeof(Q.gw)); ++#endif + } + + static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) +@@ -556,6 +597,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 + quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); + } + ++#if ATTRIBUTES ++static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w) ++{ ++ // for each attribute we want to encode the following function into the quadric: ++ // (eval(pos) - attr)^2 ++ // where eval(pos) interpolates attribute across the triangle like so: ++ // eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw ++ // where gx/gy/gz/gw are gradients ++ Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; ++ Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; ++ ++ // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows: ++ // v = (d11 * d20 - d01 * d21) / denom ++ // w = (d00 * d21 - d01 * d20) / denom ++ // u = 1 - v - w ++ // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj) ++ const Vector3& v0 = p10; ++ const Vector3& v1 = p20; ++ float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z; ++ float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; ++ float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z; ++ float denom = d00 * d11 - d01 * d01; ++ float denomr = denom == 0 ? 0.f : 1.f / denom; ++ ++ // precompute gradient factors ++ // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes ++ float gx1 = (d11 * v0.x - d01 * v1.x) * denomr; ++ float gx2 = (d00 * v1.x - d01 * v0.x) * denomr; ++ float gy1 = (d11 * v0.y - d01 * v1.y) * denomr; ++ float gy2 = (d00 * v1.y - d01 * v0.y) * denomr; ++ float gz1 = (d11 * v0.z - d01 * v1.z) * denomr; ++ float gz2 = (d00 * v1.z - d01 * v0.z) * denomr; ++ ++ for (int k = 0; k < ATTRIBUTES; ++k) ++ { ++ float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k]; ++ ++ // compute gradient of eval(pos) for x/y/z/w ++ // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w ++ float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0); ++ float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0); ++ float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0); ++ float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz; ++ ++ // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K ++ // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields ++ Q.a00 += w * (gx * gx); ++ Q.a11 += w * (gy * gy); ++ Q.a22 += w * (gz * gz); ++ ++ Q.a10 += w * (gy * gx); ++ Q.a20 += w * (gz * gx); ++ Q.a21 += w * (gz * gy); ++ ++ Q.b0 += w * (gx * gw); ++ Q.b1 += w * (gy * gw); ++ Q.b2 += w * (gz * gw); ++ ++ Q.c += w * (gw * gw); ++ ++ // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError ++ Q.gx[k] = w * gx; ++ Q.gy[k] = w * gy; ++ Q.gz[k] = w * gz; ++ Q.gw[k] = w * gw; ++ ++#if TRACE > 2 ++ printf("attr%d: %e %e %e\n", ++ k, ++ (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0), ++ (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1), ++ (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2) ++ ); ++#endif ++ } ++} ++#endif ++ + static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) + { + for (size_t i = 0; i < index_count; i += 3) +@@ -567,6 +686,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic + Quadric Q; + quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); + ++#if ATTRIBUTES ++ quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w); ++#endif + quadricAdd(vertex_quadrics[remap[i0]], Q); + quadricAdd(vertex_quadrics[remap[i1]], Q); + quadricAdd(vertex_quadrics[remap[i2]], Q); +@@ -1259,13 +1381,19 @@ unsigned int* meshopt_simplifyDebugLoopBack = 0; + #endif + + size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) ++{ ++ return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0); ++} ++ ++size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) + { + using namespace meshopt; + + assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); ++ assert(vertex_stride > 0 && vertex_stride <= 256); ++ assert(vertex_stride % sizeof(float) == 0); + assert(target_index_count <= index_count); ++ assert(attribute_count <= ATTRIBUTES); + + meshopt_Allocator allocator; + +@@ -1279,7 +1407,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + // build position remap that maps each vertex to the one with identical position + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count); +- buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); ++ buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator); + + // classify vertices; vertex kind determines collapse rules, see kCanCollapse + unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count); +@@ -1303,7 +1431,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + #endif + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); +- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); ++ rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride); ++ ++#if ATTRIBUTES ++ for (size_t i = 0; i < vertex_count; ++i) ++ { ++ memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a)); ++ ++ for (size_t k = 0; k < attribute_count; ++k) ++ { ++ float a = attributes[i * attribute_count + k]; ++ ++ vertex_positions[i].a[k] = a * attribute_weights[k]; ++ } ++ } ++#endif + + Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count); + memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); +@@ -1395,7 +1537,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + + // result_error is quadratic; we need to remap it back to linear + if (out_result_error) ++ { + *out_result_error = sqrtf(result_error); ++ } + + return result_count; + } diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index b2cb589462..0f10ebef4b 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -20,6 +20,8 @@ #define TRACESTATS(i) (void)0 #endif +#define ATTRIBUTES 8 + // This work is based on: // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 // Michael Garland. Quadric-based polygonal surface simplification. 1999 @@ -118,8 +120,13 @@ struct PositionHasher { const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float); + // scramble bits to make sure that integer coordinates have entropy in lower bits + unsigned int x = key[0] ^ (key[0] >> 17); + unsigned int y = key[1] ^ (key[1] >> 17); + unsigned int z = key[2] ^ (key[2] >> 17); + // Optimized Spatial Hashing for Collision Detection of Deformable Objects - return (key[0] * 73856093) ^ (key[1] * 19349663) ^ (key[2] * 83492791); + return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791); } bool equal(unsigned int lhs, unsigned int rhs) const @@ -358,6 +365,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned struct Vector3 { float x, y, z; + +#if ATTRIBUTES + float a[ATTRIBUTES]; +#endif }; static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) @@ -414,6 +425,13 @@ struct Quadric float a10, a20, a21; float b0, b1, b2, c; float w; + +#if ATTRIBUTES + float gx[ATTRIBUTES]; + float gy[ATTRIBUTES]; + float gz[ATTRIBUTES]; + float gw[ATTRIBUTES]; +#endif }; struct Collapse @@ -456,6 +474,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R) Q.b2 += R.b2; Q.c += R.c; Q.w += R.w; + +#if ATTRIBUTES + for (int k = 0; k < ATTRIBUTES; ++k) + { + Q.gx[k] += R.gx[k]; + Q.gy[k] += R.gy[k]; + Q.gz[k] += R.gz[k]; + Q.gw[k] += R.gw[k]; + } +#endif } static float quadricError(const Quadric& Q, const Vector3& v) @@ -481,6 +509,17 @@ static float quadricError(const Quadric& Q, const Vector3& v) r += ry * v.y; r += rz * v.z; +#if ATTRIBUTES + // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr + for (int k = 0; k < ATTRIBUTES; ++k) + { + float a = v.a[k]; + + r += a * a * Q.w; + r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]); + } +#endif + float s = Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; @@ -504,6 +543,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo Q.b2 = c * dw; Q.c = d * dw; Q.w = w; + +#if ATTRIBUTES + memset(Q.gx, 0, sizeof(Q.gx)); + memset(Q.gy, 0, sizeof(Q.gy)); + memset(Q.gz, 0, sizeof(Q.gz)); + memset(Q.gw, 0, sizeof(Q.gw)); +#endif } static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) @@ -556,6 +602,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); } +#if ATTRIBUTES +static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w) +{ + // for each attribute we want to encode the following function into the quadric: + // (eval(pos) - attr)^2 + // where eval(pos) interpolates attribute across the triangle like so: + // eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw + // where gx/gy/gz/gw are gradients + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + + // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows: + // v = (d11 * d20 - d01 * d21) / denom + // w = (d00 * d21 - d01 * d20) / denom + // u = 1 - v - w + // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj) + const Vector3& v0 = p10; + const Vector3& v1 = p20; + float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z; + float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; + float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z; + float denom = d00 * d11 - d01 * d01; + float denomr = denom == 0 ? 0.f : 1.f / denom; + + // precompute gradient factors + // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes + float gx1 = (d11 * v0.x - d01 * v1.x) * denomr; + float gx2 = (d00 * v1.x - d01 * v0.x) * denomr; + float gy1 = (d11 * v0.y - d01 * v1.y) * denomr; + float gy2 = (d00 * v1.y - d01 * v0.y) * denomr; + float gz1 = (d11 * v0.z - d01 * v1.z) * denomr; + float gz2 = (d00 * v1.z - d01 * v0.z) * denomr; + + for (int k = 0; k < ATTRIBUTES; ++k) + { + float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k]; + + // compute gradient of eval(pos) for x/y/z/w + // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w + float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0); + float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0); + float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0); + float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz; + + // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K + // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields + Q.a00 += w * (gx * gx); + Q.a11 += w * (gy * gy); + Q.a22 += w * (gz * gz); + + Q.a10 += w * (gy * gx); + Q.a20 += w * (gz * gx); + Q.a21 += w * (gz * gy); + + Q.b0 += w * (gx * gw); + Q.b1 += w * (gy * gw); + Q.b2 += w * (gz * gw); + + Q.c += w * (gw * gw); + + // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError + Q.gx[k] = w * gx; + Q.gy[k] = w * gy; + Q.gz[k] = w * gz; + Q.gw[k] = w * gw; + +#if TRACE > 2 + printf("attr%d: %e %e %e\n", + k, + (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0), + (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1), + (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2) + ); +#endif + } +} +#endif + static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) { for (size_t i = 0; i < index_count; i += 3) @@ -567,6 +691,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic Quadric Q; quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); +#if ATTRIBUTES + quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w); +#endif quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); quadricAdd(vertex_quadrics[remap[i2]], Q); @@ -1038,7 +1165,7 @@ struct IdHasher struct TriangleHasher { - unsigned int* indices; + const unsigned int* indices; size_t hash(unsigned int i) const { @@ -1253,19 +1380,26 @@ static float interpolate(float y, float x0, float y0, float x1, float y1, float } // namespace meshopt #ifndef NDEBUG -unsigned char* meshopt_simplifyDebugKind = 0; -unsigned int* meshopt_simplifyDebugLoop = 0; -unsigned int* meshopt_simplifyDebugLoopBack = 0; +// Note: this is only exposed for debug visualization purposes; do *not* use these in debug builds +MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = 0; +MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0; +MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; #endif size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) { + return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0); +} + +size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) +{ using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); - assert(vertex_positions_stride % sizeof(float) == 0); + assert(vertex_stride > 0 && vertex_stride <= 256); + assert(vertex_stride % sizeof(float) == 0); assert(target_index_count <= index_count); + assert(attribute_count <= ATTRIBUTES); meshopt_Allocator allocator; @@ -1279,7 +1413,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, // build position remap that maps each vertex to the one with identical position unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count); - buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); + buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator); // classify vertices; vertex kind determines collapse rules, see kCanCollapse unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count); @@ -1303,7 +1437,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, #endif Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); - rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride); + +#if ATTRIBUTES + for (size_t i = 0; i < vertex_count; ++i) + { + memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a)); + + for (size_t k = 0; k < attribute_count; ++k) + { + float a = attributes[i * attribute_count + k]; + + vertex_positions[i].a[k] = a * attribute_weights[k]; + } + } +#endif Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count); memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); @@ -1395,7 +1543,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, // result_error is quadratic; we need to remap it back to linear if (out_result_error) + { *out_result_error = sqrtf(result_error); + } return result_count; } diff --git a/thirdparty/misc/patches/polypartition-godot-types.patch b/thirdparty/misc/patches/polypartition-godot-types.patch index 59fdb2707c..782f02e8dc 100644 --- a/thirdparty/misc/patches/polypartition-godot-types.patch +++ b/thirdparty/misc/patches/polypartition-godot-types.patch @@ -1,5 +1,5 @@ diff --git a/thirdparty/misc/polypartition.cpp b/thirdparty/misc/polypartition.cpp -index 3a8a6efa8319..4f1b6dcb21d8 100644 +index 3a8a6efa83..5e94793b79 100644 --- a/thirdparty/misc/polypartition.cpp +++ b/thirdparty/misc/polypartition.cpp @@ -23,10 +23,7 @@ @@ -510,7 +510,7 @@ index 3a8a6efa8319..4f1b6dcb21d8 100644 - return 0; - } - numvertices += iter->GetNumPoints(); -+ for (iter = inpolys->front(); iter; iter++) { ++ for (iter = inpolys->front(); iter; iter = iter->next()) { + numvertices += iter->get().GetNumPoints(); } @@ -521,7 +521,7 @@ index 3a8a6efa8319..4f1b6dcb21d8 100644 polystartindex = 0; - for (iter = inpolys->begin(); iter != inpolys->end(); iter++) { - poly = &(*iter); -+ for (iter = inpolys->front(); iter; iter++) { ++ for (iter = inpolys->front(); iter; iter = iter->next()) { + poly = &(iter->get()); polyendindex = polystartindex + poly->GetNumPoints() - 1; for (i = 0; i < poly->GetNumPoints(); i++) { @@ -569,7 +569,7 @@ index 3a8a6efa8319..4f1b6dcb21d8 100644 newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); - if (edgeIter == edgeTree.begin()) { -+ if (edgeIter == edgeTree.front()) { ++ if (edgeIter == nullptr || edgeIter == edgeTree.front()) { error = true; break; } @@ -606,7 +606,7 @@ index 3a8a6efa8319..4f1b6dcb21d8 100644 newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); - if (edgeIter == edgeTree.begin()) { -+ if (edgeIter == edgeTree.front()) { ++ if (edgeIter == nullptr || edgeIter == edgeTree.front()) { error = true; break; } @@ -648,7 +648,7 @@ index 3a8a6efa8319..4f1b6dcb21d8 100644 newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); - if (edgeIter == edgeTree.begin()) { -+ if (edgeIter == edgeTree.front()) { ++ if (edgeIter == nullptr || edgeIter == edgeTree.front()) { error = true; break; } @@ -716,7 +716,7 @@ index 3a8a6efa8319..4f1b6dcb21d8 100644 } } diff --git a/thirdparty/misc/polypartition.h b/thirdparty/misc/polypartition.h -index f163f5d2173f..b2d905a3ef76 100644 +index f163f5d217..b2d905a3ef 100644 --- a/thirdparty/misc/polypartition.h +++ b/thirdparty/misc/polypartition.h @@ -24,8 +24,9 @@ diff --git a/thirdparty/misc/polypartition.cpp b/thirdparty/misc/polypartition.cpp index 4f1b6dcb21..5e94793b79 100644 --- a/thirdparty/misc/polypartition.cpp +++ b/thirdparty/misc/polypartition.cpp @@ -1289,7 +1289,7 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto bool error = false; numvertices = 0; - for (iter = inpolys->front(); iter; iter++) { + for (iter = inpolys->front(); iter; iter = iter->next()) { numvertices += iter->get().GetNumPoints(); } @@ -1298,7 +1298,7 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newnumvertices = numvertices; polystartindex = 0; - for (iter = inpolys->front(); iter; iter++) { + for (iter = inpolys->front(); iter; iter = iter->next()) { poly = &(iter->get()); polyendindex = polystartindex + poly->GetNumPoints() - 1; for (i = 0; i < poly->GetNumPoints(); i++) { @@ -1408,7 +1408,7 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v->p; newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); - if (edgeIter == edgeTree.front()) { + if (edgeIter == nullptr || edgeIter == edgeTree.front()) { error = true; break; } @@ -1449,7 +1449,7 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v->p; newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); - if (edgeIter == edgeTree.front()) { + if (edgeIter == nullptr || edgeIter == edgeTree.front()) { error = true; break; } @@ -1494,7 +1494,7 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v->p; newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); - if (edgeIter == edgeTree.front()) { + if (edgeIter == nullptr || edgeIter == edgeTree.front()) { error = true; break; } diff --git a/thirdparty/misc/smolv.cpp b/thirdparty/misc/smolv.cpp new file mode 100644 index 0000000000..26ed7294f9 --- /dev/null +++ b/thirdparty/misc/smolv.cpp @@ -0,0 +1,2108 @@ +// smol-v - public domain - https://github.com/aras-p/smol-v +// authored 2016-2020 by Aras Pranckevicius +// no warranty implied; use at your own risk +// See end of file for license information. + +#include "smolv.h" +#include <stdint.h> +#include <vector> +#include <algorithm> +#include <cstdio> +#include <cstring> + +#if !defined(_MSC_VER) && __cplusplus < 201103L +#define static_assert(x,y) +#endif + +#define _SMOLV_ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) + +// -------------------------------------------------------------------------------------------- +// Metadata about known SPIR-V operations + +enum SpvOp +{ + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpVectorShuffleCompact = 13, // not in SPIR-V, added for SMOL-V! + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, + SpvOpSizeOf = 321, + SpvOpTypePipeStorage = 322, + SpvOpConstantPipeStorage = 323, + SpvOpCreatePipeFromPipeStorage = 324, + SpvOpGetKernelLocalSizeForSubgroupCount = 325, + SpvOpGetKernelMaxNumSubgroups = 326, + SpvOpTypeNamedBarrier = 327, + SpvOpNamedBarrierInitialize = 328, + SpvOpMemoryNamedBarrier = 329, + SpvOpModuleProcessed = 330, + SpvOpExecutionModeId = 331, + SpvOpDecorateId = 332, + SpvOpGroupNonUniformElect = 333, + SpvOpGroupNonUniformAll = 334, + SpvOpGroupNonUniformAny = 335, + SpvOpGroupNonUniformAllEqual = 336, + SpvOpGroupNonUniformBroadcast = 337, + SpvOpGroupNonUniformBroadcastFirst = 338, + SpvOpGroupNonUniformBallot = 339, + SpvOpGroupNonUniformInverseBallot = 340, + SpvOpGroupNonUniformBallotBitExtract = 341, + SpvOpGroupNonUniformBallotBitCount = 342, + SpvOpGroupNonUniformBallotFindLSB = 343, + SpvOpGroupNonUniformBallotFindMSB = 344, + SpvOpGroupNonUniformShuffle = 345, + SpvOpGroupNonUniformShuffleXor = 346, + SpvOpGroupNonUniformShuffleUp = 347, + SpvOpGroupNonUniformShuffleDown = 348, + SpvOpGroupNonUniformIAdd = 349, + SpvOpGroupNonUniformFAdd = 350, + SpvOpGroupNonUniformIMul = 351, + SpvOpGroupNonUniformFMul = 352, + SpvOpGroupNonUniformSMin = 353, + SpvOpGroupNonUniformUMin = 354, + SpvOpGroupNonUniformFMin = 355, + SpvOpGroupNonUniformSMax = 356, + SpvOpGroupNonUniformUMax = 357, + SpvOpGroupNonUniformFMax = 358, + SpvOpGroupNonUniformBitwiseAnd = 359, + SpvOpGroupNonUniformBitwiseOr = 360, + SpvOpGroupNonUniformBitwiseXor = 361, + SpvOpGroupNonUniformLogicalAnd = 362, + SpvOpGroupNonUniformLogicalOr = 363, + SpvOpGroupNonUniformLogicalXor = 364, + SpvOpGroupNonUniformQuadBroadcast = 365, + SpvOpGroupNonUniformQuadSwap = 366, +}; +static const int kKnownOpsCount = SpvOpGroupNonUniformQuadSwap+1; + + +static const char* kSpirvOpNames[] = +{ + "Nop", + "Undef", + "SourceContinued", + "Source", + "SourceExtension", + "Name", + "MemberName", + "String", + "Line", + "#9", + "Extension", + "ExtInstImport", + "ExtInst", + "VectorShuffleCompact", + "MemoryModel", + "EntryPoint", + "ExecutionMode", + "Capability", + "#18", + "TypeVoid", + "TypeBool", + "TypeInt", + "TypeFloat", + "TypeVector", + "TypeMatrix", + "TypeImage", + "TypeSampler", + "TypeSampledImage", + "TypeArray", + "TypeRuntimeArray", + "TypeStruct", + "TypeOpaque", + "TypePointer", + "TypeFunction", + "TypeEvent", + "TypeDeviceEvent", + "TypeReserveId", + "TypeQueue", + "TypePipe", + "TypeForwardPointer", + "#40", + "ConstantTrue", + "ConstantFalse", + "Constant", + "ConstantComposite", + "ConstantSampler", + "ConstantNull", + "#47", + "SpecConstantTrue", + "SpecConstantFalse", + "SpecConstant", + "SpecConstantComposite", + "SpecConstantOp", + "#53", + "Function", + "FunctionParameter", + "FunctionEnd", + "FunctionCall", + "#58", + "Variable", + "ImageTexelPointer", + "Load", + "Store", + "CopyMemory", + "CopyMemorySized", + "AccessChain", + "InBoundsAccessChain", + "PtrAccessChain", + "ArrayLength", + "GenericPtrMemSemantics", + "InBoundsPtrAccessChain", + "Decorate", + "MemberDecorate", + "DecorationGroup", + "GroupDecorate", + "GroupMemberDecorate", + "#76", + "VectorExtractDynamic", + "VectorInsertDynamic", + "VectorShuffle", + "CompositeConstruct", + "CompositeExtract", + "CompositeInsert", + "CopyObject", + "Transpose", + "#85", + "SampledImage", + "ImageSampleImplicitLod", + "ImageSampleExplicitLod", + "ImageSampleDrefImplicitLod", + "ImageSampleDrefExplicitLod", + "ImageSampleProjImplicitLod", + "ImageSampleProjExplicitLod", + "ImageSampleProjDrefImplicitLod", + "ImageSampleProjDrefExplicitLod", + "ImageFetch", + "ImageGather", + "ImageDrefGather", + "ImageRead", + "ImageWrite", + "Image", + "ImageQueryFormat", + "ImageQueryOrder", + "ImageQuerySizeLod", + "ImageQuerySize", + "ImageQueryLod", + "ImageQueryLevels", + "ImageQuerySamples", + "#108", + "ConvertFToU", + "ConvertFToS", + "ConvertSToF", + "ConvertUToF", + "UConvert", + "SConvert", + "FConvert", + "QuantizeToF16", + "ConvertPtrToU", + "SatConvertSToU", + "SatConvertUToS", + "ConvertUToPtr", + "PtrCastToGeneric", + "GenericCastToPtr", + "GenericCastToPtrExplicit", + "Bitcast", + "#125", + "SNegate", + "FNegate", + "IAdd", + "FAdd", + "ISub", + "FSub", + "IMul", + "FMul", + "UDiv", + "SDiv", + "FDiv", + "UMod", + "SRem", + "SMod", + "FRem", + "FMod", + "VectorTimesScalar", + "MatrixTimesScalar", + "VectorTimesMatrix", + "MatrixTimesVector", + "MatrixTimesMatrix", + "OuterProduct", + "Dot", + "IAddCarry", + "ISubBorrow", + "UMulExtended", + "SMulExtended", + "#153", + "Any", + "All", + "IsNan", + "IsInf", + "IsFinite", + "IsNormal", + "SignBitSet", + "LessOrGreater", + "Ordered", + "Unordered", + "LogicalEqual", + "LogicalNotEqual", + "LogicalOr", + "LogicalAnd", + "LogicalNot", + "Select", + "IEqual", + "INotEqual", + "UGreaterThan", + "SGreaterThan", + "UGreaterThanEqual", + "SGreaterThanEqual", + "ULessThan", + "SLessThan", + "ULessThanEqual", + "SLessThanEqual", + "FOrdEqual", + "FUnordEqual", + "FOrdNotEqual", + "FUnordNotEqual", + "FOrdLessThan", + "FUnordLessThan", + "FOrdGreaterThan", + "FUnordGreaterThan", + "FOrdLessThanEqual", + "FUnordLessThanEqual", + "FOrdGreaterThanEqual", + "FUnordGreaterThanEqual", + "#192", + "#193", + "ShiftRightLogical", + "ShiftRightArithmetic", + "ShiftLeftLogical", + "BitwiseOr", + "BitwiseXor", + "BitwiseAnd", + "Not", + "BitFieldInsert", + "BitFieldSExtract", + "BitFieldUExtract", + "BitReverse", + "BitCount", + "#206", + "DPdx", + "DPdy", + "Fwidth", + "DPdxFine", + "DPdyFine", + "FwidthFine", + "DPdxCoarse", + "DPdyCoarse", + "FwidthCoarse", + "#216", + "#217", + "EmitVertex", + "EndPrimitive", + "EmitStreamVertex", + "EndStreamPrimitive", + "#222", + "#223", + "ControlBarrier", + "MemoryBarrier", + "#226", + "AtomicLoad", + "AtomicStore", + "AtomicExchange", + "AtomicCompareExchange", + "AtomicCompareExchangeWeak", + "AtomicIIncrement", + "AtomicIDecrement", + "AtomicIAdd", + "AtomicISub", + "AtomicSMin", + "AtomicUMin", + "AtomicSMax", + "AtomicUMax", + "AtomicAnd", + "AtomicOr", + "AtomicXor", + "#243", + "#244", + "Phi", + "LoopMerge", + "SelectionMerge", + "Label", + "Branch", + "BranchConditional", + "Switch", + "Kill", + "Return", + "ReturnValue", + "Unreachable", + "LifetimeStart", + "LifetimeStop", + "#258", + "GroupAsyncCopy", + "GroupWaitEvents", + "GroupAll", + "GroupAny", + "GroupBroadcast", + "GroupIAdd", + "GroupFAdd", + "GroupFMin", + "GroupUMin", + "GroupSMin", + "GroupFMax", + "GroupUMax", + "GroupSMax", + "#272", + "#273", + "ReadPipe", + "WritePipe", + "ReservedReadPipe", + "ReservedWritePipe", + "ReserveReadPipePackets", + "ReserveWritePipePackets", + "CommitReadPipe", + "CommitWritePipe", + "IsValidReserveId", + "GetNumPipePackets", + "GetMaxPipePackets", + "GroupReserveReadPipePackets", + "GroupReserveWritePipePackets", + "GroupCommitReadPipe", + "GroupCommitWritePipe", + "#289", + "#290", + "EnqueueMarker", + "EnqueueKernel", + "GetKernelNDrangeSubGroupCount", + "GetKernelNDrangeMaxSubGroupSize", + "GetKernelWorkGroupSize", + "GetKernelPreferredWorkGroupSizeMultiple", + "RetainEvent", + "ReleaseEvent", + "CreateUserEvent", + "IsValidEvent", + "SetUserEventStatus", + "CaptureEventProfilingInfo", + "GetDefaultQueue", + "BuildNDRange", + "ImageSparseSampleImplicitLod", + "ImageSparseSampleExplicitLod", + "ImageSparseSampleDrefImplicitLod", + "ImageSparseSampleDrefExplicitLod", + "ImageSparseSampleProjImplicitLod", + "ImageSparseSampleProjExplicitLod", + "ImageSparseSampleProjDrefImplicitLod", + "ImageSparseSampleProjDrefExplicitLod", + "ImageSparseFetch", + "ImageSparseGather", + "ImageSparseDrefGather", + "ImageSparseTexelsResident", + "NoLine", + "AtomicFlagTestAndSet", + "AtomicFlagClear", + "ImageSparseRead", + "SizeOf", + "TypePipeStorage", + "ConstantPipeStorage", + "CreatePipeFromPipeStorage", + "GetKernelLocalSizeForSubgroupCount", + "GetKernelMaxNumSubgroups", + "TypeNamedBarrier", + "NamedBarrierInitialize", + "MemoryNamedBarrier", + "ModuleProcessed", + "ExecutionModeId", + "DecorateId", + "GroupNonUniformElect", + "GroupNonUniformAll", + "GroupNonUniformAny", + "GroupNonUniformAllEqual", + "GroupNonUniformBroadcast", + "GroupNonUniformBroadcastFirst", + "GroupNonUniformBallot", + "GroupNonUniformInverseBallot", + "GroupNonUniformBallotBitExtract", + "GroupNonUniformBallotBitCount", + "GroupNonUniformBallotFindLSB", + "GroupNonUniformBallotFindMSB", + "GroupNonUniformShuffle", + "GroupNonUniformShuffleXor", + "GroupNonUniformShuffleUp", + "GroupNonUniformShuffleDown", + "GroupNonUniformIAdd", + "GroupNonUniformFAdd", + "GroupNonUniformIMul", + "GroupNonUniformFMul", + "GroupNonUniformSMin", + "GroupNonUniformUMin", + "GroupNonUniformFMin", + "GroupNonUniformSMax", + "GroupNonUniformUMax", + "GroupNonUniformFMax", + "GroupNonUniformBitwiseAnd", + "GroupNonUniformBitwiseOr", + "GroupNonUniformBitwiseXor", + "GroupNonUniformLogicalAnd", + "GroupNonUniformLogicalOr", + "GroupNonUniformLogicalXor", + "GroupNonUniformQuadBroadcast", + "GroupNonUniformQuadSwap", +}; +static_assert(_SMOLV_ARRAY_SIZE(kSpirvOpNames) == kKnownOpsCount, "kSpirvOpNames table mismatch with known SpvOps"); + + +struct OpData +{ + uint8_t hasResult; // does it have result ID? + uint8_t hasType; // does it have type ID? + uint8_t deltaFromResult; // How many words after (optional) type+result to write out as deltas from result? + uint8_t varrest; // should the rest of words be written in varint encoding? +}; +static const OpData kSpirvOpData[] = +{ + {0, 0, 0, 0}, // Nop + {1, 1, 0, 0}, // Undef + {0, 0, 0, 0}, // SourceContinued + {0, 0, 0, 1}, // Source + {0, 0, 0, 0}, // SourceExtension + {0, 0, 0, 0}, // Name + {0, 0, 0, 0}, // MemberName + {0, 0, 0, 0}, // String + {0, 0, 0, 1}, // Line + {1, 1, 0, 0}, // #9 + {0, 0, 0, 0}, // Extension + {1, 0, 0, 0}, // ExtInstImport + {1, 1, 0, 1}, // ExtInst + {1, 1, 2, 1}, // VectorShuffleCompact - new in SMOLV + {0, 0, 0, 1}, // MemoryModel + {0, 0, 0, 1}, // EntryPoint + {0, 0, 0, 1}, // ExecutionMode + {0, 0, 0, 1}, // Capability + {1, 1, 0, 0}, // #18 + {1, 0, 0, 1}, // TypeVoid + {1, 0, 0, 1}, // TypeBool + {1, 0, 0, 1}, // TypeInt + {1, 0, 0, 1}, // TypeFloat + {1, 0, 0, 1}, // TypeVector + {1, 0, 0, 1}, // TypeMatrix + {1, 0, 0, 1}, // TypeImage + {1, 0, 0, 1}, // TypeSampler + {1, 0, 0, 1}, // TypeSampledImage + {1, 0, 0, 1}, // TypeArray + {1, 0, 0, 1}, // TypeRuntimeArray + {1, 0, 0, 1}, // TypeStruct + {1, 0, 0, 1}, // TypeOpaque + {1, 0, 0, 1}, // TypePointer + {1, 0, 0, 1}, // TypeFunction + {1, 0, 0, 1}, // TypeEvent + {1, 0, 0, 1}, // TypeDeviceEvent + {1, 0, 0, 1}, // TypeReserveId + {1, 0, 0, 1}, // TypeQueue + {1, 0, 0, 1}, // TypePipe + {0, 0, 0, 1}, // TypeForwardPointer + {1, 1, 0, 0}, // #40 + {1, 1, 0, 0}, // ConstantTrue + {1, 1, 0, 0}, // ConstantFalse + {1, 1, 0, 0}, // Constant + {1, 1, 9, 0}, // ConstantComposite + {1, 1, 0, 1}, // ConstantSampler + {1, 1, 0, 0}, // ConstantNull + {1, 1, 0, 0}, // #47 + {1, 1, 0, 0}, // SpecConstantTrue + {1, 1, 0, 0}, // SpecConstantFalse + {1, 1, 0, 0}, // SpecConstant + {1, 1, 9, 0}, // SpecConstantComposite + {1, 1, 0, 0}, // SpecConstantOp + {1, 1, 0, 0}, // #53 + {1, 1, 0, 1}, // Function + {1, 1, 0, 0}, // FunctionParameter + {0, 0, 0, 0}, // FunctionEnd + {1, 1, 9, 0}, // FunctionCall + {1, 1, 0, 0}, // #58 + {1, 1, 0, 1}, // Variable + {1, 1, 0, 0}, // ImageTexelPointer + {1, 1, 1, 1}, // Load + {0, 0, 2, 1}, // Store + {0, 0, 0, 0}, // CopyMemory + {0, 0, 0, 0}, // CopyMemorySized + {1, 1, 0, 1}, // AccessChain + {1, 1, 0, 0}, // InBoundsAccessChain + {1, 1, 0, 0}, // PtrAccessChain + {1, 1, 0, 0}, // ArrayLength + {1, 1, 0, 0}, // GenericPtrMemSemantics + {1, 1, 0, 0}, // InBoundsPtrAccessChain + {0, 0, 0, 1}, // Decorate + {0, 0, 0, 1}, // MemberDecorate + {1, 0, 0, 0}, // DecorationGroup + {0, 0, 0, 0}, // GroupDecorate + {0, 0, 0, 0}, // GroupMemberDecorate + {1, 1, 0, 0}, // #76 + {1, 1, 1, 1}, // VectorExtractDynamic + {1, 1, 2, 1}, // VectorInsertDynamic + {1, 1, 2, 1}, // VectorShuffle + {1, 1, 9, 0}, // CompositeConstruct + {1, 1, 1, 1}, // CompositeExtract + {1, 1, 2, 1}, // CompositeInsert + {1, 1, 1, 0}, // CopyObject + {1, 1, 0, 0}, // Transpose + {1, 1, 0, 0}, // #85 + {1, 1, 0, 0}, // SampledImage + {1, 1, 2, 1}, // ImageSampleImplicitLod + {1, 1, 2, 1}, // ImageSampleExplicitLod + {1, 1, 3, 1}, // ImageSampleDrefImplicitLod + {1, 1, 3, 1}, // ImageSampleDrefExplicitLod + {1, 1, 2, 1}, // ImageSampleProjImplicitLod + {1, 1, 2, 1}, // ImageSampleProjExplicitLod + {1, 1, 3, 1}, // ImageSampleProjDrefImplicitLod + {1, 1, 3, 1}, // ImageSampleProjDrefExplicitLod + {1, 1, 2, 1}, // ImageFetch + {1, 1, 3, 1}, // ImageGather + {1, 1, 3, 1}, // ImageDrefGather + {1, 1, 2, 1}, // ImageRead + {0, 0, 3, 1}, // ImageWrite + {1, 1, 1, 0}, // Image + {1, 1, 1, 0}, // ImageQueryFormat + {1, 1, 1, 0}, // ImageQueryOrder + {1, 1, 2, 0}, // ImageQuerySizeLod + {1, 1, 1, 0}, // ImageQuerySize + {1, 1, 2, 0}, // ImageQueryLod + {1, 1, 1, 0}, // ImageQueryLevels + {1, 1, 1, 0}, // ImageQuerySamples + {1, 1, 0, 0}, // #108 + {1, 1, 1, 0}, // ConvertFToU + {1, 1, 1, 0}, // ConvertFToS + {1, 1, 1, 0}, // ConvertSToF + {1, 1, 1, 0}, // ConvertUToF + {1, 1, 1, 0}, // UConvert + {1, 1, 1, 0}, // SConvert + {1, 1, 1, 0}, // FConvert + {1, 1, 1, 0}, // QuantizeToF16 + {1, 1, 1, 0}, // ConvertPtrToU + {1, 1, 1, 0}, // SatConvertSToU + {1, 1, 1, 0}, // SatConvertUToS + {1, 1, 1, 0}, // ConvertUToPtr + {1, 1, 1, 0}, // PtrCastToGeneric + {1, 1, 1, 0}, // GenericCastToPtr + {1, 1, 1, 1}, // GenericCastToPtrExplicit + {1, 1, 1, 0}, // Bitcast + {1, 1, 0, 0}, // #125 + {1, 1, 1, 0}, // SNegate + {1, 1, 1, 0}, // FNegate + {1, 1, 2, 0}, // IAdd + {1, 1, 2, 0}, // FAdd + {1, 1, 2, 0}, // ISub + {1, 1, 2, 0}, // FSub + {1, 1, 2, 0}, // IMul + {1, 1, 2, 0}, // FMul + {1, 1, 2, 0}, // UDiv + {1, 1, 2, 0}, // SDiv + {1, 1, 2, 0}, // FDiv + {1, 1, 2, 0}, // UMod + {1, 1, 2, 0}, // SRem + {1, 1, 2, 0}, // SMod + {1, 1, 2, 0}, // FRem + {1, 1, 2, 0}, // FMod + {1, 1, 2, 0}, // VectorTimesScalar + {1, 1, 2, 0}, // MatrixTimesScalar + {1, 1, 2, 0}, // VectorTimesMatrix + {1, 1, 2, 0}, // MatrixTimesVector + {1, 1, 2, 0}, // MatrixTimesMatrix + {1, 1, 2, 0}, // OuterProduct + {1, 1, 2, 0}, // Dot + {1, 1, 2, 0}, // IAddCarry + {1, 1, 2, 0}, // ISubBorrow + {1, 1, 2, 0}, // UMulExtended + {1, 1, 2, 0}, // SMulExtended + {1, 1, 0, 0}, // #153 + {1, 1, 1, 0}, // Any + {1, 1, 1, 0}, // All + {1, 1, 1, 0}, // IsNan + {1, 1, 1, 0}, // IsInf + {1, 1, 1, 0}, // IsFinite + {1, 1, 1, 0}, // IsNormal + {1, 1, 1, 0}, // SignBitSet + {1, 1, 2, 0}, // LessOrGreater + {1, 1, 2, 0}, // Ordered + {1, 1, 2, 0}, // Unordered + {1, 1, 2, 0}, // LogicalEqual + {1, 1, 2, 0}, // LogicalNotEqual + {1, 1, 2, 0}, // LogicalOr + {1, 1, 2, 0}, // LogicalAnd + {1, 1, 1, 0}, // LogicalNot + {1, 1, 3, 0}, // Select + {1, 1, 2, 0}, // IEqual + {1, 1, 2, 0}, // INotEqual + {1, 1, 2, 0}, // UGreaterThan + {1, 1, 2, 0}, // SGreaterThan + {1, 1, 2, 0}, // UGreaterThanEqual + {1, 1, 2, 0}, // SGreaterThanEqual + {1, 1, 2, 0}, // ULessThan + {1, 1, 2, 0}, // SLessThan + {1, 1, 2, 0}, // ULessThanEqual + {1, 1, 2, 0}, // SLessThanEqual + {1, 1, 2, 0}, // FOrdEqual + {1, 1, 2, 0}, // FUnordEqual + {1, 1, 2, 0}, // FOrdNotEqual + {1, 1, 2, 0}, // FUnordNotEqual + {1, 1, 2, 0}, // FOrdLessThan + {1, 1, 2, 0}, // FUnordLessThan + {1, 1, 2, 0}, // FOrdGreaterThan + {1, 1, 2, 0}, // FUnordGreaterThan + {1, 1, 2, 0}, // FOrdLessThanEqual + {1, 1, 2, 0}, // FUnordLessThanEqual + {1, 1, 2, 0}, // FOrdGreaterThanEqual + {1, 1, 2, 0}, // FUnordGreaterThanEqual + {1, 1, 0, 0}, // #192 + {1, 1, 0, 0}, // #193 + {1, 1, 2, 0}, // ShiftRightLogical + {1, 1, 2, 0}, // ShiftRightArithmetic + {1, 1, 2, 0}, // ShiftLeftLogical + {1, 1, 2, 0}, // BitwiseOr + {1, 1, 2, 0}, // BitwiseXor + {1, 1, 2, 0}, // BitwiseAnd + {1, 1, 1, 0}, // Not + {1, 1, 4, 0}, // BitFieldInsert + {1, 1, 3, 0}, // BitFieldSExtract + {1, 1, 3, 0}, // BitFieldUExtract + {1, 1, 1, 0}, // BitReverse + {1, 1, 1, 0}, // BitCount + {1, 1, 0, 0}, // #206 + {1, 1, 0, 0}, // DPdx + {1, 1, 0, 0}, // DPdy + {1, 1, 0, 0}, // Fwidth + {1, 1, 0, 0}, // DPdxFine + {1, 1, 0, 0}, // DPdyFine + {1, 1, 0, 0}, // FwidthFine + {1, 1, 0, 0}, // DPdxCoarse + {1, 1, 0, 0}, // DPdyCoarse + {1, 1, 0, 0}, // FwidthCoarse + {1, 1, 0, 0}, // #216 + {1, 1, 0, 0}, // #217 + {0, 0, 0, 0}, // EmitVertex + {0, 0, 0, 0}, // EndPrimitive + {0, 0, 0, 0}, // EmitStreamVertex + {0, 0, 0, 0}, // EndStreamPrimitive + {1, 1, 0, 0}, // #222 + {1, 1, 0, 0}, // #223 + {0, 0, 3, 0}, // ControlBarrier + {0, 0, 2, 0}, // MemoryBarrier + {1, 1, 0, 0}, // #226 + {1, 1, 0, 0}, // AtomicLoad + {0, 0, 0, 0}, // AtomicStore + {1, 1, 0, 0}, // AtomicExchange + {1, 1, 0, 0}, // AtomicCompareExchange + {1, 1, 0, 0}, // AtomicCompareExchangeWeak + {1, 1, 0, 0}, // AtomicIIncrement + {1, 1, 0, 0}, // AtomicIDecrement + {1, 1, 0, 0}, // AtomicIAdd + {1, 1, 0, 0}, // AtomicISub + {1, 1, 0, 0}, // AtomicSMin + {1, 1, 0, 0}, // AtomicUMin + {1, 1, 0, 0}, // AtomicSMax + {1, 1, 0, 0}, // AtomicUMax + {1, 1, 0, 0}, // AtomicAnd + {1, 1, 0, 0}, // AtomicOr + {1, 1, 0, 0}, // AtomicXor + {1, 1, 0, 0}, // #243 + {1, 1, 0, 0}, // #244 + {1, 1, 0, 0}, // Phi + {0, 0, 2, 1}, // LoopMerge + {0, 0, 1, 1}, // SelectionMerge + {1, 0, 0, 0}, // Label + {0, 0, 1, 0}, // Branch + {0, 0, 3, 1}, // BranchConditional + {0, 0, 0, 0}, // Switch + {0, 0, 0, 0}, // Kill + {0, 0, 0, 0}, // Return + {0, 0, 0, 0}, // ReturnValue + {0, 0, 0, 0}, // Unreachable + {0, 0, 0, 0}, // LifetimeStart + {0, 0, 0, 0}, // LifetimeStop + {1, 1, 0, 0}, // #258 + {1, 1, 0, 0}, // GroupAsyncCopy + {0, 0, 0, 0}, // GroupWaitEvents + {1, 1, 0, 0}, // GroupAll + {1, 1, 0, 0}, // GroupAny + {1, 1, 0, 0}, // GroupBroadcast + {1, 1, 0, 0}, // GroupIAdd + {1, 1, 0, 0}, // GroupFAdd + {1, 1, 0, 0}, // GroupFMin + {1, 1, 0, 0}, // GroupUMin + {1, 1, 0, 0}, // GroupSMin + {1, 1, 0, 0}, // GroupFMax + {1, 1, 0, 0}, // GroupUMax + {1, 1, 0, 0}, // GroupSMax + {1, 1, 0, 0}, // #272 + {1, 1, 0, 0}, // #273 + {1, 1, 0, 0}, // ReadPipe + {1, 1, 0, 0}, // WritePipe + {1, 1, 0, 0}, // ReservedReadPipe + {1, 1, 0, 0}, // ReservedWritePipe + {1, 1, 0, 0}, // ReserveReadPipePackets + {1, 1, 0, 0}, // ReserveWritePipePackets + {0, 0, 0, 0}, // CommitReadPipe + {0, 0, 0, 0}, // CommitWritePipe + {1, 1, 0, 0}, // IsValidReserveId + {1, 1, 0, 0}, // GetNumPipePackets + {1, 1, 0, 0}, // GetMaxPipePackets + {1, 1, 0, 0}, // GroupReserveReadPipePackets + {1, 1, 0, 0}, // GroupReserveWritePipePackets + {0, 0, 0, 0}, // GroupCommitReadPipe + {0, 0, 0, 0}, // GroupCommitWritePipe + {1, 1, 0, 0}, // #289 + {1, 1, 0, 0}, // #290 + {1, 1, 0, 0}, // EnqueueMarker + {1, 1, 0, 0}, // EnqueueKernel + {1, 1, 0, 0}, // GetKernelNDrangeSubGroupCount + {1, 1, 0, 0}, // GetKernelNDrangeMaxSubGroupSize + {1, 1, 0, 0}, // GetKernelWorkGroupSize + {1, 1, 0, 0}, // GetKernelPreferredWorkGroupSizeMultiple + {0, 0, 0, 0}, // RetainEvent + {0, 0, 0, 0}, // ReleaseEvent + {1, 1, 0, 0}, // CreateUserEvent + {1, 1, 0, 0}, // IsValidEvent + {0, 0, 0, 0}, // SetUserEventStatus + {0, 0, 0, 0}, // CaptureEventProfilingInfo + {1, 1, 0, 0}, // GetDefaultQueue + {1, 1, 0, 0}, // BuildNDRange + {1, 1, 2, 1}, // ImageSparseSampleImplicitLod + {1, 1, 2, 1}, // ImageSparseSampleExplicitLod + {1, 1, 3, 1}, // ImageSparseSampleDrefImplicitLod + {1, 1, 3, 1}, // ImageSparseSampleDrefExplicitLod + {1, 1, 2, 1}, // ImageSparseSampleProjImplicitLod + {1, 1, 2, 1}, // ImageSparseSampleProjExplicitLod + {1, 1, 3, 1}, // ImageSparseSampleProjDrefImplicitLod + {1, 1, 3, 1}, // ImageSparseSampleProjDrefExplicitLod + {1, 1, 2, 1}, // ImageSparseFetch + {1, 1, 3, 1}, // ImageSparseGather + {1, 1, 3, 1}, // ImageSparseDrefGather + {1, 1, 1, 0}, // ImageSparseTexelsResident + {0, 0, 0, 0}, // NoLine + {1, 1, 0, 0}, // AtomicFlagTestAndSet + {0, 0, 0, 0}, // AtomicFlagClear + {1, 1, 0, 0}, // ImageSparseRead + {1, 1, 0, 0}, // SizeOf + {1, 1, 0, 0}, // TypePipeStorage + {1, 1, 0, 0}, // ConstantPipeStorage + {1, 1, 0, 0}, // CreatePipeFromPipeStorage + {1, 1, 0, 0}, // GetKernelLocalSizeForSubgroupCount + {1, 1, 0, 0}, // GetKernelMaxNumSubgroups + {1, 1, 0, 0}, // TypeNamedBarrier + {1, 1, 0, 1}, // NamedBarrierInitialize + {0, 0, 2, 1}, // MemoryNamedBarrier + {1, 1, 0, 0}, // ModuleProcessed + {0, 0, 0, 1}, // ExecutionModeId + {0, 0, 0, 1}, // DecorateId + {1, 1, 1, 1}, // GroupNonUniformElect + {1, 1, 1, 1}, // GroupNonUniformAll + {1, 1, 1, 1}, // GroupNonUniformAny + {1, 1, 1, 1}, // GroupNonUniformAllEqual + {1, 1, 1, 1}, // GroupNonUniformBroadcast + {1, 1, 1, 1}, // GroupNonUniformBroadcastFirst + {1, 1, 1, 1}, // GroupNonUniformBallot + {1, 1, 1, 1}, // GroupNonUniformInverseBallot + {1, 1, 1, 1}, // GroupNonUniformBallotBitExtract + {1, 1, 1, 1}, // GroupNonUniformBallotBitCount + {1, 1, 1, 1}, // GroupNonUniformBallotFindLSB + {1, 1, 1, 1}, // GroupNonUniformBallotFindMSB + {1, 1, 1, 1}, // GroupNonUniformShuffle + {1, 1, 1, 1}, // GroupNonUniformShuffleXor + {1, 1, 1, 1}, // GroupNonUniformShuffleUp + {1, 1, 1, 1}, // GroupNonUniformShuffleDown + {1, 1, 1, 1}, // GroupNonUniformIAdd + {1, 1, 1, 1}, // GroupNonUniformFAdd + {1, 1, 1, 1}, // GroupNonUniformIMul + {1, 1, 1, 1}, // GroupNonUniformFMul + {1, 1, 1, 1}, // GroupNonUniformSMin + {1, 1, 1, 1}, // GroupNonUniformUMin + {1, 1, 1, 1}, // GroupNonUniformFMin + {1, 1, 1, 1}, // GroupNonUniformSMax + {1, 1, 1, 1}, // GroupNonUniformUMax + {1, 1, 1, 1}, // GroupNonUniformFMax + {1, 1, 1, 1}, // GroupNonUniformBitwiseAnd + {1, 1, 1, 1}, // GroupNonUniformBitwiseOr + {1, 1, 1, 1}, // GroupNonUniformBitwiseXor + {1, 1, 1, 1}, // GroupNonUniformLogicalAnd + {1, 1, 1, 1}, // GroupNonUniformLogicalOr + {1, 1, 1, 1}, // GroupNonUniformLogicalXor + {1, 1, 1, 1}, // GroupNonUniformQuadBroadcast + {1, 1, 1, 1}, // GroupNonUniformQuadSwap +}; +static_assert(_SMOLV_ARRAY_SIZE(kSpirvOpData) == kKnownOpsCount, "kSpirvOpData table mismatch with known SpvOps"); + +// Instruction encoding depends on the table that describes the various SPIR-V opcodes. +// Whenever we change or expand the table, we need to bump up the SMOL-V version, and make +// sure that we can still decode files encoded by an older version. +static int smolv_GetKnownOpsCount(int version) +{ + if (version == 0) + return SpvOpModuleProcessed+1; + if (version == 1) // 2020 February, version 1 added ExecutionModeId..GroupNonUniformQuadSwap + return SpvOpGroupNonUniformQuadSwap+1; + return 0; +} + +static bool smolv_OpHasResult(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return false; + return kSpirvOpData[op].hasResult != 0; +} + +static bool smolv_OpHasType(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return false; + return kSpirvOpData[op].hasType != 0; +} + +static int smolv_OpDeltaFromResult(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return 0; + return kSpirvOpData[op].deltaFromResult; +} + +static bool smolv_OpVarRest(SpvOp op, int opsCount) +{ + if (op < 0 || op >= opsCount) + return false; + return kSpirvOpData[op].varrest != 0; +} + +static bool smolv_OpDebugInfo(SpvOp op, int opsCount) +{ + return + op == SpvOpSourceContinued || + op == SpvOpSource || + op == SpvOpSourceExtension || + op == SpvOpName || + op == SpvOpMemberName || + op == SpvOpString || + op == SpvOpLine || + op == SpvOpNoLine || + op == SpvOpModuleProcessed; +} + + +static int smolv_DecorationExtraOps(int dec) +{ + if (dec == 0 || (dec >= 2 && dec <= 5)) // RelaxedPrecision, Block..ColMajor + return 0; + if (dec >= 29 && dec <= 37) // Stream..XfbStride + return 1; + return -1; // unknown, encode length +} + + +// -------------------------------------------------------------------------------------------- + + +static bool smolv_CheckGenericHeader(const uint32_t* words, size_t wordCount, uint32_t expectedMagic, uint32_t versionMask) +{ + if (!words) + return false; + if (wordCount < 5) + return false; + + uint32_t headerMagic = words[0]; + if (headerMagic != expectedMagic) + return false; + uint32_t headerVersion = words[1] & versionMask; + if (headerVersion < 0x00010000 || headerVersion > 0x00010500) + return false; // only support 1.0 through 1.5 + + return true; +} + +static const int kSpirVHeaderMagic = 0x07230203; +static const int kSmolHeaderMagic = 0x534D4F4C; // "SMOL" + +static const int kSmolCurrEncodingVersion = 1; + +static bool smolv_CheckSpirVHeader(const uint32_t* words, size_t wordCount) +{ + //@TODO: if SPIR-V header magic was reversed, that means the file got written + // in a "big endian" order. Need to byteswap all words then. + return smolv_CheckGenericHeader(words, wordCount, kSpirVHeaderMagic, 0xFFFFFFFF); +} +static bool smolv_CheckSmolHeader(const uint8_t* bytes, size_t byteCount) +{ + if (!smolv_CheckGenericHeader((const uint32_t*)bytes, byteCount/4, kSmolHeaderMagic, 0x00FFFFFF)) + return false; + if (byteCount < 24) // one more word past header to store decoded length + return false; + // SMOL-V version + int smolVersion = ((const uint32_t*)bytes)[1] >> 24; + if (smolVersion < 0 || smolVersion > kSmolCurrEncodingVersion) + return false; + return true; +} + + +static void smolv_Write4(smolv::ByteArray& arr, uint32_t v) +{ + arr.push_back(v & 0xFF); + arr.push_back((v >> 8) & 0xFF); + arr.push_back((v >> 16) & 0xFF); + arr.push_back(v >> 24); +} + +static void smolv_Write4(uint8_t*& buf, uint32_t v) +{ + memcpy(buf, &v, 4); + buf += 4; +} + + +static bool smolv_Read4(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outv) +{ + if (data + 4 > dataEnd) + return false; + outv = (data[0]) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24); + data += 4; + return true; +} + + +// -------------------------------------------------------------------------------------------- + +// Variable-length integer encoding for unsigned integers. In each byte: +// - highest bit set if more bytes follow, cleared if this is last byte. +// - other 7 bits are the actual value payload. +// Takes 1-5 bytes to encode an integer (values between 0 and 127 take one byte, etc.). + +static void smolv_WriteVarint(smolv::ByteArray& arr, uint32_t v) +{ + while (v > 127) + { + arr.push_back((v & 127) | 128); + v >>= 7; + } + arr.push_back(v & 127); +} + +static bool smolv_ReadVarint(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outVal) +{ + uint32_t v = 0; + uint32_t shift = 0; + while (data < dataEnd) + { + uint8_t b = *data; + v |= (b & 127) << shift; + shift += 7; + data++; + if (!(b & 128)) + break; + } + outVal = v; + return true; //@TODO: report failures +} + +static uint32_t smolv_ZigEncode(int32_t i) +{ + return (uint32_t(i) << 1) ^ (i >> 31); +} + +static int32_t smolv_ZigDecode(uint32_t u) +{ + return (u & 1) ? ((u >> 1) ^ ~0) : (u >> 1); +} + + +// Remap most common Op codes (Load, Store, Decorate, VectorShuffle etc.) to be in < 16 range, for +// more compact varint encoding. This basically swaps rarely used op values that are < 16 with the +// ones that are common. + +static SpvOp smolv_RemapOp(SpvOp op) +{ +# define _SMOLV_SWAP_OP(op1,op2) if (op==op1) return op2; if (op==op2) return op1 + _SMOLV_SWAP_OP(SpvOpDecorate,SpvOpNop); // 0: 24% + _SMOLV_SWAP_OP(SpvOpLoad,SpvOpUndef); // 1: 17% + _SMOLV_SWAP_OP(SpvOpStore,SpvOpSourceContinued); // 2: 9% + _SMOLV_SWAP_OP(SpvOpAccessChain,SpvOpSource); // 3: 7.2% + _SMOLV_SWAP_OP(SpvOpVectorShuffle,SpvOpSourceExtension); // 4: 5.0% + // Name - already small enum value - 5: 4.4% + // MemberName - already small enum value - 6: 2.9% + _SMOLV_SWAP_OP(SpvOpMemberDecorate,SpvOpString); // 7: 4.0% + _SMOLV_SWAP_OP(SpvOpLabel,SpvOpLine); // 8: 0.9% + _SMOLV_SWAP_OP(SpvOpVariable,(SpvOp)9); // 9: 3.9% + _SMOLV_SWAP_OP(SpvOpFMul,SpvOpExtension); // 10: 3.9% + _SMOLV_SWAP_OP(SpvOpFAdd,SpvOpExtInstImport); // 11: 2.5% + // ExtInst - already small enum value - 12: 1.2% + // VectorShuffleCompact - already small enum value - used for compact shuffle encoding + _SMOLV_SWAP_OP(SpvOpTypePointer,SpvOpMemoryModel); // 14: 2.2% + _SMOLV_SWAP_OP(SpvOpFNegate,SpvOpEntryPoint); // 15: 1.1% +# undef _SMOLV_SWAP_OP + return op; +} + + +// For most compact varint encoding of common instructions, the instruction length should come out +// into 3 bits (be <8). SPIR-V instruction lengths are always at least 1, and for some other +// instructions they are guaranteed to be some other minimum length. Adjust the length before encoding, +// and after decoding accordingly. + +static uint32_t smolv_EncodeLen(SpvOp op, uint32_t len) +{ + len--; + if (op == SpvOpVectorShuffle) len -= 4; + if (op == SpvOpVectorShuffleCompact) len -= 4; + if (op == SpvOpDecorate) len -= 2; + if (op == SpvOpLoad) len -= 3; + if (op == SpvOpAccessChain) len -= 3; + return len; +} + +static uint32_t smolv_DecodeLen(SpvOp op, uint32_t len) +{ + len++; + if (op == SpvOpVectorShuffle) len += 4; + if (op == SpvOpVectorShuffleCompact) len += 4; + if (op == SpvOpDecorate) len += 2; + if (op == SpvOpLoad) len += 3; + if (op == SpvOpAccessChain) len += 3; + return len; +} + + +// Shuffling bits of length + opcode to be more compact in varint encoding in typical cases: +// 0x LLLL OOOO is how SPIR-V encodes it (L=length, O=op), we shuffle into: +// 0x LLLO OOLO, so that common case (op<16, len<8) is encoded into one byte. + +static bool smolv_WriteLengthOp(smolv::ByteArray& arr, uint32_t len, SpvOp op) +{ + len = smolv_EncodeLen(op, len); + // SPIR-V length field is 16 bits; if we get a larger value that means something + // was wrong, e.g. a vector shuffle instruction with less than 4 words (and our + // adjustment to common lengths in smolv_EncodeLen wrapped around) + if (len > 0xFFFF) + return false; + op = smolv_RemapOp(op); + uint32_t oplen = ((len >> 4) << 20) | ((op >> 4) << 8) | ((len & 0xF) << 4) | (op & 0xF); + smolv_WriteVarint(arr, oplen); + return true; +} + +static bool smolv_ReadLengthOp(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outLen, SpvOp& outOp) +{ + uint32_t val; + if (!smolv_ReadVarint(data, dataEnd, val)) + return false; + outLen = ((val >> 20) << 4) | ((val >> 4) & 0xF); + outOp = (SpvOp)(((val >> 4) & 0xFFF0) | (val & 0xF)); + + outOp = smolv_RemapOp(outOp); + outLen = smolv_DecodeLen(outOp, outLen); + return true; +} + + + +#define _SMOLV_READ_OP(len, words, op) \ + uint32_t len = words[0] >> 16; \ + if (len < 1) return false; /* malformed instruction, length needs to be at least 1 */ \ + if (words + len > wordsEnd) return false; /* malformed instruction, goes past end of data */ \ + SpvOp op = (SpvOp)(words[0] & 0xFFFF) + + +bool smolv::Encode(const void* spirvData, size_t spirvSize, ByteArray& outSmolv, uint32_t flags, StripOpNameFilterFunc stripFilter) +{ + const size_t wordCount = spirvSize / 4; + if (wordCount * 4 != spirvSize) + return false; + const uint32_t* words = (const uint32_t*)spirvData; + const uint32_t* wordsEnd = words + wordCount; + if (!smolv_CheckSpirVHeader(words, wordCount)) + return false; + + // reserve space in output (typical compression is to about 30%; reserve half of input space) + outSmolv.reserve(outSmolv.size() + spirvSize/2); + + // header (matches SPIR-V one, except different magic) + smolv_Write4(outSmolv, kSmolHeaderMagic); + smolv_Write4(outSmolv, (words[1] & 0x00FFFFFF) + (kSmolCurrEncodingVersion<<24)); // SPIR-V version (_XXX) + SMOL-V version (X___) + smolv_Write4(outSmolv, words[2]); // generator + smolv_Write4(outSmolv, words[3]); // bound + smolv_Write4(outSmolv, words[4]); // schema + + const size_t headerSpirvSizeOffset = outSmolv.size(); // size field may get updated later if stripping is enabled + smolv_Write4(outSmolv, (uint32_t)spirvSize); // space needed to decode (i.e. original SPIR-V size) + + size_t strippedSpirvWordCount = wordCount; + uint32_t prevResult = 0; + uint32_t prevDecorate = 0; + + const int knownOpsCount = smolv_GetKnownOpsCount(kSmolCurrEncodingVersion); + + words += 5; + while (words < wordsEnd) + { + _SMOLV_READ_OP(instrLen, words, op); + + if ((flags & kEncodeFlagStripDebugInfo) && smolv_OpDebugInfo(op, knownOpsCount)) + { + if (!stripFilter || op != SpvOpName || !stripFilter(reinterpret_cast<const char*>(&words[2]))) + { + strippedSpirvWordCount -= instrLen; + words += instrLen; + continue; + } + } + + // A usual case of vector shuffle, with less than 4 components, each with a value + // in [0..3] range: encode it in a more compact form, with the swizzle pattern in one byte. + // Turn this into a VectorShuffleCompact instruction, that takes up unused slot in Ops. + uint32_t swizzle = 0; + if (op == SpvOpVectorShuffle && instrLen <= 9) + { + uint32_t swz0 = instrLen > 5 ? words[5] : 0; + uint32_t swz1 = instrLen > 6 ? words[6] : 0; + uint32_t swz2 = instrLen > 7 ? words[7] : 0; + uint32_t swz3 = instrLen > 8 ? words[8] : 0; + if (swz0 < 4 && swz1 < 4 && swz2 < 4 && swz3 < 4) + { + op = SpvOpVectorShuffleCompact; + swizzle = (swz0 << 6) | (swz1 << 4) | (swz2 << 2) | (swz3); + } + } + + // length + opcode + if (!smolv_WriteLengthOp(outSmolv, instrLen, op)) + return false; + + size_t ioffs = 1; + // write type as varint, if we have it + if (smolv_OpHasType(op, knownOpsCount)) + { + if (ioffs >= instrLen) + return false; + smolv_WriteVarint(outSmolv, words[ioffs]); + ioffs++; + } + // write result as delta+zig+varint, if we have it + if (smolv_OpHasResult(op, knownOpsCount)) + { + if (ioffs >= instrLen) + return false; + uint32_t v = words[ioffs]; + smolv_WriteVarint(outSmolv, smolv_ZigEncode(v - prevResult)); // some deltas are negative, use zig + prevResult = v; + ioffs++; + } + + // Decorate & MemberDecorate: IDs relative to previous decorate + if (op == SpvOpDecorate || op == SpvOpMemberDecorate) + { + if (ioffs >= instrLen) + return false; + uint32_t v = words[ioffs]; + smolv_WriteVarint(outSmolv, smolv_ZigEncode(v - prevDecorate)); // spirv-remapped deltas often negative, use zig + prevDecorate = v; + ioffs++; + } + + // MemberDecorate special encoding: whole row of MemberDecorate instructions is often referring + // to the same type and linearly increasing member indices. Scan ahead to see how many we have, + // and encode whole bunch as one. + if (op == SpvOpMemberDecorate) + { + // scan ahead until we reach end, non-member-decoration or different type + const uint32_t decorationType = words[ioffs-1]; + const uint32_t* memberWords = words; + uint32_t prevIndex = 0; + uint32_t prevOffset = 0; + // write a byte on how many we have encoded as a bunch + size_t countLocation = outSmolv.size(); + outSmolv.push_back(0); + int count = 0; + while (memberWords < wordsEnd && count < 255) + { + _SMOLV_READ_OP(memberLen, memberWords, memberOp); + if (memberOp != SpvOpMemberDecorate) + break; + if (memberLen < 4) + return false; // invalid input + if (memberWords[1] != decorationType) + break; + + // write member index as delta from previous + uint32_t memberIndex = memberWords[2]; + smolv_WriteVarint(outSmolv, memberIndex - prevIndex); + prevIndex = memberIndex; + + // decoration (and length if not common/known) + uint32_t memberDec = memberWords[3]; + smolv_WriteVarint(outSmolv, memberDec); + const int knownExtraOps = smolv_DecorationExtraOps(memberDec); + if (knownExtraOps == -1) + smolv_WriteVarint(outSmolv, memberLen-4); + else if (unsigned(knownExtraOps) + 4 != memberLen) + return false; // invalid input + + // Offset decorations are most often linearly increasing, so encode as deltas + if (memberDec == 35) // Offset + { + if (memberLen != 5) + return false; + smolv_WriteVarint(outSmolv, memberWords[4]-prevOffset); + prevOffset = memberWords[4]; + } + else + { + // write rest of decorations as varint + for (uint32_t i = 4; i < memberLen; ++i) + smolv_WriteVarint(outSmolv, memberWords[i]); + } + + memberWords += memberLen; + ++count; + } + outSmolv[countLocation] = uint8_t(count); + words = memberWords; + continue; + } + + // Write out this many IDs, encoding them relative+zigzag to result ID + int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount); + for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs) + { + if (ioffs >= instrLen) + return false; + uint32_t delta = prevResult - words[ioffs]; + // some deltas are negative (often on branches, or if program was processed by spirv-remap), + // so use zig encoding + smolv_WriteVarint(outSmolv, smolv_ZigEncode(delta)); + } + + if (op == SpvOpVectorShuffleCompact) + { + // compact vector shuffle, just write out single swizzle byte + outSmolv.push_back(uint8_t(swizzle)); + ioffs = instrLen; + } + else if (smolv_OpVarRest(op, knownOpsCount)) + { + // write out rest of words with variable encoding (expected to be small integers) + for (; ioffs < instrLen; ++ioffs) + smolv_WriteVarint(outSmolv, words[ioffs]); + } + else + { + // write out rest of words without any encoding + for (; ioffs < instrLen; ++ioffs) + smolv_Write4(outSmolv, words[ioffs]); + } + + words += instrLen; + } + + if (strippedSpirvWordCount != wordCount) + { + uint8_t* headerSpirvSize = &outSmolv[headerSpirvSizeOffset]; + smolv_Write4(headerSpirvSize, (uint32_t)strippedSpirvWordCount * 4); + } + + return true; +} + + +size_t smolv::GetDecodedBufferSize(const void* smolvData, size_t smolvSize) +{ + if (!smolv_CheckSmolHeader((const uint8_t*)smolvData, smolvSize)) + return 0; + const uint32_t* words = (const uint32_t*)smolvData; + return words[5]; +} + + +bool smolv::Decode(const void* smolvData, size_t smolvSize, void* spirvOutputBuffer, size_t spirvOutputBufferSize, uint32_t flags) +{ + // check header, and whether we have enough output buffer space + const size_t neededBufferSize = GetDecodedBufferSize(smolvData, smolvSize); + if (neededBufferSize == 0) + return false; // invalid SMOL-V + if (spirvOutputBufferSize < neededBufferSize) + return false; // not enough space in output buffer + if (spirvOutputBuffer == NULL) + return false; // output buffer is null + + const uint8_t* bytes = (const uint8_t*)smolvData; + const uint8_t* bytesEnd = bytes + smolvSize; + + uint8_t* outSpirv = (uint8_t*)spirvOutputBuffer; + + uint32_t val; + int smolVersion = 0; + + // header + smolv_Write4(outSpirv, kSpirVHeaderMagic); bytes += 4; + smolv_Read4(bytes, bytesEnd, val); smolVersion = val >> 24; val &= 0x00FFFFFF; smolv_Write4(outSpirv, val); // version + smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // generator + smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // bound + smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // schema + bytes += 4; // decode buffer size + + // there are two SMOL-V encoding versions, both not indicating anything in their header version field: + // one that is called "before zero" here (2016-08-31 code). Support decoding that one only by presence + // of this special flag. + const bool beforeZeroVersion = smolVersion == 0 && (flags & kDecodeFlagUse20160831AsZeroVersion) != 0; + + const int knownOpsCount = smolv_GetKnownOpsCount(smolVersion); + + uint32_t prevResult = 0; + uint32_t prevDecorate = 0; + + while (bytes < bytesEnd) + { + // read length + opcode + uint32_t instrLen; + SpvOp op; + if (!smolv_ReadLengthOp(bytes, bytesEnd, instrLen, op)) + return false; + const bool wasSwizzle = (op == SpvOpVectorShuffleCompact); + if (wasSwizzle) + op = SpvOpVectorShuffle; + smolv_Write4(outSpirv, (instrLen << 16) | op); + + size_t ioffs = 1; + + // read type as varint, if we have it + if (smolv_OpHasType(op, knownOpsCount)) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + ioffs++; + } + // read result as delta+varint, if we have it + if (smolv_OpHasResult(op, knownOpsCount)) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + val = prevResult + smolv_ZigDecode(val); + smolv_Write4(outSpirv, val); + prevResult = val; + ioffs++; + } + + // Decorate: IDs relative to previous decorate + if (op == SpvOpDecorate || op == SpvOpMemberDecorate) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + // "before zero" version did not use zig encoding for the value + val = prevDecorate + (beforeZeroVersion ? val : smolv_ZigDecode(val)); + smolv_Write4(outSpirv, val); + prevDecorate = val; + ioffs++; + } + + // MemberDecorate special decoding + if (op == SpvOpMemberDecorate && !beforeZeroVersion) + { + if (bytes >= bytesEnd) + return false; // broken input + int count = *bytes++; + int prevIndex = 0; + int prevOffset = 0; + for (int m = 0; m < count; ++m) + { + // read member index + uint32_t memberIndex; + if (!smolv_ReadVarint(bytes, bytesEnd, memberIndex)) return false; + memberIndex += prevIndex; + prevIndex = memberIndex; + + // decoration (and length if not common/known) + uint32_t memberDec; + if (!smolv_ReadVarint(bytes, bytesEnd, memberDec)) return false; + const int knownExtraOps = smolv_DecorationExtraOps(memberDec); + uint32_t memberLen; + if (knownExtraOps == -1) + { + if (!smolv_ReadVarint(bytes, bytesEnd, memberLen)) return false; + memberLen += 4; + } + else + memberLen = 4 + knownExtraOps; + + // write SPIR-V op+length (unless it's first member decoration, in which case it was written before) + if (m != 0) + { + smolv_Write4(outSpirv, (memberLen << 16) | op); + smolv_Write4(outSpirv, prevDecorate); + } + smolv_Write4(outSpirv, memberIndex); + smolv_Write4(outSpirv, memberDec); + // Special case for Offset decorations + if (memberDec == 35) // Offset + { + if (memberLen != 5) + return false; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + val += prevOffset; + smolv_Write4(outSpirv, val); + prevOffset = val; + } + else + { + for (uint32_t i = 4; i < memberLen; ++i) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + } + } + } + continue; + } + + // Read this many IDs, that are relative to result ID + int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount); + // "before zero" version only used zig encoding for IDs of several ops; after + // that ops got zig encoding for their IDs + bool zigDecodeVals = true; + if (beforeZeroVersion) + { + if (op != SpvOpControlBarrier && op != SpvOpMemoryBarrier && op != SpvOpLoopMerge && op != SpvOpSelectionMerge && op != SpvOpBranch && op != SpvOpBranchConditional && op != SpvOpMemoryNamedBarrier) + zigDecodeVals = false; + } + for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + if (zigDecodeVals) + val = smolv_ZigDecode(val); + smolv_Write4(outSpirv, prevResult - val); + } + + if (wasSwizzle && instrLen <= 9) + { + uint32_t swizzle = *bytes++; + if (instrLen > 5) smolv_Write4(outSpirv, (swizzle >> 6) & 3); + if (instrLen > 6) smolv_Write4(outSpirv, (swizzle >> 4) & 3); + if (instrLen > 7) smolv_Write4(outSpirv, (swizzle >> 2) & 3); + if (instrLen > 8) smolv_Write4(outSpirv, swizzle & 3); + } + else if (smolv_OpVarRest(op, knownOpsCount)) + { + // read rest of words with variable encoding + for (; ioffs < instrLen; ++ioffs) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + } + } + else + { + // read rest of words without any encoding + for (; ioffs < instrLen; ++ioffs) + { + if (!smolv_Read4(bytes, bytesEnd, val)) return false; + smolv_Write4(outSpirv, val); + } + } + } + + if ((uint8_t*)spirvOutputBuffer + neededBufferSize != outSpirv) + return false; // something went wrong during decoding? we should have decoded to exact output size + + return true; +} + + + +// -------------------------------------------------------------------------------------------- +// Calculating instruction count / space stats on SPIR-V and SMOL-V + + +struct smolv::Stats +{ + Stats() { memset(this, 0, sizeof(*this)); } + size_t opCounts[kKnownOpsCount]; + size_t opSizes[kKnownOpsCount]; + size_t smolOpSizes[kKnownOpsCount]; + size_t varintCountsOp[6]; + size_t varintCountsType[6]; + size_t varintCountsRes[6]; + size_t varintCountsOther[6]; + size_t totalOps; + size_t totalSize; + size_t totalSizeSmol; + size_t inputCount; +}; + + +smolv::Stats* smolv::StatsCreate() +{ + return new Stats(); +} + +void smolv::StatsDelete(smolv::Stats *s) +{ + delete s; +} + + +bool smolv::StatsCalculate(smolv::Stats* stats, const void* spirvData, size_t spirvSize) +{ + if (!stats) + return false; + + const size_t wordCount = spirvSize / 4; + if (wordCount * 4 != spirvSize) + return false; + const uint32_t* words = (const uint32_t*)spirvData; + const uint32_t* wordsEnd = words + wordCount; + if (!smolv_CheckSpirVHeader(words, wordCount)) + return false; + words += 5; + + stats->inputCount++; + stats->totalSize += wordCount; + + while (words < wordsEnd) + { + _SMOLV_READ_OP(instrLen, words, op); + + if (op < kKnownOpsCount) + { + stats->opCounts[op]++; + stats->opSizes[op] += instrLen; + } + words += instrLen; + stats->totalOps++; + } + + return true; +} + + +bool smolv::StatsCalculateSmol(smolv::Stats* stats, const void* smolvData, size_t smolvSize) +{ + if (!stats) + return false; + + // debugging helper to dump all encoded bytes to stdout, keep at "if 0" +# if 0 +# define _SMOLV_DEBUG_PRINT_ENCODED_BYTES() { \ + printf("Op %-22s ", op < kKnownOpsCount ? kSpirvOpNames[op] : "???"); \ + for (const uint8_t* b = instrBegin; b < bytes; ++b) \ + printf("%02x ", *b); \ + printf("\n"); \ + } +# else +# define _SMOLV_DEBUG_PRINT_ENCODED_BYTES() {} +# endif + + const uint8_t* bytes = (const uint8_t*)smolvData; + const uint8_t* bytesEnd = bytes + smolvSize; + if (!smolv_CheckSmolHeader(bytes, smolvSize)) + return false; + + uint32_t val; + int smolVersion; + bytes += 4; + smolv_Read4(bytes, bytesEnd, val); smolVersion = val >> 24; + const int knownOpsCount = smolv_GetKnownOpsCount(smolVersion); + bytes += 16; + + stats->totalSizeSmol += smolvSize; + + while (bytes < bytesEnd) + { + const uint8_t* instrBegin = bytes; + const uint8_t* varBegin; + + // read length + opcode + uint32_t instrLen; + SpvOp op; + varBegin = bytes; + if (!smolv_ReadLengthOp(bytes, bytesEnd, instrLen, op)) + return false; + const bool wasSwizzle = (op == SpvOpVectorShuffleCompact); + if (wasSwizzle) + op = SpvOpVectorShuffle; + stats->varintCountsOp[bytes-varBegin]++; + + size_t ioffs = 1; + if (smolv_OpHasType(op, knownOpsCount)) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsType[bytes-varBegin]++; + ioffs++; + } + if (smolv_OpHasResult(op, knownOpsCount)) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsRes[bytes-varBegin]++; + ioffs++; + } + + if (op == SpvOpDecorate || op == SpvOpMemberDecorate) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + ioffs++; + } + // MemberDecorate special decoding + if (op == SpvOpMemberDecorate) + { + if (bytes >= bytesEnd) + return false; // broken input + int count = *bytes++; + for (int m = 0; m < count; ++m) + { + uint32_t memberIndex; + if (!smolv_ReadVarint(bytes, bytesEnd, memberIndex)) return false; + uint32_t memberDec; + if (!smolv_ReadVarint(bytes, bytesEnd, memberDec)) return false; + const int knownExtraOps = smolv_DecorationExtraOps(memberDec); + uint32_t memberLen; + if (knownExtraOps == -1) + { + if (!smolv_ReadVarint(bytes, bytesEnd, memberLen)) return false; + memberLen += 4; + } + else + memberLen = 4 + knownExtraOps; + for (uint32_t i = 4; i < memberLen; ++i) + { + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + } + } + stats->smolOpSizes[op] += bytes - instrBegin; + _SMOLV_DEBUG_PRINT_ENCODED_BYTES(); + continue; + } + + int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount); + for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsRes[bytes-varBegin]++; + } + + if (wasSwizzle && instrLen <= 9) + { + bytes++; + } + else if (smolv_OpVarRest(op, knownOpsCount)) + { + for (; ioffs < instrLen; ++ioffs) + { + varBegin = bytes; + if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false; + stats->varintCountsOther[bytes-varBegin]++; + } + } + else + { + for (; ioffs < instrLen; ++ioffs) + { + if (!smolv_Read4(bytes, bytesEnd, val)) return false; + } + } + + if (op < kKnownOpsCount) + { + stats->smolOpSizes[op] += bytes - instrBegin; + } + _SMOLV_DEBUG_PRINT_ENCODED_BYTES(); + } + + return true; +} + +static bool CompareOpCounters (std::pair<SpvOp,size_t> a, std::pair<SpvOp,size_t> b) +{ + return a.second > b.second; +} + +void smolv::StatsPrint(const Stats* stats) +{ + if (!stats) + return; + + typedef std::pair<SpvOp,size_t> OpCounter; + OpCounter counts[kKnownOpsCount]; + OpCounter sizes[kKnownOpsCount]; + OpCounter sizesSmol[kKnownOpsCount]; + for (int i = 0; i < kKnownOpsCount; ++i) + { + counts[i].first = (SpvOp)i; + counts[i].second = stats->opCounts[i]; + sizes[i].first = (SpvOp)i; + sizes[i].second = stats->opSizes[i]; + sizesSmol[i].first = (SpvOp)i; + sizesSmol[i].second = stats->smolOpSizes[i]; + } + std::sort(counts, counts + kKnownOpsCount, CompareOpCounters); + std::sort(sizes, sizes + kKnownOpsCount, CompareOpCounters); + std::sort(sizesSmol, sizesSmol + kKnownOpsCount, CompareOpCounters); + + printf("Stats for %i SPIR-V inputs, total size %i words (%.1fKB):\n", (int)stats->inputCount, (int)stats->totalSize, stats->totalSize * 4.0f / 1024.0f); + printf("Most occuring ops:\n"); + for (int i = 0; i < 30; ++i) + { + SpvOp op = counts[i].first; + printf(" #%2i: %4i %-20s %4i (%4.1f%%)\n", i, op, kSpirvOpNames[op], (int)counts[i].second, (float)counts[i].second / (float)stats->totalOps * 100.0f); + } + printf("Largest total size of ops:\n"); + for (int i = 0; i < 30; ++i) + { + SpvOp op = sizes[i].first; + printf(" #%2i: %-22s %6i (%4.1f%%) avg len %.1f\n", + i, + kSpirvOpNames[op], + (int)sizes[i].second*4, + (float)sizes[i].second / (float)stats->totalSize * 100.0f, + (float)sizes[i].second*4 / (float)stats->opCounts[op] + ); + } + printf("SMOL varint encoding counts per byte length:\n"); + printf(" B: %6s %6s %6s %6s\n", "Op", "Type", "Result", "Other"); + for (int i = 1; i < 6; ++i) + { + printf(" %i: %6i %6i %6i %6i\n", i, (int)stats->varintCountsOp[i], (int)stats->varintCountsType[i], (int)stats->varintCountsRes[i], (int)stats->varintCountsOther[i]); + } + printf("Largest total size of ops in SMOL:\n"); + for (int i = 0; i < 30; ++i) + { + SpvOp op = sizesSmol[i].first; + printf(" #%2i: %-22s %6i (%4.1f%%) avg len %.1f\n", + i, + kSpirvOpNames[op], + (int)sizesSmol[i].second, + (float)sizesSmol[i].second / (float)stats->totalSizeSmol * 100.0f, + (float)sizesSmol[i].second / (float)stats->opCounts[op] + ); + } +} + + +// ------------------------------------------------------------------------------ +// This software is available under 2 licenses -- choose whichever you prefer. +// ------------------------------------------------------------------------------ +// ALTERNATIVE A - MIT License +// Copyright (c) 2016-2020 Aras Pranckevicius +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +// ALTERNATIVE B - Public Domain (www.unlicense.org) +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +// software, either in source code form or as a compiled binary, for any purpose, +// commercial or non-commercial, and by any means. +// In jurisdictions that recognize copyright laws, the author or authors of this +// software dedicate any and all copyright interest in the software to the public +// domain. We make this dedication for the benefit of the public at large and to +// the detriment of our heirs and successors. We intend this dedication to be an +// overt act of relinquishment in perpetuity of all present and future rights to +// this software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ------------------------------------------------------------------------------ diff --git a/thirdparty/misc/smolv.h b/thirdparty/misc/smolv.h new file mode 100644 index 0000000000..798ee4126f --- /dev/null +++ b/thirdparty/misc/smolv.h @@ -0,0 +1,169 @@ +// smol-v - public domain - https://github.com/aras-p/smol-v +// authored 2016-2020 by Aras Pranckevicius +// no warranty implied; use at your own risk +// See end of file for license information. +// +// +// ### OVERVIEW: +// +// SMOL-V encodes Vulkan/Khronos SPIR-V format programs into a form that is smaller, and is more +// compressible. Normally no changes to the programs are done; they decode +// into exactly same program as what was encoded. Optionally, debug information +// can be removed too. +// +// SPIR-V is a very verbose format, several times larger than same programs expressed in other +// shader formats (e.g. DX11 bytecode, GLSL, DX9 bytecode etc.). The SSA-form with ever increasing +// IDs is not very appreciated by regular data compressors either. SMOL-V does several things +// to improve this: +// - Many words, especially ones that most often have small values, are encoded using +// "varint" scheme (1-5 bytes per word, with just one byte for values in 0..127 range). +// See https://developers.google.com/protocol-buffers/docs/encoding +// - Some IDs used in the program are delta-encoded, relative to previously seen IDs (e.g. Result +// IDs). Often instructions reference things that were computed just before, so this results in +// small deltas. These values are also encoded using "varint" scheme. +// - Reordering instruction opcodes so that the most common ones are the smallest values, for smaller +// varint encoding. +// - Encoding several instructions in a more compact form, e.g. the "typical <=4 component swizzle" +// shape of a VectorShuffle instruction, or sequences of MemberDecorate instructions. +// +// A somewhat similar utility is spirv-remap from glslang, see +// https://github.com/KhronosGroup/glslang/blob/master/README-spirv-remap.txt +// +// +// ### USAGE: +// +// Add source/smolv.h and source/smolv.cpp to your C++ project build. +// Currently it might require C++11 or somesuch; I only tested with Visual Studio 2017/2019, Mac Xcode 11 and Gcc 5.4. +// +// smolv::Encode and smolv::Decode is the basic functionality. +// +// Other functions are for development/statistics purposes, to figure out frequencies and +// distributions of the instructions. +// +// There's a test + compression benchmarking suite in testing/testmain.cpp; using that needs adding +// other files under testing/external to the build too (3rd party code: glslang remapper, Zstd, LZ4). +// +// +// ### LIMITATIONS / TODO: +// +// - SPIR-V where the words got stored in big-endian layout is not supported yet. +// - The whole thing might not work on Big-Endian CPUs. It might, but I'm not 100% sure. +// - Not much prevention is done against malformed/corrupted inputs, TODO. +// - Out of memory cases are not handled. The code will either throw exception +// or crash, depending on your compilation flags. + +#pragma once + +#include <stdint.h> +#include <vector> +#include <cstddef> + +namespace smolv +{ + typedef std::vector<uint8_t> ByteArray; + + enum EncodeFlags + { + kEncodeFlagNone = 0, + kEncodeFlagStripDebugInfo = (1<<0), // Strip all optional SPIR-V instructions (debug names etc.) + }; + enum DecodeFlags + { + kDecodeFlagNone = 0, + kDecodeFlagUse20160831AsZeroVersion = (1 << 0), // For "version zero" of SMOL-V encoding, use 2016 08 31 code path (this is what happens to be used by Unity 2017-2020) + }; + + // Preserve *some* OpName debug names. + // Return true to preserve, false to strip. + // This is really only used to implement a workaround for problems with some Vulkan drivers. + typedef bool(*StripOpNameFilterFunc)(const char* name); + + // ------------------------------------------------------------------- + // Encoding / Decoding + + // Encode SPIR-V into SMOL-V. + // + // Resulting data is appended to outSmolv array (the array is not cleared). + // + // flags is bitset of EncodeFlags values. + // + // Returns false on malformed SPIR-V input; if that happens the output array might get + // partial/broken SMOL-V program. + bool Encode(const void* spirvData, size_t spirvSize, ByteArray& outSmolv, uint32_t flags = kEncodeFlagNone, StripOpNameFilterFunc stripFilter = 0); + + + // Decode SMOL-V into SPIR-V. + // + // Resulting data is written into the passed buffer. Get required buffer space with + // GetDecodeBufferSize; this is the size of decoded SPIR-V program. + // + // flags is bitset of DecodeFlags values. + + // Decoding does no memory allocations. + // + // Returns false on malformed input; if that happens the output buffer might be only partially + // written to. + bool Decode(const void* smolvData, size_t smolvSize, void* spirvOutputBuffer, size_t spirvOutputBufferSize, uint32_t flags = kDecodeFlagNone); + + + // Given a SMOL-V program, get size of the decoded SPIR-V program. + // This is the buffer size that Decode expects. + // + // Returns zero on malformed input (just checks the header, not the full input). + size_t GetDecodedBufferSize(const void* smolvData, size_t smolvSize); + + + // ------------------------------------------------------------------- + // Computing instruction statistics on SPIR-V/SMOL-V programs + + struct Stats; + + Stats* StatsCreate(); + void StatsDelete(Stats* s); + + bool StatsCalculate(Stats* stats, const void* spirvData, size_t spirvSize); + bool StatsCalculateSmol(Stats* stats, const void* smolvData, size_t smolvSize); + void StatsPrint(const Stats* stats); + +} // namespace smolv + + +// ------------------------------------------------------------------------------ +// This software is available under 2 licenses -- choose whichever you prefer. +// ------------------------------------------------------------------------------ +// ALTERNATIVE A - MIT License +// Copyright (c) 2016-2020 Aras Pranckevicius +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +// ALTERNATIVE B - Public Domain (www.unlicense.org) +// This is free and unencumbered software released into the public domain. +// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +// software, either in source code form or as a compiled binary, for any purpose, +// commercial or non-commercial, and by any means. +// In jurisdictions that recognize copyright laws, the author or authors of this +// software dedicate any and all copyright interest in the software to the public +// domain. We make this dedication for the benefit of the public at large and to +// the detriment of our heirs and successors. We intend this dedication to be an +// overt act of relinquishment in perpetuity of all present and future rights to +// this software under copyright law. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// ------------------------------------------------------------------------------ diff --git a/thirdparty/oidn/core/transfer_function.cpp b/thirdparty/oidn/core/transfer_function.cpp index 487f0a9f75..ce5deca56b 100644 --- a/thirdparty/oidn/core/transfer_function.cpp +++ b/thirdparty/oidn/core/transfer_function.cpp @@ -24,10 +24,6 @@ namespace oidn { float AutoexposureNode::autoexposure(const Image& color) { assert(color.format == Format::Float3); -// -- GODOT start -- -// We don't want to mess with TTB and we don't use autoexposure, so we disable this code -#if 0 -// -- GODOT end -- constexpr float key = 0.18f; constexpr float eps = 1e-8f; @@ -42,61 +38,66 @@ namespace oidn { // Compute the average log luminance of the downsampled image using Sum = std::pair<float, int>; - Sum sum = - tbb::parallel_reduce( - tbb::blocked_range2d<int>(0, HK, 0, WK), - Sum(0.f, 0), - [&](const tbb::blocked_range2d<int>& r, Sum sum) -> Sum + // -- GODOT start -- + // Sum sum = + // tbb::parallel_reduce( + // tbb::blocked_range2d<int>(0, HK, 0, WK), + // Sum(0.f, 0), + // [&](const tbb::blocked_range2d<int>& r, Sum sum) -> Sum + // { + // // Iterate over blocks + // for (int i = r.rows().begin(); i != r.rows().end(); ++i) + // { + // for (int j = r.cols().begin(); j != r.cols().end(); ++j) + // { + + Sum sum = Sum(0.0f, 0); + + for (int i = 0; i != HK; ++i) + { + for (int j = 0; j != WK; ++j) + { + // Compute the average luminance in the current block + const int beginH = int(ptrdiff_t(i) * H / HK); + const int beginW = int(ptrdiff_t(j) * W / WK); + const int endH = int(ptrdiff_t(i+1) * H / HK); + const int endW = int(ptrdiff_t(j+1) * W / WK); + + float L = 0.f; + + for (int h = beginH; h < endH; ++h) { - // Iterate over blocks - for (int i = r.rows().begin(); i != r.rows().end(); ++i) + for (int w = beginW; w < endW; ++w) { - for (int j = r.cols().begin(); j != r.cols().end(); ++j) - { - // Compute the average luminance in the current block - const int beginH = int(ptrdiff_t(i) * H / HK); - const int beginW = int(ptrdiff_t(j) * W / WK); - const int endH = int(ptrdiff_t(i+1) * H / HK); - const int endW = int(ptrdiff_t(j+1) * W / WK); - - float L = 0.f; - - for (int h = beginH; h < endH; ++h) - { - for (int w = beginW; w < endW; ++w) - { - const float* rgb = (const float*)color.get(h, w); - - const float r = maxSafe(rgb[0], 0.f); - const float g = maxSafe(rgb[1], 0.f); - const float b = maxSafe(rgb[2], 0.f); - - L += luminance(r, g, b); - } - } - - L /= (endH - beginH) * (endW - beginW); - - // Accumulate the log luminance - if (L > eps) - { - sum.first += log2(L); - sum.second++; - } - } + const float* rgb = (const float*)color.get(h, w); + + const float r = maxSafe(rgb[0], 0.f); + const float g = maxSafe(rgb[1], 0.f); + const float b = maxSafe(rgb[2], 0.f); + + L += luminance(r, g, b); } + } - return sum; - }, - [](Sum a, Sum b) -> Sum { return Sum(a.first+b.first, a.second+b.second); }, - tbb::static_partitioner() - ); + L /= (endH - beginH) * (endW - beginW); + + // Accumulate the log luminance + if (L > eps) + { + sum.first += log2(L); + sum.second++; + } + } + } + + // return sum; + // }, + // [](Sum a, Sum b) -> Sum { return Sum(a.first+b.first, a.second+b.second); }, + // tbb::static_partitioner() + // ); + // -- GODOT end -- return (sum.second > 0) ? (key / exp2(sum.first / float(sum.second))) : 1.f; -// -- GODOT start -- -#endif - return 1.0; -// -- GODOT end -- } } // namespace oidn diff --git a/thirdparty/oidn/patches/godot-changes-c58c5216.patch b/thirdparty/oidn/patches/godot-changes-c58c5216.patch index 6a54703064..c01f00187b 100644 --- a/thirdparty/oidn/patches/godot-changes-c58c5216.patch +++ b/thirdparty/oidn/patches/godot-changes-c58c5216.patch @@ -280,28 +280,58 @@ index 8c2de09..ed8328c 100644 namespace oidn { diff --git a/core/transfer_function.cpp b/core/transfer_function.cpp -index 601f814..487f0a9 100644 +index 601f814..ce5deca 100644 --- a/core/transfer_function.cpp +++ b/core/transfer_function.cpp -@@ -24,6 +24,10 @@ namespace oidn { - float AutoexposureNode::autoexposure(const Image& color) - { - assert(color.format == Format::Float3); -+// -- GODOT start -- -+// We don't want to mess with TTB and we don't use autoexposure, so we disable this code -+#if 0 -+// -- GODOT end -- +@@ -38,16 +38,24 @@ namespace oidn { + // Compute the average log luminance of the downsampled image + using Sum = std::pair<float, int>; + +- Sum sum = +- tbb::parallel_reduce( +- tbb::blocked_range2d<int>(0, HK, 0, WK), +- Sum(0.f, 0), +- [&](const tbb::blocked_range2d<int>& r, Sum sum) -> Sum ++ // -- GODOT start -- ++ // Sum sum = ++ // tbb::parallel_reduce( ++ // tbb::blocked_range2d<int>(0, HK, 0, WK), ++ // Sum(0.f, 0), ++ // [&](const tbb::blocked_range2d<int>& r, Sum sum) -> Sum ++ // { ++ // // Iterate over blocks ++ // for (int i = r.rows().begin(); i != r.rows().end(); ++i) ++ // { ++ // for (int j = r.cols().begin(); j != r.cols().end(); ++j) ++ // { ++ ++ Sum sum = Sum(0.0f, 0); ++ ++ for (int i = 0; i != HK; ++i) + { +- // Iterate over blocks +- for (int i = r.rows().begin(); i != r.rows().end(); ++i) +- { +- for (int j = r.cols().begin(); j != r.cols().end(); ++j) ++ for (int j = 0; j != WK; ++j) + { + // Compute the average luminance in the current block + const int beginH = int(ptrdiff_t(i) * H / HK); +@@ -82,11 +90,12 @@ namespace oidn { + } + } - constexpr float key = 0.18f; - constexpr float eps = 1e-8f; -@@ -89,6 +93,10 @@ namespace oidn { - ); +- return sum; +- }, +- [](Sum a, Sum b) -> Sum { return Sum(a.first+b.first, a.second+b.second); }, +- tbb::static_partitioner() +- ); ++ // return sum; ++ // }, ++ // [](Sum a, Sum b) -> Sum { return Sum(a.first+b.first, a.second+b.second); }, ++ // tbb::static_partitioner() ++ // ); ++ // -- GODOT end -- return (sum.second > 0) ? (key / exp2(sum.first / float(sum.second))) : 1.f; -+// -- GODOT start -- -+#endif -+ return 1.0; -+// -- GODOT end -- } - - } // namespace oidn |