diff options
Diffstat (limited to 'thirdparty/astcenc/astcenc_averages_and_directions.cpp')
-rw-r--r-- | thirdparty/astcenc/astcenc_averages_and_directions.cpp | 59 |
1 files changed, 6 insertions, 53 deletions
diff --git a/thirdparty/astcenc/astcenc_averages_and_directions.cpp b/thirdparty/astcenc/astcenc_averages_and_directions.cpp index d1f003844a..dcff0d224b 100644 --- a/thirdparty/astcenc/astcenc_averages_and_directions.cpp +++ b/thirdparty/astcenc/astcenc_averages_and_directions.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2022 Arm Limited +// Copyright 2011-2023 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -725,8 +725,7 @@ void compute_error_squared_rgba( const image_block& blk, const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS], const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS], - float uncor_lengths[BLOCK_MAX_PARTITIONS], - float samec_lengths[BLOCK_MAX_PARTITIONS], + float line_lengths[BLOCK_MAX_PARTITIONS], float& uncor_error, float& samec_error ) { @@ -740,12 +739,6 @@ void compute_error_squared_rgba( { const uint8_t *texel_indexes = pi.texels_of_partition[partition]; - float uncor_loparam = 1e10f; - float uncor_hiparam = -1e10f; - - float samec_loparam = 1e10f; - float samec_hiparam = -1e10f; - processed_line4 l_uncor = uncor_plines[partition]; processed_line4 l_samec = samec_plines[partition]; @@ -773,9 +766,6 @@ void compute_error_squared_rgba( vfloat uncor_loparamv(1e10f); vfloat uncor_hiparamv(-1e10f); - vfloat samec_loparamv(1e10f); - vfloat samec_hiparamv(-1e10f); - vfloat ew_r(blk.channel_weight.lane<0>()); vfloat ew_g(blk.channel_weight.lane<1>()); vfloat ew_b(blk.channel_weight.lane<2>()); @@ -825,9 +815,6 @@ void compute_error_squared_rgba( + (data_b * l_samec_bs2) + (data_a * l_samec_bs3); - samec_loparamv = min(samec_param, samec_loparamv); - samec_hiparamv = max(samec_param, samec_hiparamv); - vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r; vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g; vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b; @@ -843,18 +830,9 @@ void compute_error_squared_rgba( lane_ids += vint(ASTCENC_SIMD_WIDTH); } - uncor_loparam = hmin_s(uncor_loparamv); - uncor_hiparam = hmax_s(uncor_hiparamv); - - samec_loparam = hmin_s(samec_loparamv); - samec_hiparam = hmax_s(samec_hiparamv); - - float uncor_linelen = uncor_hiparam - uncor_loparam; - float samec_linelen = samec_hiparam - samec_loparam; - // Turn very small numbers and NaNs into a small number - uncor_lengths[partition] = astc::max(uncor_linelen, 1e-7f); - samec_lengths[partition] = astc::max(samec_linelen, 1e-7f); + float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv); + line_lengths[partition] = astc::max(uncor_linelen, 1e-7f); } uncor_error = hadd_s(uncor_errorsumv); @@ -882,19 +860,9 @@ void compute_error_squared_rgb( unsigned int texel_count = pi.partition_texel_count[partition]; promise(texel_count > 0); - float uncor_loparam = 1e10f; - float uncor_hiparam = -1e10f; - - float samec_loparam = 1e10f; - float samec_hiparam = -1e10f; - processed_line3 l_uncor = pl.uncor_pline; processed_line3 l_samec = pl.samec_pline; - // This implementation is an example vectorization of this function. - // It works for - the codec is a 2-4% faster than not vectorizing - but - // the benefit is limited by the use of gathers and register pressure - // Vectorize some useful scalar inputs vfloat l_uncor_bs0(l_uncor.bs.lane<0>()); vfloat l_uncor_bs1(l_uncor.bs.lane<1>()); @@ -913,9 +881,6 @@ void compute_error_squared_rgb( vfloat uncor_loparamv(1e10f); vfloat uncor_hiparamv(-1e10f); - vfloat samec_loparamv(1e10f); - vfloat samec_hiparamv(-1e10f); - vfloat ew_r(blk.channel_weight.lane<0>()); vfloat ew_g(blk.channel_weight.lane<1>()); vfloat ew_b(blk.channel_weight.lane<2>()); @@ -958,9 +923,6 @@ void compute_error_squared_rgb( + (data_g * l_samec_bs1) + (data_b * l_samec_bs2); - samec_loparamv = min(samec_param, samec_loparamv); - samec_hiparamv = max(samec_param, samec_hiparamv); - vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r; vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g; vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b; @@ -974,18 +936,9 @@ void compute_error_squared_rgb( lane_ids += vint(ASTCENC_SIMD_WIDTH); } - uncor_loparam = hmin_s(uncor_loparamv); - uncor_hiparam = hmax_s(uncor_hiparamv); - - samec_loparam = hmin_s(samec_loparamv); - samec_hiparam = hmax_s(samec_hiparamv); - - float uncor_linelen = uncor_hiparam - uncor_loparam; - float samec_linelen = samec_hiparam - samec_loparam; - // Turn very small numbers and NaNs into a small number - pl.uncor_line_len = astc::max(uncor_linelen, 1e-7f); - pl.samec_line_len = astc::max(samec_linelen, 1e-7f); + float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv); + pl.line_length = astc::max(uncor_linelen, 1e-7f); } uncor_error = hadd_s(uncor_errorsumv); |