1 files changed, 6 insertions, 53 deletions
diff --git a/thirdparty/astcenc/astcenc_averages_and_directions.cpp b/thirdparty/astcenc/astcenc_averages_and_directions.cpp
index d1f003844a..dcff0d224b 100644
--- a/thirdparty/astcenc/astcenc_averages_and_directions.cpp
+++ b/thirdparty/astcenc/astcenc_averages_and_directions.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2022 Arm Limited
+// Copyright 2011-2023 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -725,8 +725,7 @@ void compute_error_squared_rgba(
 	const image_block& blk,
 	const processed_line4 uncor_plines[BLOCK_MAX_PARTITIONS],
 	const processed_line4 samec_plines[BLOCK_MAX_PARTITIONS],
-	float uncor_lengths[BLOCK_MAX_PARTITIONS],
-	float samec_lengths[BLOCK_MAX_PARTITIONS],
+	float line_lengths[BLOCK_MAX_PARTITIONS],
 	float& uncor_error,
 	float& samec_error
 ) {
@@ -740,12 +739,6 @@ void compute_error_squared_rgba(
 	{
 		const uint8_t *texel_indexes = pi.texels_of_partition[partition];
 
-		float uncor_loparam = 1e10f;
-		float uncor_hiparam = -1e10f;
-
-		float samec_loparam = 1e10f;
-		float samec_hiparam = -1e10f;
-
 		processed_line4 l_uncor = uncor_plines[partition];
 		processed_line4 l_samec = samec_plines[partition];
 
@@ -773,9 +766,6 @@ void compute_error_squared_rgba(
 		vfloat uncor_loparamv(1e10f);
 		vfloat uncor_hiparamv(-1e10f);
 
-		vfloat samec_loparamv(1e10f);
-		vfloat samec_hiparamv(-1e10f);
-
 		vfloat ew_r(blk.channel_weight.lane<0>());
 		vfloat ew_g(blk.channel_weight.lane<1>());
 		vfloat ew_b(blk.channel_weight.lane<2>());
@@ -825,9 +815,6 @@ void compute_error_squared_rgba(
 			                   + (data_b * l_samec_bs2)
 			                   + (data_a * l_samec_bs3);
 
-			samec_loparamv = min(samec_param, samec_loparamv);
-			samec_hiparamv = max(samec_param, samec_hiparamv);
-
 			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
 			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
 			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
@@ -843,18 +830,9 @@ void compute_error_squared_rgba(
 			lane_ids += vint(ASTCENC_SIMD_WIDTH);
 		}
 
-		uncor_loparam = hmin_s(uncor_loparamv);
-		uncor_hiparam = hmax_s(uncor_hiparamv);
-
-		samec_loparam = hmin_s(samec_loparamv);
-		samec_hiparam = hmax_s(samec_hiparamv);
-
-		float uncor_linelen = uncor_hiparam - uncor_loparam;
-		float samec_linelen = samec_hiparam - samec_loparam;
-
 		// Turn very small numbers and NaNs into a small number
-		uncor_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
-		samec_lengths[partition] = astc::max(samec_linelen, 1e-7f);
+		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
+		line_lengths[partition] = astc::max(uncor_linelen, 1e-7f);
 	}
 
 	uncor_error = hadd_s(uncor_errorsumv);
@@ -882,19 +860,9 @@ void compute_error_squared_rgb(
 		unsigned int texel_count = pi.partition_texel_count[partition];
 		promise(texel_count > 0);
 
-		float uncor_loparam = 1e10f;
-		float uncor_hiparam = -1e10f;
-
-		float samec_loparam = 1e10f;
-		float samec_hiparam = -1e10f;
-
 		processed_line3 l_uncor = pl.uncor_pline;
 		processed_line3 l_samec = pl.samec_pline;
 
-		// This implementation is an example vectorization of this function.
-		// It works for - the codec is a 2-4% faster than not vectorizing - but
-		// the benefit is limited by the use of gathers and register pressure
-
 		// Vectorize some useful scalar inputs
 		vfloat l_uncor_bs0(l_uncor.bs.lane<0>());
 		vfloat l_uncor_bs1(l_uncor.bs.lane<1>());
@@ -913,9 +881,6 @@ void compute_error_squared_rgb(
 		vfloat uncor_loparamv(1e10f);
 		vfloat uncor_hiparamv(-1e10f);
 
-		vfloat samec_loparamv(1e10f);
-		vfloat samec_hiparamv(-1e10f);
-
 		vfloat ew_r(blk.channel_weight.lane<0>());
 		vfloat ew_g(blk.channel_weight.lane<1>());
 		vfloat ew_b(blk.channel_weight.lane<2>());
@@ -958,9 +923,6 @@ void compute_error_squared_rgb(
 			                   + (data_g * l_samec_bs1)
 			                   + (data_b * l_samec_bs2);
 
-			samec_loparamv = min(samec_param, samec_loparamv);
-			samec_hiparamv = max(samec_param, samec_hiparamv);
-
 			vfloat samec_dist0 = samec_param * l_samec_bs0 - data_r;
 			vfloat samec_dist1 = samec_param * l_samec_bs1 - data_g;
 			vfloat samec_dist2 = samec_param * l_samec_bs2 - data_b;
@@ -974,18 +936,9 @@ void compute_error_squared_rgb(
 			lane_ids += vint(ASTCENC_SIMD_WIDTH);
 		}
 
-		uncor_loparam = hmin_s(uncor_loparamv);
-		uncor_hiparam = hmax_s(uncor_hiparamv);
-
-		samec_loparam = hmin_s(samec_loparamv);
-		samec_hiparam = hmax_s(samec_hiparamv);
-
-		float uncor_linelen = uncor_hiparam - uncor_loparam;
-		float samec_linelen = samec_hiparam - samec_loparam;
-
 		// Turn very small numbers and NaNs into a small number
-		pl.uncor_line_len = astc::max(uncor_linelen, 1e-7f);
-		pl.samec_line_len = astc::max(samec_linelen, 1e-7f);
+		float uncor_linelen = hmax_s(uncor_hiparamv) - hmin_s(uncor_loparamv);
+		pl.line_length = astc::max(uncor_linelen, 1e-7f);
 	}
 
 	uncor_error = hadd_s(uncor_errorsumv);