summaryrefslogtreecommitdiff
path: root/thirdparty/squish/clusterfit.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/squish/clusterfit.cpp')
-rw-r--r--thirdparty/squish/clusterfit.cpp731
1 files changed, 365 insertions, 366 deletions
diff --git a/thirdparty/squish/clusterfit.cpp b/thirdparty/squish/clusterfit.cpp
index afea84880c..1610ecb5d8 100644
--- a/thirdparty/squish/clusterfit.cpp
+++ b/thirdparty/squish/clusterfit.cpp
@@ -1,29 +1,29 @@
/* -----------------------------------------------------------------------------
- Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
- Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
+ Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
+ Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
-------------------------------------------------------------------------- */
-
+
#include "clusterfit.h"
#include "colourset.h"
#include "colourblock.h"
@@ -31,363 +31,362 @@
namespace squish {
-ClusterFit::ClusterFit( ColourSet const* colours, int flags )
+ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
: ColourFit( colours, flags )
{
- // set the iteration count
- m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
-
- // initialise the best error
- m_besterror = VEC4_CONST( FLT_MAX );
-
- // initialise the metric
- bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
- if( perceptual )
- m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
- else
- m_metric = VEC4_CONST( 1.0f );
-
- // cache some values
- int const count = m_colours->GetCount();
- Vec3 const* values = m_colours->GetPoints();
-
- // get the covariance matrix
- Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
-
- // compute the principle component
- m_principle = ComputePrincipleComponent( covariance );
+ // set the iteration count
+ m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
+
+ // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
+ if( metric )
+ m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
+ else
+ m_metric = VEC4_CONST( 1.0f );
+
+ // initialise the best error
+ m_besterror = VEC4_CONST( FLT_MAX );
+
+ // cache some values
+ int const count = m_colours->GetCount();
+ Vec3 const* values = m_colours->GetPoints();
+
+ // get the covariance matrix
+ Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
+
+ // compute the principle component
+ m_principle = ComputePrincipleComponent( covariance );
}
bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
{
- // cache some values
- int const count = m_colours->GetCount();
- Vec3 const* values = m_colours->GetPoints();
-
- // build the list of dot products
- float dps[16];
- u8* order = ( u8* )m_order + 16*iteration;
- for( int i = 0; i < count; ++i )
- {
- dps[i] = Dot( values[i], axis );
- order[i] = ( u8 )i;
- }
-
- // stable sort using them
- for( int i = 0; i < count; ++i )
- {
- for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
- {
- std::swap( dps[j], dps[j - 1] );
- std::swap( order[j], order[j - 1] );
- }
- }
-
- // check this ordering is unique
- for( int it = 0; it < iteration; ++it )
- {
- u8 const* prev = ( u8* )m_order + 16*it;
- bool same = true;
- for( int i = 0; i < count; ++i )
- {
- if( order[i] != prev[i] )
- {
- same = false;
- break;
- }
- }
- if( same )
- return false;
- }
-
- // copy the ordering and weight all the points
- Vec3 const* unweighted = m_colours->GetPoints();
- float const* weights = m_colours->GetWeights();
- m_xsum_wsum = VEC4_CONST( 0.0f );
- for( int i = 0; i < count; ++i )
- {
- int j = order[i];
- Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
- Vec4 w( weights[j] );
- Vec4 x = p*w;
- m_points_weights[i] = x;
- m_xsum_wsum += x;
- }
- return true;
+ // cache some values
+ int const count = m_colours->GetCount();
+ Vec3 const* values = m_colours->GetPoints();
+
+ // build the list of dot products
+ float dps[16];
+ u8* order = ( u8* )m_order + 16*iteration;
+ for( int i = 0; i < count; ++i )
+ {
+ dps[i] = Dot( values[i], axis );
+ order[i] = ( u8 )i;
+ }
+
+ // stable sort using them
+ for( int i = 0; i < count; ++i )
+ {
+ for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
+ {
+ std::swap( dps[j], dps[j - 1] );
+ std::swap( order[j], order[j - 1] );
+ }
+ }
+
+ // check this ordering is unique
+ for( int it = 0; it < iteration; ++it )
+ {
+ u8 const* prev = ( u8* )m_order + 16*it;
+ bool same = true;
+ for( int i = 0; i < count; ++i )
+ {
+ if( order[i] != prev[i] )
+ {
+ same = false;
+ break;
+ }
+ }
+ if( same )
+ return false;
+ }
+
+ // copy the ordering and weight all the points
+ Vec3 const* unweighted = m_colours->GetPoints();
+ float const* weights = m_colours->GetWeights();
+ m_xsum_wsum = VEC4_CONST( 0.0f );
+ for( int i = 0; i < count; ++i )
+ {
+ int j = order[i];
+ Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
+ Vec4 w( weights[j] );
+ Vec4 x = p*w;
+ m_points_weights[i] = x;
+ m_xsum_wsum += x;
+ }
+ return true;
}
void ClusterFit::Compress3( void* block )
{
- // declare variables
- int const count = m_colours->GetCount();
- Vec4 const two = VEC4_CONST( 2.0 );
- Vec4 const one = VEC4_CONST( 1.0f );
- Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
- Vec4 const zero = VEC4_CONST( 0.0f );
- Vec4 const half = VEC4_CONST( 0.5f );
- Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
- Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
-
- // prepare an ordering using the principle axis
- ConstructOrdering( m_principle, 0 );
-
- // check all possible clusters and iterate on the total order
- Vec4 beststart = VEC4_CONST( 0.0f );
- Vec4 bestend = VEC4_CONST( 0.0f );
- Vec4 besterror = m_besterror;
- u8 bestindices[16];
- int bestiteration = 0;
- int besti = 0, bestj = 0;
-
- // loop over iterations (we avoid the case that all points in first or last cluster)
- for( int iterationIndex = 0;; )
- {
- // first cluster [0,i) is at the start
- Vec4 part0 = VEC4_CONST( 0.0f );
- for( int i = 0; i < count; ++i )
- {
- // second cluster [i,j) is half along
- Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
- int jmin = ( i == 0 ) ? 1 : i;
- for( int j = jmin;; )
- {
- // last cluster [j,count) is at the end
- Vec4 part2 = m_xsum_wsum - part1 - part0;
-
- // compute least squares terms directly
- Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
- Vec4 alpha2_sum = alphax_sum.SplatW();
-
- Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
- Vec4 beta2_sum = betax_sum.SplatW();
-
- Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
-
- // compute the least-squares optimal points
- Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
- Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
- Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
-
- // clamp to the grid
- a = Min( one, Max( zero, a ) );
- b = Min( one, Max( zero, b ) );
- a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
- b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
-
- // compute the error (we skip the constant xxsum)
- Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
- Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
- Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
- Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
- // apply the metric to the error term
- Vec4 e5 = e4*m_metric;
- Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-
- // keep the solution if it wins
- if( CompareAnyLessThan( error, besterror ) )
- {
- beststart = a;
- bestend = b;
- besti = i;
- bestj = j;
- besterror = error;
- bestiteration = iterationIndex;
- }
-
- // advance
- if( j == count )
- break;
- part1 += m_points_weights[j];
- ++j;
- }
-
- // advance
- part0 += m_points_weights[i];
- }
-
- // stop if we didn't improve in this iteration
- if( bestiteration != iterationIndex )
- break;
-
- // advance if possible
- ++iterationIndex;
- if( iterationIndex == m_iterationCount )
- break;
-
- // stop if a new iteration is an ordering that has already been tried
- Vec3 axis = ( bestend - beststart ).GetVec3();
- if( !ConstructOrdering( axis, iterationIndex ) )
- break;
- }
-
- // save the block if necessary
- if( CompareAnyLessThan( besterror, m_besterror ) )
- {
- // remap the indices
- u8 const* order = ( u8* )m_order + 16*bestiteration;
-
- u8 unordered[16];
- for( int m = 0; m < besti; ++m )
- unordered[order[m]] = 0;
- for( int m = besti; m < bestj; ++m )
- unordered[order[m]] = 2;
- for( int m = bestj; m < count; ++m )
- unordered[order[m]] = 1;
-
- m_colours->RemapIndices( unordered, bestindices );
-
- // save the block
- WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
-
- // save the error
- m_besterror = besterror;
- }
+ // declare variables
+ int const count = m_colours->GetCount();
+ Vec4 const two = VEC4_CONST( 2.0 );
+ Vec4 const one = VEC4_CONST( 1.0f );
+ Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
+ Vec4 const zero = VEC4_CONST( 0.0f );
+ Vec4 const half = VEC4_CONST( 0.5f );
+ Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+ Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+
+ // prepare an ordering using the principle axis
+ ConstructOrdering( m_principle, 0 );
+
+ // check all possible clusters and iterate on the total order
+ Vec4 beststart = VEC4_CONST( 0.0f );
+ Vec4 bestend = VEC4_CONST( 0.0f );
+ Vec4 besterror = m_besterror;
+ u8 bestindices[16];
+ int bestiteration = 0;
+ int besti = 0, bestj = 0;
+
+ // loop over iterations (we avoid the case that all points in first or last cluster)
+ for( int iterationIndex = 0;; )
+ {
+ // first cluster [0,i) is at the start
+ Vec4 part0 = VEC4_CONST( 0.0f );
+ for( int i = 0; i < count; ++i )
+ {
+ // second cluster [i,j) is half along
+ Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
+ int jmin = ( i == 0 ) ? 1 : i;
+ for( int j = jmin;; )
+ {
+ // last cluster [j,count) is at the end
+ Vec4 part2 = m_xsum_wsum - part1 - part0;
+
+ // compute least squares terms directly
+ Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
+ Vec4 alpha2_sum = alphax_sum.SplatW();
+
+ Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
+ Vec4 beta2_sum = betax_sum.SplatW();
+
+ Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
+
+ // compute the least-squares optimal points
+ Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
+ Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
+ Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
+
+ // clamp to the grid
+ a = Min( one, Max( zero, a ) );
+ b = Min( one, Max( zero, b ) );
+ a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
+ b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
+
+ // compute the error (we skip the constant xxsum)
+ Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+ Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
+ Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
+ Vec4 e4 = MultiplyAdd( two, e3, e1 );
+
+ // apply the metric to the error term
+ Vec4 e5 = e4*m_metric;
+ Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+
+ // keep the solution if it wins
+ if( CompareAnyLessThan( error, besterror ) )
+ {
+ beststart = a;
+ bestend = b;
+ besti = i;
+ bestj = j;
+ besterror = error;
+ bestiteration = iterationIndex;
+ }
+
+ // advance
+ if( j == count )
+ break;
+ part1 += m_points_weights[j];
+ ++j;
+ }
+
+ // advance
+ part0 += m_points_weights[i];
+ }
+
+ // stop if we didn't improve in this iteration
+ if( bestiteration != iterationIndex )
+ break;
+
+ // advance if possible
+ ++iterationIndex;
+ if( iterationIndex == m_iterationCount )
+ break;
+
+ // stop if a new iteration is an ordering that has already been tried
+ Vec3 axis = ( bestend - beststart ).GetVec3();
+ if( !ConstructOrdering( axis, iterationIndex ) )
+ break;
+ }
+
+ // save the block if necessary
+ if( CompareAnyLessThan( besterror, m_besterror ) )
+ {
+ // remap the indices
+ u8 const* order = ( u8* )m_order + 16*bestiteration;
+
+ u8 unordered[16];
+ for( int m = 0; m < besti; ++m )
+ unordered[order[m]] = 0;
+ for( int m = besti; m < bestj; ++m )
+ unordered[order[m]] = 2;
+ for( int m = bestj; m < count; ++m )
+ unordered[order[m]] = 1;
+
+ m_colours->RemapIndices( unordered, bestindices );
+
+ // save the block
+ WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+
+ // save the error
+ m_besterror = besterror;
+ }
}
void ClusterFit::Compress4( void* block )
{
- // declare variables
- int const count = m_colours->GetCount();
- Vec4 const two = VEC4_CONST( 2.0f );
- Vec4 const one = VEC4_CONST( 1.0f );
- Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
- Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
- Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
- Vec4 const zero = VEC4_CONST( 0.0f );
- Vec4 const half = VEC4_CONST( 0.5f );
- Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
- Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
-
- // prepare an ordering using the principle axis
- ConstructOrdering( m_principle, 0 );
-
- // check all possible clusters and iterate on the total order
- Vec4 beststart = VEC4_CONST( 0.0f );
- Vec4 bestend = VEC4_CONST( 0.0f );
- Vec4 besterror = m_besterror;
- u8 bestindices[16];
- int bestiteration = 0;
- int besti = 0, bestj = 0, bestk = 0;
-
- // loop over iterations (we avoid the case that all points in first or last cluster)
- for( int iterationIndex = 0;; )
- {
- // first cluster [0,i) is at the start
- Vec4 part0 = VEC4_CONST( 0.0f );
- for( int i = 0; i < count; ++i )
- {
- // second cluster [i,j) is one third along
- Vec4 part1 = VEC4_CONST( 0.0f );
- for( int j = i;; )
- {
- // third cluster [j,k) is two thirds along
- Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
- int kmin = ( j == 0 ) ? 1 : j;
- for( int k = kmin;; )
- {
- // last cluster [k,count) is at the end
- Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
-
- // compute least squares terms directly
- Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
- Vec4 const alpha2_sum = alphax_sum.SplatW();
-
- Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
- Vec4 const beta2_sum = betax_sum.SplatW();
-
- Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
-
- // compute the least-squares optimal points
- Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
- Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
- Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
-
- // clamp to the grid
- a = Min( one, Max( zero, a ) );
- b = Min( one, Max( zero, b ) );
- a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
- b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
-
- // compute the error (we skip the constant xxsum)
- Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
- Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
- Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
- Vec4 e4 = MultiplyAdd( two, e3, e1 );
-
- // apply the metric to the error term
- Vec4 e5 = e4*m_metric;
- Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
-
- // keep the solution if it wins
- if( CompareAnyLessThan( error, besterror ) )
- {
- beststart = a;
- bestend = b;
- besterror = error;
- besti = i;
- bestj = j;
- bestk = k;
- bestiteration = iterationIndex;
- }
-
- // advance
- if( k == count )
- break;
- part2 += m_points_weights[k];
- ++k;
- }
-
- // advance
- if( j == count )
- break;
- part1 += m_points_weights[j];
- ++j;
- }
-
- // advance
- part0 += m_points_weights[i];
- }
-
- // stop if we didn't improve in this iteration
- if( bestiteration != iterationIndex )
- break;
-
- // advance if possible
- ++iterationIndex;
- if( iterationIndex == m_iterationCount )
- break;
-
- // stop if a new iteration is an ordering that has already been tried
- Vec3 axis = ( bestend - beststart ).GetVec3();
- if( !ConstructOrdering( axis, iterationIndex ) )
- break;
- }
-
- // save the block if necessary
- if( CompareAnyLessThan( besterror, m_besterror ) )
- {
- // remap the indices
- u8 const* order = ( u8* )m_order + 16*bestiteration;
-
- u8 unordered[16];
- for( int m = 0; m < besti; ++m )
- unordered[order[m]] = 0;
- for( int m = besti; m < bestj; ++m )
- unordered[order[m]] = 2;
- for( int m = bestj; m < bestk; ++m )
- unordered[order[m]] = 3;
- for( int m = bestk; m < count; ++m )
- unordered[order[m]] = 1;
-
- m_colours->RemapIndices( unordered, bestindices );
-
- // save the block
- WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
-
- // save the error
- m_besterror = besterror;
- }
+ // declare variables
+ int const count = m_colours->GetCount();
+ Vec4 const two = VEC4_CONST( 2.0f );
+ Vec4 const one = VEC4_CONST( 1.0f );
+ Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
+ Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
+ Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
+ Vec4 const zero = VEC4_CONST( 0.0f );
+ Vec4 const half = VEC4_CONST( 0.5f );
+ Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
+ Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
+
+ // prepare an ordering using the principle axis
+ ConstructOrdering( m_principle, 0 );
+
+ // check all possible clusters and iterate on the total order
+ Vec4 beststart = VEC4_CONST( 0.0f );
+ Vec4 bestend = VEC4_CONST( 0.0f );
+ Vec4 besterror = m_besterror;
+ u8 bestindices[16];
+ int bestiteration = 0;
+ int besti = 0, bestj = 0, bestk = 0;
+
+ // loop over iterations (we avoid the case that all points in first or last cluster)
+ for( int iterationIndex = 0;; )
+ {
+ // first cluster [0,i) is at the start
+ Vec4 part0 = VEC4_CONST( 0.0f );
+ for( int i = 0; i < count; ++i )
+ {
+ // second cluster [i,j) is one third along
+ Vec4 part1 = VEC4_CONST( 0.0f );
+ for( int j = i;; )
+ {
+ // third cluster [j,k) is two thirds along
+ Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
+ int kmin = ( j == 0 ) ? 1 : j;
+ for( int k = kmin;; )
+ {
+ // last cluster [k,count) is at the end
+ Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
+
+ // compute least squares terms directly
+ Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
+ Vec4 const alpha2_sum = alphax_sum.SplatW();
+
+ Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
+ Vec4 const beta2_sum = betax_sum.SplatW();
+
+ Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
+
+ // compute the least-squares optimal points
+ Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
+ Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
+ Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
+
+ // clamp to the grid
+ a = Min( one, Max( zero, a ) );
+ b = Min( one, Max( zero, b ) );
+ a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
+ b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
+
+ // compute the error (we skip the constant xxsum)
+ Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
+ Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
+ Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
+ Vec4 e4 = MultiplyAdd( two, e3, e1 );
+
+ // apply the metric to the error term
+ Vec4 e5 = e4*m_metric;
+ Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
+
+ // keep the solution if it wins
+ if( CompareAnyLessThan( error, besterror ) )
+ {
+ beststart = a;
+ bestend = b;
+ besterror = error;
+ besti = i;
+ bestj = j;
+ bestk = k;
+ bestiteration = iterationIndex;
+ }
+
+ // advance
+ if( k == count )
+ break;
+ part2 += m_points_weights[k];
+ ++k;
+ }
+
+ // advance
+ if( j == count )
+ break;
+ part1 += m_points_weights[j];
+ ++j;
+ }
+
+ // advance
+ part0 += m_points_weights[i];
+ }
+
+ // stop if we didn't improve in this iteration
+ if( bestiteration != iterationIndex )
+ break;
+
+ // advance if possible
+ ++iterationIndex;
+ if( iterationIndex == m_iterationCount )
+ break;
+
+ // stop if a new iteration is an ordering that has already been tried
+ Vec3 axis = ( bestend - beststart ).GetVec3();
+ if( !ConstructOrdering( axis, iterationIndex ) )
+ break;
+ }
+
+ // save the block if necessary
+ if( CompareAnyLessThan( besterror, m_besterror ) )
+ {
+ // remap the indices
+ u8 const* order = ( u8* )m_order + 16*bestiteration;
+
+ u8 unordered[16];
+ for( int m = 0; m < besti; ++m )
+ unordered[order[m]] = 0;
+ for( int m = besti; m < bestj; ++m )
+ unordered[order[m]] = 2;
+ for( int m = bestj; m < bestk; ++m )
+ unordered[order[m]] = 3;
+ for( int m = bestk; m < count; ++m )
+ unordered[order[m]] = 1;
+
+ m_colours->RemapIndices( unordered, bestindices );
+
+ // save the block
+ WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
+
+ // save the error
+ m_besterror = besterror;
+ }
}
} // namespace squish