1 files changed, 358 insertions, 202 deletions
diff --git a/thirdparty/squish/squish.cpp b/thirdparty/squish/squish.cpp
index bbe89bfcfe..d3cbabbafd 100644
--- a/thirdparty/squish/squish.cpp
+++ b/thirdparty/squish/squish.cpp
@@ -1,29 +1,30 @@
 /* -----------------------------------------------------------------------------
 
-	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
-
-	Permission is hereby granted, free of charge, to any person obtaining
-	a copy of this software and associated documentation files (the 
-	"Software"), to	deal in the Software without restriction, including
-	without limitation the rights to use, copy, modify, merge, publish,
-	distribute, sublicense, and/or sell copies of the Software, and to 
-	permit persons to whom the Software is furnished to do so, subject to 
-	the following conditions:
-
-	The above copyright notice and this permission notice shall be included
-	in all copies or substantial portions of the Software.
-
-	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
+    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
    -------------------------------------------------------------------------- */
-   
-#include <squish.h>
+
+#include <string.h>
+#include "squish.h"
 #include "colourset.h"
 #include "maths.h"
 #include "rangefit.h"
@@ -36,204 +37,359 @@ namespace squish {
 
 static int FixFlags( int flags )
 {
-	// grab the flag bits
-	int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
-	int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
-	int metric = flags & ( kColourMetricPerceptual | kColourMetricUniform );
-	int extra = flags & kWeightColourByAlpha;
-	
-	// set defaults
-	if( method != kDxt3 && method != kDxt5 )
-		method = kDxt1;
-	if( fit != kColourRangeFit )
-		fit = kColourClusterFit;
-	if( metric != kColourMetricUniform )
-		metric = kColourMetricPerceptual;
-		
-	// done
-	return method | fit | metric | extra;
-}
+    // grab the flag bits
+    int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
+    int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
+    int extra = flags & kWeightColourByAlpha;
 
-void Compress( u8 const* rgba, void* block, int flags )
-{
-	// compress with full mask
-	CompressMasked( rgba, 0xffff, block, flags );
+    // set defaults
+    if ( method != kDxt3
+    &&   method != kDxt5
+    &&   method != kBc4
+    &&   method != kBc5 )
+    {
+        method = kDxt1;
+    }
+    if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
+        fit = kColourClusterFit;
+
+    // done
+    return method | fit | extra;
 }
 
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
 {
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// get the block locations
-	void* colourBlock = block;
-	void* alphaBock = block;
-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
-		colourBlock = reinterpret_cast< u8* >( block ) + 8;
-
-	// create the minimal point set
-	ColourSet colours( rgba, mask, flags );
-	
-	// check the compression type and compress colour
-	if( colours.GetCount() == 1 )
-	{
-		// always do a single colour fit
-		SingleColourFit fit( &colours, flags );
-		fit.Compress( colourBlock );
-	}
-	else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
-	{
-		// do a range fit
-		RangeFit fit( &colours, flags );
-		fit.Compress( colourBlock );
-	}
-	else
-	{
-		// default to a cluster fit (could be iterative or not)
-		ClusterFit fit( &colours, flags );
-		fit.Compress( colourBlock );
-	}
-	
-	// compress alpha separately if necessary
-	if( ( flags & kDxt3 ) != 0 )
-		CompressAlphaDxt3( rgba, mask, alphaBock );
-	else if( ( flags & kDxt5 ) != 0 )
-		CompressAlphaDxt5( rgba, mask, alphaBock );
+    // fix any bad flags
+    flags = FixFlags( flags );
+
+    if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
+    {
+        u8 alpha[16*4];
+        for( int i = 0; i < 16; ++i )
+        {
+            alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
+        }
+
+        u8* rBlock = reinterpret_cast< u8* >( block );
+        CompressAlphaDxt5( alpha, mask, rBlock );
+
+        if ( ( flags & ( kBc5 ) ) != 0 )
+        {
+            for( int i = 0; i < 16; ++i )
+            {
+                alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
+            }
+
+            u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
+            CompressAlphaDxt5( alpha, mask, gBlock );
+        }
+
+        return;
+    }
+
+    // get the block locations
+    void* colourBlock = block;
+    void* alphaBlock = block;
+    if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
+        colourBlock = reinterpret_cast< u8* >( block ) + 8;
+
+    // create the minimal point set
+    ColourSet colours( rgba, mask, flags );
+
+    // check the compression type and compress colour
+    if( colours.GetCount() == 1 )
+    {
+        // always do a single colour fit
+        SingleColourFit fit( &colours, flags );
+        fit.Compress( colourBlock );
+    }
+    else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
+    {
+        // do a range fit
+        RangeFit fit( &colours, flags, metric );
+        fit.Compress( colourBlock );
+    }
+    else
+    {
+        // default to a cluster fit (could be iterative or not)
+        ClusterFit fit( &colours, flags, metric );
+        fit.Compress( colourBlock );
+    }
+
+    // compress alpha separately if necessary
+    if( ( flags & kDxt3 ) != 0 )
+        CompressAlphaDxt3( rgba, mask, alphaBlock );
+    else if( ( flags & kDxt5 ) != 0 )
+        CompressAlphaDxt5( rgba, mask, alphaBlock );
 }
 
 void Decompress( u8* rgba, void const* block, int flags )
 {
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// get the block locations
-	void const* colourBlock = block;
-	void const* alphaBock = block;
-	if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
-		colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
-
-	// decompress colour
-	DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
-
-	// decompress alpha separately if necessary
-	if( ( flags & kDxt3 ) != 0 )
-		DecompressAlphaDxt3( rgba, alphaBock );
-	else if( ( flags & kDxt5 ) != 0 )
-		DecompressAlphaDxt5( rgba, alphaBock );
+    // fix any bad flags
+    flags = FixFlags( flags );
+
+    // get the block locations
+    void const* colourBlock = block;
+    void const* alphaBlock = block;
+    if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
+        colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
+
+    // decompress colour
+    DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
+
+    // decompress alpha separately if necessary
+    if( ( flags & kDxt3 ) != 0 )
+        DecompressAlphaDxt3( rgba, alphaBlock );
+    else if( ( flags & kDxt5 ) != 0 )
+        DecompressAlphaDxt5( rgba, alphaBlock );
 }
 
 int GetStorageRequirements( int width, int height, int flags )
 {
-	// fix any bad flags
-	flags = FixFlags( flags );
-	
-	// compute the storage requirements
-	int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
-	int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-	return blockcount*blocksize;	
+    // fix any bad flags
+    flags = FixFlags( flags );
+
+    // compute the storage requirements
+    int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
+    int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+    return blockcount*blocksize;
+}
+
+void CopyRGBA( u8 const* source, u8* dest, int flags )
+{
+    if (flags & kSourceBGRA)
+    {
+        // convert from bgra to rgba
+        dest[0] = source[2];
+        dest[1] = source[1];
+        dest[2] = source[0];
+        dest[3] = source[3];
+    }
+    else
+    {
+        for( int i = 0; i < 4; ++i )
+            *dest++ = *source++;
+    }
+}
+
+void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
+{
+    // fix any bad flags
+    flags = FixFlags( flags );
+
+    // initialise the block output
+    u8* targetBlock = reinterpret_cast< u8* >( blocks );
+    int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+
+    // loop over blocks
+    for( int y = 0; y < height; y += 4 )
+    {
+        for( int x = 0; x < width; x += 4 )
+        {
+            // build the 4x4 block of pixels
+            u8 sourceRgba[16*4];
+            u8* targetPixel = sourceRgba;
+            int mask = 0;
+            for( int py = 0; py < 4; ++py )
+            {
+                for( int px = 0; px < 4; ++px )
+                {
+                    // get the source pixel in the image
+                    int sx = x + px;
+                    int sy = y + py;
+
+                    // enable if we're in the image
+                    if( sx < width && sy < height )
+                    {
+                        // copy the rgba value
+                        u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
+                        CopyRGBA(sourcePixel, targetPixel, flags);
+                        // enable this pixel
+                        mask |= ( 1 << ( 4*py + px ) );
+                    }
+
+                    // advance to the next pixel
+                    targetPixel += 4;
+                }
+            }
+
+            // compress it into the output
+            CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
+
+            // advance
+            targetBlock += bytesPerBlock;
+        }
+    }
+}
+
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
+{
+    CompressImage(rgba, width, height, width*4, blocks, flags, metric);
 }
 
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags )
+void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
 {
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// initialise the block output
-	u8* targetBlock = reinterpret_cast< u8* >( blocks );
-	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-
-	// loop over blocks
-	for( int y = 0; y < height; y += 4 )
-	{
-		for( int x = 0; x < width; x += 4 )
-		{
-			// build the 4x4 block of pixels
-			u8 sourceRgba[16*4];
-			u8* targetPixel = sourceRgba;
-			int mask = 0;
-			for( int py = 0; py < 4; ++py )
-			{
-				for( int px = 0; px < 4; ++px )
-				{
-					// get the source pixel in the image
-					int sx = x + px;
-					int sy = y + py;
-					
-					// enable if we're in the image
-					if( sx < width && sy < height )
-					{
-						// copy the rgba value
-						u8 const* sourcePixel = rgba + 4*( width*sy + sx );
-						for( int i = 0; i < 4; ++i )
-							*targetPixel++ = *sourcePixel++;
-							
-						// enable this pixel
-						mask |= ( 1 << ( 4*py + px ) );
-					}
-					else
-					{
-						// skip this pixel as its outside the image
-						targetPixel += 4;
-					}
-				}
-			}
-			
-			// compress it into the output
-			CompressMasked( sourceRgba, mask, targetBlock, flags );
-			
-			// advance
-			targetBlock += bytesPerBlock;
-		}
-	}
+    // fix any bad flags
+    flags = FixFlags( flags );
+
+    // initialise the block input
+    u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
+    int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+
+    // loop over blocks
+    for( int y = 0; y < height; y += 4 )
+    {
+        for( int x = 0; x < width; x += 4 )
+        {
+            // decompress the block
+            u8 targetRgba[4*16];
+            Decompress( targetRgba, sourceBlock, flags );
+
+            // write the decompressed pixels to the correct image locations
+            u8 const* sourcePixel = targetRgba;
+            for( int py = 0; py < 4; ++py )
+            {
+                for( int px = 0; px < 4; ++px )
+                {
+                    // get the target location
+                    int sx = x + px;
+                    int sy = y + py;
+
+                    // write if we're in the image
+                    if( sx < width && sy < height )
+                    {
+                        // copy the rgba value
+                        u8* targetPixel = rgba + pitch*sy + 4*sx;
+                        CopyRGBA(sourcePixel, targetPixel, flags);
+                    }
+
+                    // advance to the next pixel
+                    sourcePixel += 4;
+                }
+            }
+
+            // advance
+            sourceBlock += bytesPerBlock;
+        }
+    }
 }
 
 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
 {
-	// fix any bad flags
-	flags = FixFlags( flags );
-
-	// initialise the block input
-	u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
-	int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-
-	// loop over blocks
-	for( int y = 0; y < height; y += 4 )
-	{
-		for( int x = 0; x < width; x += 4 )
-		{
-			// decompress the block
-			u8 targetRgba[4*16];
-			Decompress( targetRgba, sourceBlock, flags );
-			
-			// write the decompressed pixels to the correct image locations
-			u8 const* sourcePixel = targetRgba;
-			for( int py = 0; py < 4; ++py )
-			{
-				for( int px = 0; px < 4; ++px )
-				{
-					// get the target location
-					int sx = x + px;
-					int sy = y + py;
-					if( sx < width && sy < height )
-					{
-						u8* targetPixel = rgba + 4*( width*sy + sx );
-						
-						// copy the rgba value
-						for( int i = 0; i < 4; ++i )
-							*targetPixel++ = *sourcePixel++;
-					}
-					else
-					{
-						// skip this pixel as its outside the image
-						sourcePixel += 4;
-					}
-				}
-			}
-			
-			// advance
-			sourceBlock += bytesPerBlock;
-		}
-	}
+    DecompressImage( rgba, width, height, width*4, blocks, flags );
+}
+
+static double ErrorSq(double x, double y)
+{
+    return (x - y) * (x - y);
+}
+
+static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
+{
+    // Computes the MSE for the block and weights it by the variance of the original block.
+    // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
+    // then the block is close to being a single colour. Quantisation errors in single colour blocks
+    // are easier to see than similar errors in blocks that contain more colours, particularly when there
+    // are many such blocks in a large area (eg a blue sky background) as they cause banding.  Given that
+    // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
+    // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
+    // than images with lots of detail.
+
+    cmse = amse = 0;
+    unsigned int sum_p[4];  // per channel sum of pixels
+    unsigned int sum_p2[4]; // per channel sum of pixels squared
+    memset(sum_p, 0, sizeof(sum_p));
+    memset(sum_p2, 0, sizeof(sum_p2));
+    for( unsigned int py = 0; py < 4; ++py )
+    {
+        for( unsigned int px = 0; px < 4; ++px )
+        {
+            if( px < w && py < h )
+            {
+                double pixelCMSE = 0;
+                for( int i = 0; i < 3; ++i )
+                {
+                    pixelCMSE += ErrorSq(original[i], compressed[i]);
+                    sum_p[i] += original[i];
+                    sum_p2[i] += (unsigned int)original[i]*original[i];
+                }
+                if( original[3] == 0 && compressed[3] == 0 )
+                    pixelCMSE = 0; // transparent in both, so colour is inconsequential
+                amse += ErrorSq(original[3], compressed[3]);
+                cmse += pixelCMSE;
+                sum_p[3] += original[3];
+                sum_p2[3] += (unsigned int)original[3]*original[3];
+            }
+            original += 4;
+            compressed += 4;
+        }
+    }
+    unsigned int variance = 0;
+    for( int i = 0; i < 4; ++i )
+        variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
+    if( variance < 4 * w * w * h * h )
+    {
+        amse *= 5;
+        cmse *= 5;
+    }
+}
+
+void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+    // fix any bad flags
+    flags = FixFlags( flags );
+    colourMSE = alphaMSE = 0;
+
+    // initialise the block input
+    squish::u8 const* sourceBlock = dxt;
+    int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
+
+    // loop over blocks
+    for( int y = 0; y < height; y += 4 )
+    {
+        for( int x = 0; x < width; x += 4 )
+        {
+            // decompress the block
+            u8 targetRgba[4*16];
+            Decompress( targetRgba, sourceBlock, flags );
+            u8 const* sourcePixel = targetRgba;
+
+            // copy across to a similar pixel block
+            u8 originalRgba[4*16];
+            u8* originalPixel = originalRgba;
+
+            for( int py = 0; py < 4; ++py )
+            {
+                for( int px = 0; px < 4; ++px )
+                {
+                    int sx = x + px;
+                    int sy = y + py;
+                    if( sx < width && sy < height )
+                    {
+                        u8 const* targetPixel = rgba + pitch*sy + 4*sx;
+                        CopyRGBA(targetPixel, originalPixel, flags);
+                    }
+                    sourcePixel += 4;
+                    originalPixel += 4;
+                }
+            }
+
+            // compute the weighted MSE of the block
+            double blockCMSE, blockAMSE;
+            ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
+            colourMSE += blockCMSE;
+            alphaMSE += blockAMSE;
+            // advance
+            sourceBlock += bytesPerBlock;
+        }
+    }
+    colourMSE /= (width * height * 3);
+    alphaMSE /= (width * height);
+}
+
+void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+    ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
 }
 
 } // namespace squish