summaryrefslogtreecommitdiff
path: root/thirdparty/squish/squish.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/squish/squish.cpp')
-rw-r--r--thirdparty/squish/squish.cpp560
1 files changed, 358 insertions, 202 deletions
diff --git a/thirdparty/squish/squish.cpp b/thirdparty/squish/squish.cpp
index bbe89bfcfe..d3cbabbafd 100644
--- a/thirdparty/squish/squish.cpp
+++ b/thirdparty/squish/squish.cpp
@@ -1,29 +1,30 @@
/* -----------------------------------------------------------------------------
- Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
+ Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
-------------------------------------------------------------------------- */
-
-#include <squish.h>
+
+#include <string.h>
+#include "squish.h"
#include "colourset.h"
#include "maths.h"
#include "rangefit.h"
@@ -36,204 +37,359 @@ namespace squish {
static int FixFlags( int flags )
{
- // grab the flag bits
- int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
- int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
- int metric = flags & ( kColourMetricPerceptual | kColourMetricUniform );
- int extra = flags & kWeightColourByAlpha;
-
- // set defaults
- if( method != kDxt3 && method != kDxt5 )
- method = kDxt1;
- if( fit != kColourRangeFit )
- fit = kColourClusterFit;
- if( metric != kColourMetricUniform )
- metric = kColourMetricPerceptual;
-
- // done
- return method | fit | metric | extra;
-}
+ // grab the flag bits
+ int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
+ int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
+ int extra = flags & kWeightColourByAlpha;
-void Compress( u8 const* rgba, void* block, int flags )
-{
- // compress with full mask
- CompressMasked( rgba, 0xffff, block, flags );
+ // set defaults
+ if ( method != kDxt3
+ && method != kDxt5
+ && method != kBc4
+ && method != kBc5 )
+ {
+ method = kDxt1;
+ }
+ if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
+ fit = kColourClusterFit;
+
+ // done
+ return method | fit | extra;
}
-void CompressMasked( u8 const* rgba, int mask, void* block, int flags )
+void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
{
- // fix any bad flags
- flags = FixFlags( flags );
-
- // get the block locations
- void* colourBlock = block;
- void* alphaBock = block;
- if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
- colourBlock = reinterpret_cast< u8* >( block ) + 8;
-
- // create the minimal point set
- ColourSet colours( rgba, mask, flags );
-
- // check the compression type and compress colour
- if( colours.GetCount() == 1 )
- {
- // always do a single colour fit
- SingleColourFit fit( &colours, flags );
- fit.Compress( colourBlock );
- }
- else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
- {
- // do a range fit
- RangeFit fit( &colours, flags );
- fit.Compress( colourBlock );
- }
- else
- {
- // default to a cluster fit (could be iterative or not)
- ClusterFit fit( &colours, flags );
- fit.Compress( colourBlock );
- }
-
- // compress alpha separately if necessary
- if( ( flags & kDxt3 ) != 0 )
- CompressAlphaDxt3( rgba, mask, alphaBock );
- else if( ( flags & kDxt5 ) != 0 )
- CompressAlphaDxt5( rgba, mask, alphaBock );
+ // fix any bad flags
+ flags = FixFlags( flags );
+
+ if ( ( flags & ( kBc4 | kBc5 ) ) != 0 )
+ {
+ u8 alpha[16*4];
+ for( int i = 0; i < 16; ++i )
+ {
+ alpha[i*4 + 3] = rgba[i*4 + 0]; // copy R to A
+ }
+
+ u8* rBlock = reinterpret_cast< u8* >( block );
+ CompressAlphaDxt5( alpha, mask, rBlock );
+
+ if ( ( flags & ( kBc5 ) ) != 0 )
+ {
+ for( int i = 0; i < 16; ++i )
+ {
+ alpha[i*4 + 3] = rgba[i*4 + 1]; // copy G to A
+ }
+
+ u8* gBlock = reinterpret_cast< u8* >( block ) + 8;
+ CompressAlphaDxt5( alpha, mask, gBlock );
+ }
+
+ return;
+ }
+
+ // get the block locations
+ void* colourBlock = block;
+ void* alphaBlock = block;
+ if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
+ colourBlock = reinterpret_cast< u8* >( block ) + 8;
+
+ // create the minimal point set
+ ColourSet colours( rgba, mask, flags );
+
+ // check the compression type and compress colour
+ if( colours.GetCount() == 1 )
+ {
+ // always do a single colour fit
+ SingleColourFit fit( &colours, flags );
+ fit.Compress( colourBlock );
+ }
+ else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
+ {
+ // do a range fit
+ RangeFit fit( &colours, flags, metric );
+ fit.Compress( colourBlock );
+ }
+ else
+ {
+ // default to a cluster fit (could be iterative or not)
+ ClusterFit fit( &colours, flags, metric );
+ fit.Compress( colourBlock );
+ }
+
+ // compress alpha separately if necessary
+ if( ( flags & kDxt3 ) != 0 )
+ CompressAlphaDxt3( rgba, mask, alphaBlock );
+ else if( ( flags & kDxt5 ) != 0 )
+ CompressAlphaDxt5( rgba, mask, alphaBlock );
}
void Decompress( u8* rgba, void const* block, int flags )
{
- // fix any bad flags
- flags = FixFlags( flags );
-
- // get the block locations
- void const* colourBlock = block;
- void const* alphaBock = block;
- if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
- colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
-
- // decompress colour
- DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
-
- // decompress alpha separately if necessary
- if( ( flags & kDxt3 ) != 0 )
- DecompressAlphaDxt3( rgba, alphaBock );
- else if( ( flags & kDxt5 ) != 0 )
- DecompressAlphaDxt5( rgba, alphaBock );
+ // fix any bad flags
+ flags = FixFlags( flags );
+
+ // get the block locations
+ void const* colourBlock = block;
+ void const* alphaBlock = block;
+ if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
+ colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
+
+ // decompress colour
+ DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
+
+ // decompress alpha separately if necessary
+ if( ( flags & kDxt3 ) != 0 )
+ DecompressAlphaDxt3( rgba, alphaBlock );
+ else if( ( flags & kDxt5 ) != 0 )
+ DecompressAlphaDxt5( rgba, alphaBlock );
}
int GetStorageRequirements( int width, int height, int flags )
{
- // fix any bad flags
- flags = FixFlags( flags );
-
- // compute the storage requirements
- int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
- int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
- return blockcount*blocksize;
+ // fix any bad flags
+ flags = FixFlags( flags );
+
+ // compute the storage requirements
+ int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
+ int blocksize = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+ return blockcount*blocksize;
+}
+
+void CopyRGBA( u8 const* source, u8* dest, int flags )
+{
+ if (flags & kSourceBGRA)
+ {
+ // convert from bgra to rgba
+ dest[0] = source[2];
+ dest[1] = source[1];
+ dest[2] = source[0];
+ dest[3] = source[3];
+ }
+ else
+ {
+ for( int i = 0; i < 4; ++i )
+ *dest++ = *source++;
+ }
+}
+
+void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
+{
+ // fix any bad flags
+ flags = FixFlags( flags );
+
+ // initialise the block output
+ u8* targetBlock = reinterpret_cast< u8* >( blocks );
+ int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+
+ // loop over blocks
+ for( int y = 0; y < height; y += 4 )
+ {
+ for( int x = 0; x < width; x += 4 )
+ {
+ // build the 4x4 block of pixels
+ u8 sourceRgba[16*4];
+ u8* targetPixel = sourceRgba;
+ int mask = 0;
+ for( int py = 0; py < 4; ++py )
+ {
+ for( int px = 0; px < 4; ++px )
+ {
+ // get the source pixel in the image
+ int sx = x + px;
+ int sy = y + py;
+
+ // enable if we're in the image
+ if( sx < width && sy < height )
+ {
+ // copy the rgba value
+ u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
+ CopyRGBA(sourcePixel, targetPixel, flags);
+ // enable this pixel
+ mask |= ( 1 << ( 4*py + px ) );
+ }
+
+ // advance to the next pixel
+ targetPixel += 4;
+ }
+ }
+
+ // compress it into the output
+ CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
+
+ // advance
+ targetBlock += bytesPerBlock;
+ }
+ }
+}
+
+void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
+{
+ CompressImage(rgba, width, height, width*4, blocks, flags, metric);
}
-void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags )
+void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
{
- // fix any bad flags
- flags = FixFlags( flags );
-
- // initialise the block output
- u8* targetBlock = reinterpret_cast< u8* >( blocks );
- int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-
- // loop over blocks
- for( int y = 0; y < height; y += 4 )
- {
- for( int x = 0; x < width; x += 4 )
- {
- // build the 4x4 block of pixels
- u8 sourceRgba[16*4];
- u8* targetPixel = sourceRgba;
- int mask = 0;
- for( int py = 0; py < 4; ++py )
- {
- for( int px = 0; px < 4; ++px )
- {
- // get the source pixel in the image
- int sx = x + px;
- int sy = y + py;
-
- // enable if we're in the image
- if( sx < width && sy < height )
- {
- // copy the rgba value
- u8 const* sourcePixel = rgba + 4*( width*sy + sx );
- for( int i = 0; i < 4; ++i )
- *targetPixel++ = *sourcePixel++;
-
- // enable this pixel
- mask |= ( 1 << ( 4*py + px ) );
- }
- else
- {
- // skip this pixel as its outside the image
- targetPixel += 4;
- }
- }
- }
-
- // compress it into the output
- CompressMasked( sourceRgba, mask, targetBlock, flags );
-
- // advance
- targetBlock += bytesPerBlock;
- }
- }
+ // fix any bad flags
+ flags = FixFlags( flags );
+
+ // initialise the block input
+ u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
+ int bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
+
+ // loop over blocks
+ for( int y = 0; y < height; y += 4 )
+ {
+ for( int x = 0; x < width; x += 4 )
+ {
+ // decompress the block
+ u8 targetRgba[4*16];
+ Decompress( targetRgba, sourceBlock, flags );
+
+ // write the decompressed pixels to the correct image locations
+ u8 const* sourcePixel = targetRgba;
+ for( int py = 0; py < 4; ++py )
+ {
+ for( int px = 0; px < 4; ++px )
+ {
+ // get the target location
+ int sx = x + px;
+ int sy = y + py;
+
+ // write if we're in the image
+ if( sx < width && sy < height )
+ {
+ // copy the rgba value
+ u8* targetPixel = rgba + pitch*sy + 4*sx;
+ CopyRGBA(sourcePixel, targetPixel, flags);
+ }
+
+ // advance to the next pixel
+ sourcePixel += 4;
+ }
+ }
+
+ // advance
+ sourceBlock += bytesPerBlock;
+ }
+ }
}
void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
{
- // fix any bad flags
- flags = FixFlags( flags );
-
- // initialise the block input
- u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
- int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
-
- // loop over blocks
- for( int y = 0; y < height; y += 4 )
- {
- for( int x = 0; x < width; x += 4 )
- {
- // decompress the block
- u8 targetRgba[4*16];
- Decompress( targetRgba, sourceBlock, flags );
-
- // write the decompressed pixels to the correct image locations
- u8 const* sourcePixel = targetRgba;
- for( int py = 0; py < 4; ++py )
- {
- for( int px = 0; px < 4; ++px )
- {
- // get the target location
- int sx = x + px;
- int sy = y + py;
- if( sx < width && sy < height )
- {
- u8* targetPixel = rgba + 4*( width*sy + sx );
-
- // copy the rgba value
- for( int i = 0; i < 4; ++i )
- *targetPixel++ = *sourcePixel++;
- }
- else
- {
- // skip this pixel as its outside the image
- sourcePixel += 4;
- }
- }
- }
-
- // advance
- sourceBlock += bytesPerBlock;
- }
- }
+ DecompressImage( rgba, width, height, width*4, blocks, flags );
+}
+
+static double ErrorSq(double x, double y)
+{
+ return (x - y) * (x - y);
+}
+
+static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
+{
+ // Computes the MSE for the block and weights it by the variance of the original block.
+ // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
+ // then the block is close to being a single colour. Quantisation errors in single colour blocks
+ // are easier to see than similar errors in blocks that contain more colours, particularly when there
+ // are many such blocks in a large area (eg a blue sky background) as they cause banding. Given that
+ // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
+ // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
+ // than images with lots of detail.
+
+ cmse = amse = 0;
+ unsigned int sum_p[4]; // per channel sum of pixels
+ unsigned int sum_p2[4]; // per channel sum of pixels squared
+ memset(sum_p, 0, sizeof(sum_p));
+ memset(sum_p2, 0, sizeof(sum_p2));
+ for( unsigned int py = 0; py < 4; ++py )
+ {
+ for( unsigned int px = 0; px < 4; ++px )
+ {
+ if( px < w && py < h )
+ {
+ double pixelCMSE = 0;
+ for( int i = 0; i < 3; ++i )
+ {
+ pixelCMSE += ErrorSq(original[i], compressed[i]);
+ sum_p[i] += original[i];
+ sum_p2[i] += (unsigned int)original[i]*original[i];
+ }
+ if( original[3] == 0 && compressed[3] == 0 )
+ pixelCMSE = 0; // transparent in both, so colour is inconsequential
+ amse += ErrorSq(original[3], compressed[3]);
+ cmse += pixelCMSE;
+ sum_p[3] += original[3];
+ sum_p2[3] += (unsigned int)original[3]*original[3];
+ }
+ original += 4;
+ compressed += 4;
+ }
+ }
+ unsigned int variance = 0;
+ for( int i = 0; i < 4; ++i )
+ variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
+ if( variance < 4 * w * w * h * h )
+ {
+ amse *= 5;
+ cmse *= 5;
+ }
+}
+
+void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+ // fix any bad flags
+ flags = FixFlags( flags );
+ colourMSE = alphaMSE = 0;
+
+ // initialise the block input
+ squish::u8 const* sourceBlock = dxt;
+ int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
+
+ // loop over blocks
+ for( int y = 0; y < height; y += 4 )
+ {
+ for( int x = 0; x < width; x += 4 )
+ {
+ // decompress the block
+ u8 targetRgba[4*16];
+ Decompress( targetRgba, sourceBlock, flags );
+ u8 const* sourcePixel = targetRgba;
+
+ // copy across to a similar pixel block
+ u8 originalRgba[4*16];
+ u8* originalPixel = originalRgba;
+
+ for( int py = 0; py < 4; ++py )
+ {
+ for( int px = 0; px < 4; ++px )
+ {
+ int sx = x + px;
+ int sy = y + py;
+ if( sx < width && sy < height )
+ {
+ u8 const* targetPixel = rgba + pitch*sy + 4*sx;
+ CopyRGBA(targetPixel, originalPixel, flags);
+ }
+ sourcePixel += 4;
+ originalPixel += 4;
+ }
+ }
+
+ // compute the weighted MSE of the block
+ double blockCMSE, blockAMSE;
+ ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
+ colourMSE += blockCMSE;
+ alphaMSE += blockAMSE;
+ // advance
+ sourceBlock += bytesPerBlock;
+ }
+ }
+ colourMSE /= (width * height * 3);
+ alphaMSE /= (width * height);
+}
+
+void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
+{
+ ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
}
} // namespace squish