236 files changed, 19541 insertions, 10444 deletions
diff --git a/.github/workflows/macos_builds.yml b/.github/workflows/macos_builds.yml
index aede3f8d49..68623f2770 100644
--- a/.github/workflows/macos_builds.yml
+++ b/.github/workflows/macos_builds.yml
@@ -4,7 +4,7 @@ on: [push, pull_request]
 # Global Settings
 env:
   # Only used for the cache key. Increment version to force clean build.
-  GODOT_BASE_BRANCH: master-v2
+  GODOT_BASE_BRANCH: master-v3
   SCONSFLAGS: verbose=yes warnings=extra werror=yes module_text_server_fb_enabled=yes
 
 concurrency:
diff --git a/core/error/error_macros.cpp b/core/error/error_macros.cpp
index 928ddd3397..ceccd43259 100644
--- a/core/error/error_macros.cpp
+++ b/core/error/error_macros.cpp
@@ -118,3 +118,7 @@ void _err_print_index_error(const char *p_function, const char *p_file, int p_li
 void _err_print_index_error(const char *p_function, const char *p_file, int p_line, int64_t p_index, int64_t p_size, const char *p_index_str, const char *p_size_str, const String &p_message, bool p_editor_notify, bool p_fatal) {
 	_err_print_index_error(p_function, p_file, p_line, p_index, p_size, p_index_str, p_size_str, p_message.utf8().get_data(), p_fatal);
 }
+
+void _err_flush_stdout() {
+	fflush(stdout); // Write out any output still buffered on stdout.
+}
diff --git a/core/error/error_macros.h b/core/error/error_macros.h
index 802d7f9ef4..7b032fb4cd 100644
--- a/core/error/error_macros.h
+++ b/core/error/error_macros.h
@@ -69,6 +69,7 @@ void _err_print_error(const char *p_function, const char *p_file, int p_line, co
 void _err_print_error(const char *p_function, const char *p_file, int p_line, const String &p_error, const String &p_message, bool p_editor_notify = false, ErrorHandlerType p_type = ERR_HANDLER_ERROR);
 void _err_print_index_error(const char *p_function, const char *p_file, int p_line, int64_t p_index, int64_t p_size, const char *p_index_str, const char *p_size_str, const char *p_message = "", bool p_editor_notify = false, bool fatal = false);
 void _err_print_index_error(const char *p_function, const char *p_file, int p_line, int64_t p_index, int64_t p_size, const char *p_index_str, const char *p_size_str, const String &p_message, bool p_editor_notify = false, bool fatal = false);
+void _err_flush_stdout();
 
 #ifdef __GNUC__
 //#define FUNCTION_STR __PRETTY_FUNCTION__ - too annoying
@@ -789,6 +790,7 @@ void _err_print_index_error(const char *p_function, const char *p_file, int p_li
 #define CRASH_NOW()                                                                           \
 	if (true) {                                                                               \
 		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "FATAL: Method/function failed."); \
+		_err_flush_stdout();                                                                  \
 		GENERATE_TRAP();                                                                      \
 	} else                                                                                    \
 		((void)0)
@@ -801,6 +803,7 @@ void _err_print_index_error(const char *p_function, const char *p_file, int p_li
 #define CRASH_NOW_MSG(m_msg)                                                                         \
 	if (true) {                                                                                      \
 		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "FATAL: Method/function failed.", m_msg); \
+		_err_flush_stdout();                                                                         \
 		GENERATE_TRAP();                                                                             \
 	} else                                                                                           \
 		((void)0)
diff --git a/core/io/file_access_network.cpp b/core/io/file_access_network.cpp
index 307004b1c2..cb38ac0928 100644
--- a/core/io/file_access_network.cpp
+++ b/core/io/file_access_network.cpp
@@ -487,7 +487,6 @@ FileAccessNetwork::~FileAccessNetwork() {
 
 	FileAccessNetworkClient *nc = FileAccessNetworkClient::singleton;
 	nc->lock_mutex();
-	id = nc->last_id++;
 	nc->accesses.erase(id);
 	nc->unlock_mutex();
 }
diff --git a/core/io/http_client_tcp.cpp b/core/io/http_client_tcp.cpp
index e61833ce7c..f920799677 100644
--- a/core/io/http_client_tcp.cpp
+++ b/core/io/http_client_tcp.cpp
@@ -614,7 +614,7 @@ PackedByteArray HTTPClientTCP::read_response_body_chunk() {
 					for (int i = 0; i < chunk.size() - 2; i++) {
 						char c = chunk[i];
 						int v = 0;
-						if (c >= '0' && c <= '9') {
+						if (is_digit(c)) {
 							v = c - '0';
 						} else if (c >= 'a' && c <= 'f') {
 							v = c - 'a' + 10;
diff --git a/core/io/image.cpp b/core/io/image.cpp
index 9df2b6835c..577fc59807 100644
--- a/core/io/image.cpp
+++ b/core/io/image.cpp
@@ -30,14 +30,17 @@
 
 #include "image.h"
 
+#include "core/error/error_list.h"
 #include "core/error/error_macros.h"
 #include "core/io/image_loader.h"
 #include "core/io/resource_loader.h"
 #include "core/math/math_funcs.h"
 #include "core/string/print_string.h"
 #include "core/templates/hash_map.h"
+#include "core/variant/dictionary.h"
 
 #include <stdio.h>
+#include <cmath>
 
 const char *Image::format_names[Image::FORMAT_MAX] = {
 	"Lum8", //luminance
@@ -2056,7 +2059,7 @@ void Image::create(const char **p_xpm) {
 						for (int i = 0; i < 6; i++) {
 							char v = line_ptr[i];
 
-							if (v >= '0' && v <= '9') {
+							if (is_digit(v)) {
 								v -= '0';
 							} else if (v >= 'A' && v <= 'F') {
 								v = (v - 'A') + 10;
@@ -3135,6 +3138,8 @@ void Image::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("rgbe_to_srgb"), &Image::rgbe_to_srgb);
 	ClassDB::bind_method(D_METHOD("bump_map_to_normal_map", "bump_scale"), &Image::bump_map_to_normal_map, DEFVAL(1.0));
 
+	ClassDB::bind_method(D_METHOD("compute_image_metrics", "compared_image", "use_luma"), &Image::compute_image_metrics);
+
 	ClassDB::bind_method(D_METHOD("blit_rect", "src", "src_rect", "dst"), &Image::blit_rect);
 	ClassDB::bind_method(D_METHOD("blit_rect_mask", "src", "mask", "src_rect", "dst"), &Image::blit_rect_mask);
 	ClassDB::bind_method(D_METHOD("blend_rect", "src", "src_rect", "dst"), &Image::blend_rect);
@@ -3620,3 +3625,128 @@ Ref<Resource> Image::duplicate(bool p_subresources) const {
 void Image::set_as_black() {
 	memset(data.ptrw(), 0, data.size());
 }
+
+Dictionary Image::compute_image_metrics(const Ref<Image> p_compared_image, bool p_luma_metric) {
+	// Compares this image with p_compared_image over the area both images cover and
+	// returns the "max", "mean", "mean_squared", "root_mean_squared" and "peak_snr"
+	// error metrics. On failure the initial values assigned to `result` below are returned.
+	//
+	// https://github.com/richgel999/bc7enc_rdo/blob/master/LICENSE
+	//
+	// This is free and unencumbered software released into the public domain.
+	// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+	// software, either in source code form or as a compiled binary, for any purpose,
+	// commercial or non - commercial, and by any means.
+	// In jurisdictions that recognize copyright laws, the author or authors of this
+	// software dedicate any and all copyright interest in the software to the public
+	// domain. We make this dedication for the benefit of the public at large and to
+	// the detriment of our heirs and successors. We intend this dedication to be an
+	// overt act of relinquishment in perpetuity of all present and future rights to
+	// this software under copyright law.
+	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+	// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+	// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+	// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+	Dictionary result;
+	result["max"] = INFINITY;
+	result["mean"] = INFINITY;
+	result["mean_squared"] = INFINITY;
+	result["root_mean_squared"] = INFINITY;
+	result["peak_snr"] = 0.0f;
+
+	ERR_FAIL_NULL_V(p_compared_image, result);
+	Error err = OK;
+	// Work on decompressed copies so that neither input image is modified.
+	Ref<Image> compared_image = duplicate(true);
+	if (compared_image->is_compressed()) {
+		err = compared_image->decompress();
+	}
+	ERR_FAIL_COND_V(err != OK, result);
+	Ref<Image> source_image = p_compared_image->duplicate(true);
+	if (source_image->is_compressed()) {
+		err = source_image->decompress();
+	}
+	ERR_FAIL_COND_V(err != OK, result);
+
+	ERR_FAIL_COND_V_MSG((compared_image->get_format() >= Image::FORMAT_RH) && (compared_image->get_format() <= Image::FORMAT_RGBE9995), result, "Metrics on HDR images are not supported.");
+	ERR_FAIL_COND_V_MSG((source_image->get_format() >= Image::FORMAT_RH) && (source_image->get_format() <= Image::FORMAT_RGBE9995), result, "Metrics on HDR images are not supported.");
+
+	double image_metric_max = 0.0, image_metric_mean = 0.0, image_metric_mean_squared = 0.0, image_metric_root_mean_squared = 0.0, image_metric_peak_snr = 0.0;
+	const bool average_component_error = true;
+
+	const uint32_t width = MIN(compared_image->get_width(), source_image->get_width());
+	const uint32_t height = MIN(compared_image->get_height(), source_image->get_height());
+	ERR_FAIL_COND_V_MSG(width == 0 || height == 0, result, "Can't compute metrics on an empty image.");
+
+	// Histogram approach originally due to Charles Bloom.
+	double hist[256];
+	memset(hist, 0, sizeof(hist));
+
+	for (uint32_t y = 0; y < height; y++) {
+		for (uint32_t x = 0; x < width; x++) {
+			const Color color_a = compared_image->get_pixel(x, y);
+			const Color color_b = source_image->get_pixel(x, y);
+
+			if (!p_luma_metric) {
+				ERR_FAIL_COND_V_MSG(color_a.r > 1.0f, result, "Can't compare HDR colors.");
+				ERR_FAIL_COND_V_MSG(color_b.r > 1.0f, result, "Can't compare HDR colors.");
+				hist[Math::abs(color_a.get_r8() - color_b.get_r8())]++;
+				ERR_FAIL_COND_V_MSG(color_a.g > 1.0f, result, "Can't compare HDR colors.");
+				ERR_FAIL_COND_V_MSG(color_b.g > 1.0f, result, "Can't compare HDR colors.");
+				hist[Math::abs(color_a.get_g8() - color_b.get_g8())]++;
+				ERR_FAIL_COND_V_MSG(color_a.b > 1.0f, result, "Can't compare HDR colors.");
+				ERR_FAIL_COND_V_MSG(color_b.b > 1.0f, result, "Can't compare HDR colors.");
+				hist[Math::abs(color_a.get_b8() - color_b.get_b8())]++;
+				ERR_FAIL_COND_V_MSG(color_a.a > 1.0f, result, "Can't compare HDR colors.");
+				ERR_FAIL_COND_V_MSG(color_b.a > 1.0f, result, "Can't compare HDR colors.");
+				hist[Math::abs(color_a.get_a8() - color_b.get_a8())]++;
+			} else {
+				ERR_FAIL_COND_V_MSG(color_a.r > 1.0f, result, "Can't compare HDR colors.");
+				ERR_FAIL_COND_V_MSG(color_b.r > 1.0f, result, "Can't compare HDR colors.");
+				// REC709 weightings
+				int luma_a = (13938U * color_a.get_r8() + 46869U * color_a.get_g8() + 4729U * color_a.get_b8() + 32768U) >> 16U;
+				int luma_b = (13938U * color_b.get_r8() + 46869U * color_b.get_g8() + 4729U * color_b.get_b8() + 32768U) >> 16U;
+				hist[Math::abs(luma_a - luma_b)]++;
+			}
+		}
+	}
+
+	// Fold the histogram into the largest error and the sums of errors and squared errors.
+	double sum = 0.0f, sum2 = 0.0f;
+	for (uint32_t i = 0; i < 256; i++) {
+		if (!hist[i]) {
+			continue;
+		}
+		image_metric_max = MAX(image_metric_max, i);
+		double x = i * hist[i];
+		sum += x;
+		sum2 += i * x;
+	}
+
+	// See http://richg42.blogspot.com/2016/09/how-to-compute-psnr-from-old-berkeley.html
+	double total_values = double(width) * double(height);
+	// The luma path records one histogram sample per pixel, the per-channel path
+	// records four (R, G, B, A), so only the latter scales the sample count.
+	if (average_component_error && !p_luma_metric) {
+		total_values *= 4;
+	}
+
+	image_metric_mean = CLAMP(sum / total_values, 0.0f, 255.0f);
+	image_metric_mean_squared = CLAMP(sum2 / total_values, 0.0f, 255.0f * 255.0f);
+
+	image_metric_root_mean_squared = sqrt(image_metric_mean_squared);
+	if (!image_metric_root_mean_squared) {
+		image_metric_peak_snr = 1e+10f;
+	} else {
+		image_metric_peak_snr = CLAMP(log10(255.0f / image_metric_root_mean_squared) * 20.0f, 0.0f, 500.0f);
+	}
+	result["max"] = image_metric_max;
+	result["mean"] = image_metric_mean;
+	result["mean_squared"] = image_metric_mean_squared;
+	result["root_mean_squared"] = image_metric_root_mean_squared;
+	result["peak_snr"] = image_metric_peak_snr;
+	return result;
+}
diff --git a/core/io/image.h b/core/io/image.h
index ddfb2bb01d..53bfa0881f 100644
--- a/core/io/image.h
+++ b/core/io/image.h
@@ -399,6 +399,8 @@ public:
 		mipmaps = p_image->mipmaps;
 		data = p_image->data;
 	}
+
+	Dictionary compute_image_metrics(const Ref<Image> p_compared_image, bool p_luma_metric = true);
 };
 
 VARIANT_ENUM_CAST(Image::Format)
diff --git a/core/io/ip_address.cpp b/core/io/ip_address.cpp
index 38f99a08a4..d183c60798 100644
--- a/core/io/ip_address.cpp
+++ b/core/io/ip_address.cpp
@@ -71,7 +71,7 @@ static void _parse_hex(const String &p_string, int p_start, uint8_t *p_dst) {
 
 		int n = 0;
 		char32_t c = p_string[i];
-		if (c >= '0' && c <= '9') {
+		if (is_digit(c)) {
 			n = c - '0';
 		} else if (c >= 'a' && c <= 'f') {
 			n = 10 + (c - 'a');
@@ -113,7 +113,7 @@ void IPAddress::_parse_ipv6(const String &p_string) {
 		} else if (c == '.') {
 			part_ipv4 = true;
 
-		} else if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
+		} else if (is_hex_digit(c)) {
 			if (!part_found) {
 				parts[parts_idx++] = i;
 				part_found = true;
diff --git a/core/io/json.cpp b/core/io/json.cpp
index 7b642f6a59..4b745dff44 100644
--- a/core/io/json.cpp
+++ b/core/io/json.cpp
@@ -229,12 +229,12 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to
 										r_err_str = "Unterminated String";
 										return ERR_PARSE_ERROR;
 									}
-									if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+									if (!is_hex_digit(c)) {
 										r_err_str = "Malformed hex constant in string";
 										return ERR_PARSE_ERROR;
 									}
 									char32_t v;
-									if (c >= '0' && c <= '9') {
+									if (is_digit(c)) {
 										v = c - '0';
 									} else if (c >= 'a' && c <= 'f') {
 										v = c - 'a';
@@ -265,12 +265,12 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to
 											r_err_str = "Unterminated String";
 											return ERR_PARSE_ERROR;
 										}
-										if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+										if (!is_hex_digit(c)) {
 											r_err_str = "Malformed hex constant in string";
 											return ERR_PARSE_ERROR;
 										}
 										char32_t v;
-										if (c >= '0' && c <= '9') {
+										if (is_digit(c)) {
 											v = c - '0';
 										} else if (c >= 'a' && c <= 'f') {
 											v = c - 'a';
@@ -326,7 +326,7 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to
 					break;
 				}
 
-				if (p_str[index] == '-' || (p_str[index] >= '0' && p_str[index] <= '9')) {
+				if (p_str[index] == '-' || is_digit(p_str[index])) {
 					//a number
 					const char32_t *rptr;
 					double number = String::to_float(&p_str[index], &rptr);
@@ -335,10 +335,10 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to
 					r_token.value = number;
 					return OK;
 
-				} else if ((p_str[index] >= 'A' && p_str[index] <= 'Z') || (p_str[index] >= 'a' && p_str[index] <= 'z')) {
+				} else if (is_ascii_char(p_str[index])) {
 					String id;
 
-					while ((p_str[index] >= 'A' && p_str[index] <= 'Z') || (p_str[index] >= 'a' && p_str[index] <= 'z')) {
+					while (is_ascii_char(p_str[index])) {
 						id += p_str[index];
 						index++;
 					}
diff --git a/core/io/pck_packer.cpp b/core/io/pck_packer.cpp
index 221a680130..272ace3438 100644
--- a/core/io/pck_packer.cpp
+++ b/core/io/pck_packer.cpp
@@ -62,7 +62,7 @@ Error PCKPacker::pck_start(const String &p_file, int p_alignment, const String &
 		int v = 0;
 		if (i * 2 < _key.length()) {
 			char32_t ct = _key[i * 2];
-			if (ct >= '0' && ct <= '9') {
+			if (is_digit(ct)) {
 				ct = ct - '0';
 			} else if (ct >= 'a' && ct <= 'f') {
 				ct = 10 + ct - 'a';
@@ -72,7 +72,7 @@ Error PCKPacker::pck_start(const String &p_file, int p_alignment, const String &
 
 		if (i * 2 + 1 < _key.length()) {
 			char32_t ct = _key[i * 2 + 1];
-			if (ct >= '0' && ct <= '9') {
+			if (is_digit(ct)) {
 				ct = ct - '0';
 			} else if (ct >= 'a' && ct <= 'f') {
 				ct = 10 + ct - 'a';
diff --git a/core/io/resource_uid.cpp b/core/io/resource_uid.cpp
index 1a16d5b47a..776756e64e 100644
--- a/core/io/resource_uid.cpp
+++ b/core/io/resource_uid.cpp
@@ -71,9 +71,9 @@ ResourceUID::ID ResourceUID::text_to_id(const String &p_text) const {
 	for (uint32_t i = 6; i < l; i++) {
 		uid *= base;
 		uint32_t c = p_text[i];
-		if (c >= 'a' && c <= 'z') {
+		if (is_ascii_lower_case(c)) {
 			uid += c - 'a';
-		} else if (c >= '0' && c <= '9') {
+		} else if (is_digit(c)) {
 			uid += c - '0' + char_count;
 		} else {
 			return INVALID_ID;
diff --git a/core/math/aabb.h b/core/math/aabb.h
index 3d19410ddf..cb6f05e9ea 100644
--- a/core/math/aabb.h
+++ b/core/math/aabb.h
@@ -36,13 +36,13 @@
 #include "core/math/vector3.h"
 
 /**
- * AABB / AABB (Axis Aligned Bounding Box)
- * This is implemented by a point (position) and the box size
+ * AABB (Axis Aligned Bounding Box)
+ * This is implemented by a point (position) and the box size.
  */
+
 class Variant;
 
-class _NO_DISCARD_ AABB {
-public:
+struct _NO_DISCARD_ AABB {
 	Vector3 position;
 	Vector3 size;
 
diff --git a/core/math/basis.h b/core/math/basis.h
index 802da82089..683f05150c 100644
--- a/core/math/basis.h
+++ b/core/math/basis.h
@@ -34,11 +34,7 @@
 #include "core/math/quaternion.h"
 #include "core/math/vector3.h"
 
-class _NO_DISCARD_ Basis {
-private:
-	void _set_diagonal(const Vector3 &p_diag);
-
-public:
+struct _NO_DISCARD_ Basis {
 	Vector3 elements[3] = {
 		Vector3(1, 0, 0),
 		Vector3(0, 1, 0),
@@ -263,6 +259,10 @@ public:
 	}
 
 	_FORCE_INLINE_ Basis() {}
+
+private:
+	// Helper method.
+	void _set_diagonal(const Vector3 &p_diag);
 };
 
 _FORCE_INLINE_ void Basis::operator*=(const Basis &p_matrix) {
@@ -334,4 +334,5 @@ real_t Basis::determinant() const {
 			elements[1][0] * (elements[0][1] * elements[2][2] - elements[2][1] * elements[0][2]) +
 			elements[2][0] * (elements[0][1] * elements[1][2] - elements[1][1] * elements[0][2]);
 }
+
 #endif // BASIS_H
diff --git a/core/math/camera_matrix.cpp b/core/math/camera_matrix.cpp
index 2902ca59b9..f5d746ef0f 100644
--- a/core/math/camera_matrix.cpp
+++ b/core/math/camera_matrix.cpp
@@ -30,7 +30,11 @@
 
 #include "camera_matrix.h"
 
+#include "core/math/aabb.h"
 #include "core/math/math_funcs.h"
+#include "core/math/plane.h"
+#include "core/math/rect2.h"
+#include "core/math/transform_3d.h"
 #include "core/string/print_string.h"
 
 float CameraMatrix::determinant() const {
diff --git a/core/math/camera_matrix.h b/core/math/camera_matrix.h
index da1aba7562..285d2ae384 100644
--- a/core/math/camera_matrix.h
+++ b/core/math/camera_matrix.h
@@ -31,8 +31,14 @@
 #ifndef CAMERA_MATRIX_H
 #define CAMERA_MATRIX_H
 
-#include "core/math/rect2.h"
-#include "core/math/transform_3d.h"
+#include "core/math/math_defs.h"
+#include "core/math/vector3.h"
+
+struct AABB;
+struct Plane;
+struct Rect2;
+struct Transform3D;
+struct Vector2;
 
 struct CameraMatrix {
 	enum Planes {
diff --git a/core/math/delaunay_2d.h b/core/math/delaunay_2d.h
index 08f5df8472..c39997d6a9 100644
--- a/core/math/delaunay_2d.h
+++ b/core/math/delaunay_2d.h
@@ -32,6 +32,7 @@
 #define DELAUNAY_2D_H
 
 #include "core/math/rect2.h"
+#include "core/templates/vector.h"
 
 class Delaunay2D {
 public:
diff --git a/core/math/dynamic_bvh.h b/core/math/dynamic_bvh.h
index 3041cdf268..50ec2c2b30 100644
--- a/core/math/dynamic_bvh.h
+++ b/core/math/dynamic_bvh.h
@@ -28,8 +28,8 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef DYNAMICBVH_H
-#define DYNAMICBVH_H
+#ifndef DYNAMIC_BVH_H
+#define DYNAMIC_BVH_H
 
 #include "core/math/aabb.h"
 #include "core/templates/list.h"
@@ -474,4 +474,4 @@ void DynamicBVH::ray_query(const Vector3 &p_from, const Vector3 &p_to, QueryResu
 	} while (depth > 0);
 }
 
-#endif // DYNAMICBVH_H
+#endif // DYNAMIC_BVH_H
diff --git a/core/math/expression.cpp b/core/math/expression.cpp
index 0bd8a0abb5..0ddac9744e 100644
--- a/core/math/expression.cpp
+++ b/core/math/expression.cpp
@@ -37,18 +37,6 @@
 #include "core/os/os.h"
 #include "core/variant/variant_parser.h"
 
-static bool _is_number(char32_t c) {
-	return (c >= '0' && c <= '9');
-}
-
-static bool _is_hex_digit(char32_t c) {
-	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
-}
-
-static bool _is_binary_digit(char32_t c) {
-	return (c == '0' || c == '1');
-}
-
 Error Expression::_get_token(Token &r_token) {
 	while (true) {
 #define GET_CHAR() (str_ofs >= expression.length() ? 0 : expression[str_ofs++])
@@ -96,7 +84,7 @@ Error Expression::_get_token(Token &r_token) {
 				r_token.type = TK_INPUT;
 				int index = 0;
 				do {
-					if (!_is_number(expression[str_ofs])) {
+					if (!is_digit(expression[str_ofs])) {
 						_set_error("Expected number after '$'");
 						r_token.type = TK_ERROR;
 						return ERR_PARSE_ERROR;
@@ -105,7 +93,7 @@ Error Expression::_get_token(Token &r_token) {
 					index += expression[str_ofs] - '0';
 					str_ofs++;
 
-				} while (_is_number(expression[str_ofs]));
+				} while (is_digit(expression[str_ofs]));
 
 				r_token.value = index;
 				return OK;
@@ -255,13 +243,13 @@ Error Expression::_get_token(Token &r_token) {
 										r_token.type = TK_ERROR;
 										return ERR_PARSE_ERROR;
 									}
-									if (!(_is_number(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+									if (!is_hex_digit(c)) {
 										_set_error("Malformed hex constant in string");
 										r_token.type = TK_ERROR;
 										return ERR_PARSE_ERROR;
 									}
 									char32_t v;
-									if (_is_number(c)) {
+									if (is_digit(c)) {
 										v = c - '0';
 									} else if (c >= 'a' && c <= 'f') {
 										v = c - 'a';
@@ -336,7 +324,7 @@ Error Expression::_get_token(Token &r_token) {
 				}
 
 				char32_t next_char = (str_ofs >= expression.length()) ? 0 : expression[str_ofs];
-				if (_is_number(cchar) || (cchar == '.' && _is_number(next_char))) {
+				if (is_digit(cchar) || (cchar == '.' && is_digit(next_char))) {
 					//a number
 
 					String num;
@@ -360,7 +348,7 @@ Error Expression::_get_token(Token &r_token) {
 					while (true) {
 						switch (reading) {
 							case READING_INT: {
-								if (_is_number(c)) {
+								if (is_digit(c)) {
 									if (is_first_char && c == '0') {
 										if (next_char == 'b') {
 											reading = READING_BIN;
@@ -373,13 +361,14 @@ Error Expression::_get_token(Token &r_token) {
 									is_float = true;
 								} else if (c == 'e') {
 									reading = READING_EXP;
+									is_float = true;
 								} else {
 									reading = READING_DONE;
 								}
 
 							} break;
 							case READING_BIN: {
-								if (bin_beg && !_is_binary_digit(c)) {
+								if (bin_beg && !is_binary_digit(c)) {
 									reading = READING_DONE;
 								} else if (c == 'b') {
 									bin_beg = true;
@@ -387,7 +376,7 @@ Error Expression::_get_token(Token &r_token) {
 
 							} break;
 							case READING_HEX: {
-								if (hex_beg && !_is_hex_digit(c)) {
+								if (hex_beg && !is_hex_digit(c)) {
 									reading = READING_DONE;
 								} else if (c == 'x') {
 									hex_beg = true;
@@ -395,7 +384,7 @@ Error Expression::_get_token(Token &r_token) {
 
 							} break;
 							case READING_DEC: {
-								if (_is_number(c)) {
+								if (is_digit(c)) {
 								} else if (c == 'e') {
 									reading = READING_EXP;
 
@@ -405,13 +394,10 @@ Error Expression::_get_token(Token &r_token) {
 
 							} break;
 							case READING_EXP: {
-								if (_is_number(c)) {
+								if (is_digit(c)) {
 									exp_beg = true;
 
 								} else if ((c == '-' || c == '+') && !exp_sign && !exp_beg) {
-									if (c == '-') {
-										is_float = true;
-									}
 									exp_sign = true;
 
 								} else {
@@ -443,11 +429,11 @@ Error Expression::_get_token(Token &r_token) {
 					}
 					return OK;
 
-				} else if ((cchar >= 'A' && cchar <= 'Z') || (cchar >= 'a' && cchar <= 'z') || cchar == '_') {
+				} else if (is_ascii_char(cchar) || is_underscore(cchar)) {
 					String id;
 					bool first = true;
 
-					while ((cchar >= 'A' && cchar <= 'Z') || (cchar >= 'a' && cchar <= 'z') || cchar == '_' || (!first && _is_number(cchar))) {
+					while (is_ascii_char(cchar) || is_underscore(cchar) || (!first && is_digit(cchar))) {
 						id += String::chr(cchar);
 						cchar = GET_CHAR();
 						first = false;
diff --git a/core/math/face3.h b/core/math/face3.h
index 3dbbca09e0..8b123f078c 100644
--- a/core/math/face3.h
+++ b/core/math/face3.h
@@ -36,8 +36,7 @@
 #include "core/math/transform_3d.h"
 #include "core/math/vector3.h"
 
-class _NO_DISCARD_ Face3 {
-public:
+struct _NO_DISCARD_ Face3 {
 	enum Side {
 		SIDE_OVER,
 		SIDE_UNDER,
@@ -48,14 +47,11 @@ public:
 	Vector3 vertex[3];
 
 	/**
-	 *
 	 * @param p_plane plane used to split the face
 	 * @param p_res array of at least 3 faces, amount used in function return
 	 * @param p_is_point_over array of at least 3 booleans, determining which face is over the plane, amount used in function return
-	 * @param _epsilon constant used for numerical error rounding, to add "thickness" to the plane (so coplanar points can happen)
 	 * @return amount of faces generated by the split, either 0 (means no split possible), 2 or 3
 	 */
-
 	int split_by_plane(const Plane &p_plane, Face3 *p_res, bool *p_is_point_over) const;
 
 	Plane get_plane(ClockDirection p_dir = CLOCKWISE) const;
diff --git a/core/math/geometry_2d.h b/core/math/geometry_2d.h
index 7385dba438..a2881d5f60 100644
--- a/core/math/geometry_2d.h
+++ b/core/math/geometry_2d.h
@@ -32,7 +32,11 @@
 #define GEOMETRY_2D_H
 
 #include "core/math/delaunay_2d.h"
+#include "core/math/math_funcs.h"
 #include "core/math/triangulate.h"
+#include "core/math/vector2.h"
+#include "core/math/vector2i.h"
+#include "core/math/vector3.h"
 #include "core/math/vector3i.h"
 #include "core/templates/vector.h"
 
diff --git a/core/math/plane.h b/core/math/plane.h
index 8cb6f62b3b..66c1741662 100644
--- a/core/math/plane.h
+++ b/core/math/plane.h
@@ -35,13 +35,12 @@
 
 class Variant;
 
-class _NO_DISCARD_ Plane {
-public:
+struct _NO_DISCARD_ Plane {
 	Vector3 normal;
 	real_t d = 0;
 
 	void set_normal(const Vector3 &p_normal);
-	_FORCE_INLINE_ Vector3 get_normal() const { return normal; }; ///Point is coplanar, CMP_EPSILON for precision
+	_FORCE_INLINE_ Vector3 get_normal() const { return normal; };
 
 	void normalize();
 	Plane normalized() const;
diff --git a/core/math/quaternion.h b/core/math/quaternion.h
index 2575d7d229..7874e4f428 100644
--- a/core/math/quaternion.h
+++ b/core/math/quaternion.h
@@ -36,8 +36,7 @@
 #include "core/math/vector3.h"
 #include "core/string/ustring.h"
 
-class _NO_DISCARD_ Quaternion {
-public:
+struct _NO_DISCARD_ Quaternion {
 	union {
 		struct {
 			real_t x;
diff --git a/core/math/rect2.cpp b/core/math/rect2.cpp
index 9047c19434..d6e20bdc3c 100644
--- a/core/math/rect2.cpp
+++ b/core/math/rect2.cpp
@@ -28,7 +28,11 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#include "core/math/transform_2d.h" // Includes rect2.h but Rect2 needs Transform2D
+#include "rect2.h"
+
+#include "core/math/rect2i.h"
+#include "core/math/transform_2d.h"
+#include "core/string/ustring.h"
 
 bool Rect2::is_equal_approx(const Rect2 &p_rect) const {
 	return position.is_equal_approx(p_rect.position) && size.is_equal_approx(p_rect.size);
@@ -278,6 +282,6 @@ Rect2::operator String() const {
 	return "[P: " + position.operator String() + ", S: " + size + "]";
 }
 
-Rect2i::operator String() const {
-	return "[P: " + position.operator String() + ", S: " + size + "]";
+Rect2::operator Rect2i() const {
+	return Rect2i(position, size);
 }
diff --git a/core/math/rect2.h b/core/math/rect2.h
index b14c69302c..6ecc02336c 100644
--- a/core/math/rect2.h
+++ b/core/math/rect2.h
@@ -31,8 +31,11 @@
 #ifndef RECT2_H
 #define RECT2_H
 
-#include "core/math/vector2.h" // also includes math_funcs and ustring
+#include "core/error/error_macros.h"
+#include "core/math/vector2.h"
 
+class String;
+struct Rect2i;
 struct Transform2D;
 
 struct _NO_DISCARD_ Rect2 {
@@ -179,6 +182,7 @@ struct _NO_DISCARD_ Rect2 {
 
 		return new_rect;
 	}
+
 	inline bool has_point(const Point2 &p_point) const {
 #ifdef MATH_CHECKS
 		if (unlikely(size.x < 0 || size.y < 0)) {
@@ -201,6 +205,7 @@ struct _NO_DISCARD_ Rect2 {
 
 		return true;
 	}
+
 	bool is_equal_approx(const Rect2 &p_rect) const;
 
 	bool operator==(const Rect2 &p_rect) const { return position == p_rect.position && size == p_rect.size; }
@@ -351,6 +356,7 @@ struct _NO_DISCARD_ Rect2 {
 	}
 
 	operator String() const;
+	operator Rect2i() const;
 
 	Rect2() {}
 	Rect2(real_t p_x, real_t p_y, real_t p_width, real_t p_height) :
@@ -363,214 +369,4 @@ struct _NO_DISCARD_ Rect2 {
 	}
 };
 
-struct _NO_DISCARD_ Rect2i {
-	Point2i position;
-	Size2i size;
-
-	const Point2i &get_position() const { return position; }
-	void set_position(const Point2i &p_position) { position = p_position; }
-	const Size2i &get_size() const { return size; }
-	void set_size(const Size2i &p_size) { size = p_size; }
-
-	int get_area() const { return size.width * size.height; }
-
-	_FORCE_INLINE_ Vector2i get_center() const { return position + (size / 2); }
-
-	inline bool intersects(const Rect2i &p_rect) const {
-#ifdef MATH_CHECKS
-		if (unlikely(size.x < 0 || size.y < 0 || p_rect.size.x < 0 || p_rect.size.y < 0)) {
-			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
-		}
-#endif
-		if (position.x >= (p_rect.position.x + p_rect.size.width)) {
-			return false;
-		}
-		if ((position.x + size.width) <= p_rect.position.x) {
-			return false;
-		}
-		if (position.y >= (p_rect.position.y + p_rect.size.height)) {
-			return false;
-		}
-		if ((position.y + size.height) <= p_rect.position.y) {
-			return false;
-		}
-
-		return true;
-	}
-
-	inline bool encloses(const Rect2i &p_rect) const {
-#ifdef MATH_CHECKS
-		if (unlikely(size.x < 0 || size.y < 0 || p_rect.size.x < 0 || p_rect.size.y < 0)) {
-			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
-		}
-#endif
-		return (p_rect.position.x >= position.x) && (p_rect.position.y >= position.y) &&
-				((p_rect.position.x + p_rect.size.x) <= (position.x + size.x)) &&
-				((p_rect.position.y + p_rect.size.y) <= (position.y + size.y));
-	}
-
-	_FORCE_INLINE_ bool has_no_area() const {
-		return (size.x <= 0 || size.y <= 0);
-	}
-
-	// Returns the instersection between two Rect2is or an empty Rect2i if there is no intersection
-	inline Rect2i intersection(const Rect2i &p_rect) const {
-		Rect2i new_rect = p_rect;
-
-		if (!intersects(new_rect)) {
-			return Rect2i();
-		}
-
-		new_rect.position.x = MAX(p_rect.position.x, position.x);
-		new_rect.position.y = MAX(p_rect.position.y, position.y);
-
-		Point2i p_rect_end = p_rect.position + p_rect.size;
-		Point2i end = position + size;
-
-		new_rect.size.x = MIN(p_rect_end.x, end.x) - new_rect.position.x;
-		new_rect.size.y = MIN(p_rect_end.y, end.y) - new_rect.position.y;
-
-		return new_rect;
-	}
-
-	inline Rect2i merge(const Rect2i &p_rect) const { ///< return a merged rect
-#ifdef MATH_CHECKS
-		if (unlikely(size.x < 0 || size.y < 0 || p_rect.size.x < 0 || p_rect.size.y < 0)) {
-			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
-		}
-#endif
-		Rect2i new_rect;
-
-		new_rect.position.x = MIN(p_rect.position.x, position.x);
-		new_rect.position.y = MIN(p_rect.position.y, position.y);
-
-		new_rect.size.x = MAX(p_rect.position.x + p_rect.size.x, position.x + size.x);
-		new_rect.size.y = MAX(p_rect.position.y + p_rect.size.y, position.y + size.y);
-
-		new_rect.size = new_rect.size - new_rect.position; //make relative again
-
-		return new_rect;
-	}
-	bool has_point(const Point2i &p_point) const {
-#ifdef MATH_CHECKS
-		if (unlikely(size.x < 0 || size.y < 0)) {
-			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
-		}
-#endif
-		if (p_point.x < position.x) {
-			return false;
-		}
-		if (p_point.y < position.y) {
-			return false;
-		}
-
-		if (p_point.x >= (position.x + size.x)) {
-			return false;
-		}
-		if (p_point.y >= (position.y + size.y)) {
-			return false;
-		}
-
-		return true;
-	}
-
-	bool operator==(const Rect2i &p_rect) const { return position == p_rect.position && size == p_rect.size; }
-	bool operator!=(const Rect2i &p_rect) const { return position != p_rect.position || size != p_rect.size; }
-
-	Rect2i grow(int p_amount) const {
-		Rect2i g = *this;
-		g.position.x -= p_amount;
-		g.position.y -= p_amount;
-		g.size.width += p_amount * 2;
-		g.size.height += p_amount * 2;
-		return g;
-	}
-
-	inline Rect2i grow_side(Side p_side, int p_amount) const {
-		Rect2i g = *this;
-		g = g.grow_individual((SIDE_LEFT == p_side) ? p_amount : 0,
-				(SIDE_TOP == p_side) ? p_amount : 0,
-				(SIDE_RIGHT == p_side) ? p_amount : 0,
-				(SIDE_BOTTOM == p_side) ? p_amount : 0);
-		return g;
-	}
-
-	inline Rect2i grow_side_bind(uint32_t p_side, int p_amount) const {
-		return grow_side(Side(p_side), p_amount);
-	}
-
-	inline Rect2i grow_individual(int p_left, int p_top, int p_right, int p_bottom) const {
-		Rect2i g = *this;
-		g.position.x -= p_left;
-		g.position.y -= p_top;
-		g.size.width += p_left + p_right;
-		g.size.height += p_top + p_bottom;
-
-		return g;
-	}
-
-	_FORCE_INLINE_ Rect2i expand(const Vector2i &p_vector) const {
-		Rect2i r = *this;
-		r.expand_to(p_vector);
-		return r;
-	}
-
-	inline void expand_to(const Point2i &p_vector) {
-#ifdef MATH_CHECKS
-		if (unlikely(size.x < 0 || size.y < 0)) {
-			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
-		}
-#endif
-		Point2i begin = position;
-		Point2i end = position + size;
-
-		if (p_vector.x < begin.x) {
-			begin.x = p_vector.x;
-		}
-		if (p_vector.y < begin.y) {
-			begin.y = p_vector.y;
-		}
-
-		if (p_vector.x > end.x) {
-			end.x = p_vector.x;
-		}
-		if (p_vector.y > end.y) {
-			end.y = p_vector.y;
-		}
-
-		position = begin;
-		size = end - begin;
-	}
-
-	_FORCE_INLINE_ Rect2i abs() const {
-		return Rect2i(Point2i(position.x + MIN(size.x, 0), position.y + MIN(size.y, 0)), size.abs());
-	}
-
-	_FORCE_INLINE_ void set_end(const Vector2i &p_end) {
-		size = p_end - position;
-	}
-
-	_FORCE_INLINE_ Vector2i get_end() const {
-		return position + size;
-	}
-
-	operator String() const;
-
-	operator Rect2() const { return Rect2(position, size); }
-
-	Rect2i() {}
-	Rect2i(const Rect2 &p_r2) :
-			position(p_r2.position),
-			size(p_r2.size) {
-	}
-	Rect2i(int p_x, int p_y, int p_width, int p_height) :
-			position(Point2i(p_x, p_y)),
-			size(Size2i(p_width, p_height)) {
-	}
-	Rect2i(const Point2i &p_pos, const Size2i &p_size) :
-			position(p_pos),
-			size(p_size) {
-	}
-};
-
 #endif // RECT2_H
diff --git a/core/math/rect2i.cpp b/core/math/rect2i.cpp
new file mode 100644
index 0000000000..0782c450d0
--- /dev/null
+++ b/core/math/rect2i.cpp
@@ -0,0 +1,42 @@
+/*************************************************************************/
+/*  rect2i.cpp                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "rect2i.h"
+
+#include "core/math/rect2.h"
+#include "core/string/ustring.h"
+
+Rect2i::operator String() const { // Text form: "[P: <position>, S: <size>]".
+	return "[P: " + position.operator String() + ", S: " + size + "]";
+}
+
+Rect2i::operator Rect2() const { // Conversion to the Rect2 type.
+	return Rect2(position, size); // Built from this rect's integer position and size.
+}
diff --git a/core/math/rect2i.h b/core/math/rect2i.h
new file mode 100644
index 0000000000..db1459a3e6
--- /dev/null
+++ b/core/math/rect2i.h
@@ -0,0 +1,245 @@
+/*************************************************************************/
+/*  rect2i.h                                                             */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef RECT2I_H
+#define RECT2I_H
+
+#include "core/error/error_macros.h"
+#include "core/math/vector2i.h"
+
+class String;
+struct Rect2;
+
+struct _NO_DISCARD_ Rect2i { // Integer rectangle stored as a position plus a size.
+	Point2i position;
+	Size2i size;
+
+	const Point2i &get_position() const { return position; }
+	void set_position(const Point2i &p_position) { position = p_position; }
+	const Size2i &get_size() const { return size; }
+	void set_size(const Size2i &p_size) { size = p_size; }
+
+	int get_area() const { return size.width * size.height; } // Width times height, returned as int.
+
+	_FORCE_INLINE_ Vector2i get_center() const { return position + (size / 2); } // Position plus half the size.
+
+	inline bool intersects(const Rect2i &p_rect) const { // True only when the rects overlap on both axes; merely touching edges do not count.
+#ifdef MATH_CHECKS
+		if (unlikely(size.x < 0 || size.y < 0 || p_rect.size.x < 0 || p_rect.size.y < 0)) {
+			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
+		}
+#endif
+		if (position.x >= (p_rect.position.x + p_rect.size.width)) {
+			return false;
+		}
+		if ((position.x + size.width) <= p_rect.position.x) {
+			return false;
+		}
+		if (position.y >= (p_rect.position.y + p_rect.size.height)) {
+			return false;
+		}
+		if ((position.y + size.height) <= p_rect.position.y) {
+			return false;
+		}
+
+		return true;
+	}
+
+	inline bool encloses(const Rect2i &p_rect) const { // True when p_rect lies entirely inside this rect; shared edges are allowed.
+#ifdef MATH_CHECKS
+		if (unlikely(size.x < 0 || size.y < 0 || p_rect.size.x < 0 || p_rect.size.y < 0)) {
+			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
+		}
+#endif
+		return (p_rect.position.x >= position.x) && (p_rect.position.y >= position.y) &&
+				((p_rect.position.x + p_rect.size.x) <= (position.x + size.x)) &&
+				((p_rect.position.y + p_rect.size.y) <= (position.y + size.y));
+	}
+
+	_FORCE_INLINE_ bool has_no_area() const { // True when either dimension is zero or negative.
+		return (size.x <= 0 || size.y <= 0);
+	}
+
+	// Returns the intersection between two Rect2is or an empty Rect2i if there is no intersection.
+	inline Rect2i intersection(const Rect2i &p_rect) const {
+		Rect2i new_rect = p_rect;
+
+		if (!intersects(new_rect)) {
+			return Rect2i();
+		}
+
+		new_rect.position.x = MAX(p_rect.position.x, position.x);
+		new_rect.position.y = MAX(p_rect.position.y, position.y);
+
+		Point2i p_rect_end = p_rect.position + p_rect.size;
+		Point2i end = position + size;
+
+		new_rect.size.x = MIN(p_rect_end.x, end.x) - new_rect.position.x;
+		new_rect.size.y = MIN(p_rect_end.y, end.y) - new_rect.position.y;
+
+		return new_rect;
+	}
+
+	inline Rect2i merge(const Rect2i &p_rect) const { ///< Returns the smallest rect that contains both this rect and p_rect.
+#ifdef MATH_CHECKS
+		if (unlikely(size.x < 0 || size.y < 0 || p_rect.size.x < 0 || p_rect.size.y < 0)) {
+			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
+		}
+#endif
+		Rect2i new_rect;
+
+		new_rect.position.x = MIN(p_rect.position.x, position.x);
+		new_rect.position.y = MIN(p_rect.position.y, position.y);
+
+		new_rect.size.x = MAX(p_rect.position.x + p_rect.size.x, position.x + size.x);
+		new_rect.size.y = MAX(p_rect.position.y + p_rect.size.y, position.y + size.y);
+
+		new_rect.size = new_rect.size - new_rect.position; // Make relative again: size temporarily held the end corner.
+
+		return new_rect;
+	}
+	bool has_point(const Point2i &p_point) const { // Start edges are inclusive, end edges are exclusive.
+#ifdef MATH_CHECKS
+		if (unlikely(size.x < 0 || size.y < 0)) {
+			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
+		}
+#endif
+		if (p_point.x < position.x) {
+			return false;
+		}
+		if (p_point.y < position.y) {
+			return false;
+		}
+
+		if (p_point.x >= (position.x + size.x)) {
+			return false;
+		}
+		if (p_point.y >= (position.y + size.y)) {
+			return false;
+		}
+
+		return true;
+	}
+
+	bool operator==(const Rect2i &p_rect) const { return position == p_rect.position && size == p_rect.size; }
+	bool operator!=(const Rect2i &p_rect) const { return position != p_rect.position || size != p_rect.size; }
+
+	Rect2i grow(int p_amount) const { // Returns a copy with every edge moved outward by p_amount.
+		Rect2i g = *this;
+		g.position.x -= p_amount;
+		g.position.y -= p_amount;
+		g.size.width += p_amount * 2;
+		g.size.height += p_amount * 2;
+		return g;
+	}
+
+	inline Rect2i grow_side(Side p_side, int p_amount) const { // Grows only the side named by p_side; the other three amounts are zero.
+		Rect2i g = *this;
+		g = g.grow_individual((SIDE_LEFT == p_side) ? p_amount : 0,
+				(SIDE_TOP == p_side) ? p_amount : 0,
+				(SIDE_RIGHT == p_side) ? p_amount : 0,
+				(SIDE_BOTTOM == p_side) ? p_amount : 0);
+		return g;
+	}
+
+	inline Rect2i grow_side_bind(uint32_t p_side, int p_amount) const { // Same as grow_side(), with the side passed as a uint32_t and cast to Side.
+		return grow_side(Side(p_side), p_amount);
+	}
+
+	inline Rect2i grow_individual(int p_left, int p_top, int p_right, int p_bottom) const { // Returns a copy grown by a separate amount on each side.
+		Rect2i g = *this;
+		g.position.x -= p_left;
+		g.position.y -= p_top;
+		g.size.width += p_left + p_right;
+		g.size.height += p_top + p_bottom;
+
+		return g;
+	}
+
+	_FORCE_INLINE_ Rect2i expand(const Vector2i &p_vector) const { // Non-mutating variant of expand_to().
+		Rect2i r = *this;
+		r.expand_to(p_vector);
+		return r;
+	}
+
+	inline void expand_to(const Point2i &p_vector) { // Extends this rect in place so p_vector lies between position and position + size.
+#ifdef MATH_CHECKS
+		if (unlikely(size.x < 0 || size.y < 0)) {
+			ERR_PRINT("Rect2i size is negative, this is not supported. Use Rect2i.abs() to get a Rect2i with a positive size.");
+		}
+#endif
+		Point2i begin = position;
+		Point2i end = position + size;
+
+		if (p_vector.x < begin.x) {
+			begin.x = p_vector.x;
+		}
+		if (p_vector.y < begin.y) {
+			begin.y = p_vector.y;
+		}
+
+		if (p_vector.x > end.x) {
+			end.x = p_vector.x;
+		}
+		if (p_vector.y > end.y) {
+			end.y = p_vector.y;
+		}
+
+		position = begin;
+		size = end - begin;
+	}
+
+	_FORCE_INLINE_ Rect2i abs() const { // Shifts position by any negative size component and uses size.abs() as the new size.
+		return Rect2i(Point2i(position.x + MIN(size.x, 0), position.y + MIN(size.y, 0)), size.abs());
+	}
+
+	_FORCE_INLINE_ void set_end(const Vector2i &p_end) { // Keeps position and recomputes size from p_end.
+		size = p_end - position;
+	}
+
+	_FORCE_INLINE_ Vector2i get_end() const { // End corner: position + size.
+		return position + size;
+	}
+
+	operator String() const; // Declared only; not defined inline in this header.
+	operator Rect2() const; // Declared only, so this header needs just a forward declaration of Rect2.
+
+	Rect2i() {}
+	Rect2i(int p_x, int p_y, int p_width, int p_height) :
+			position(Point2i(p_x, p_y)),
+			size(Size2i(p_width, p_height)) {
+	}
+	Rect2i(const Point2i &p_pos, const Size2i &p_size) :
+			position(p_pos),
+			size(p_size) {
+	}
+};
+
+#endif // RECT2I_H
diff --git a/core/math/transform_2d.cpp b/core/math/transform_2d.cpp
index 0201cf575c..e6e24e9b32 100644
--- a/core/math/transform_2d.cpp
+++ b/core/math/transform_2d.cpp
@@ -30,6 +30,8 @@
 
 #include "transform_2d.h"
 
+#include "core/string/ustring.h"
+
 void Transform2D::invert() {
 	// FIXME: this function assumes the basis is a rotation matrix, with no scaling.
 	// Transform2D::affine_inverse can handle matrices with scaling, so GDScript should eventually use that.
diff --git a/core/math/transform_2d.h b/core/math/transform_2d.h
index 6c2d51bd9b..f4546c13c8 100644
--- a/core/math/transform_2d.h
+++ b/core/math/transform_2d.h
@@ -31,7 +31,12 @@
 #ifndef TRANSFORM_2D_H
 #define TRANSFORM_2D_H
 
-#include "core/math/rect2.h" // also includes vector2, math_funcs, and ustring
+#include "core/math/math_funcs.h"
+#include "core/math/rect2.h"
+#include "core/math/vector2.h"
+#include "core/templates/vector.h"
+
+class String;
 
 struct _NO_DISCARD_ Transform2D {
 	// Warning #1: basis of Transform2D is stored differently from Basis. In terms of elements array, the basis matrix looks like "on paper":
diff --git a/core/math/transform_3d.h b/core/math/transform_3d.h
index c16c278e74..3b4762e221 100644
--- a/core/math/transform_3d.h
+++ b/core/math/transform_3d.h
@@ -28,15 +28,14 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 
-#ifndef TRANSFORM_H
-#define TRANSFORM_H
+#ifndef TRANSFORM_3D_H
+#define TRANSFORM_3D_H
 
 #include "core/math/aabb.h"
 #include "core/math/basis.h"
 #include "core/math/plane.h"
 
-class _NO_DISCARD_ Transform3D {
-public:
+struct _NO_DISCARD_ Transform3D {
 	Basis basis;
 	Vector3 origin;
 
@@ -265,4 +264,4 @@ _FORCE_INLINE_ Plane Transform3D::xform_inv_fast(const Plane &p_plane, const Tra
 	return Plane(normal, d);
 }
 
-#endif // TRANSFORM_H
+#endif // TRANSFORM_3D_H
diff --git a/core/math/triangulate.h b/core/math/triangulate.h
index d96bdb8cab..0bfcfcb978 100644
--- a/core/math/triangulate.h
+++ b/core/math/triangulate.h
@@ -32,6 +32,7 @@
 #define TRIANGULATE_H
 
 #include "core/math/vector2.h"
+#include "core/templates/vector.h"
 
 /*
 https://www.flipcode.com/archives/Efficient_Polygon_Triangulation.shtml
diff --git a/core/math/vector2.cpp b/core/math/vector2.cpp
index 676a0004ea..40149e8cc1 100644
--- a/core/math/vector2.cpp
+++ b/core/math/vector2.cpp
@@ -30,6 +30,9 @@
 
 #include "vector2.h"
 
+#include "core/math/vector2i.h"
+#include "core/string/ustring.h"
+
 real_t Vector2::angle() const {
 	return Math::atan2(y, x);
 }
@@ -202,91 +205,6 @@ Vector2::operator String() const {
 	return "(" + String::num_real(x, false) + ", " + String::num_real(y, false) + ")";
 }
 
-/* Vector2i */
-
-Vector2i Vector2i::clamp(const Vector2i &p_min, const Vector2i &p_max) const {
-	return Vector2i(
-			CLAMP(x, p_min.x, p_max.x),
-			CLAMP(y, p_min.y, p_max.y));
-}
-
-int64_t Vector2i::length_squared() const {
-	return x * (int64_t)x + y * (int64_t)y;
-}
-
-double Vector2i::length() const {
-	return Math::sqrt((double)length_squared());
-}
-
-Vector2i Vector2i::operator+(const Vector2i &p_v) const {
-	return Vector2i(x + p_v.x, y + p_v.y);
-}
-
-void Vector2i::operator+=(const Vector2i &p_v) {
-	x += p_v.x;
-	y += p_v.y;
-}
-
-Vector2i Vector2i::operator-(const Vector2i &p_v) const {
-	return Vector2i(x - p_v.x, y - p_v.y);
-}
-
-void Vector2i::operator-=(const Vector2i &p_v) {
-	x -= p_v.x;
-	y -= p_v.y;
-}
-
-Vector2i Vector2i::operator*(const Vector2i &p_v1) const {
-	return Vector2i(x * p_v1.x, y * p_v1.y);
-}
-
-Vector2i Vector2i::operator*(const int32_t &rvalue) const {
-	return Vector2i(x * rvalue, y * rvalue);
-}
-
-void Vector2i::operator*=(const int32_t &rvalue) {
-	x *= rvalue;
-	y *= rvalue;
-}
-
-Vector2i Vector2i::operator/(const Vector2i &p_v1) const {
-	return Vector2i(x / p_v1.x, y / p_v1.y);
-}
-
-Vector2i Vector2i::operator/(const int32_t &rvalue) const {
-	return Vector2i(x / rvalue, y / rvalue);
-}
-
-void Vector2i::operator/=(const int32_t &rvalue) {
-	x /= rvalue;
-	y /= rvalue;
-}
-
-Vector2i Vector2i::operator%(const Vector2i &p_v1) const {
-	return Vector2i(x % p_v1.x, y % p_v1.y);
-}
-
-Vector2i Vector2i::operator%(const int32_t &rvalue) const {
-	return Vector2i(x % rvalue, y % rvalue);
-}
-
-void Vector2i::operator%=(const int32_t &rvalue) {
-	x %= rvalue;
-	y %= rvalue;
-}
-
-Vector2i Vector2i::operator-() const {
-	return Vector2i(-x, -y);
-}
-
-bool Vector2i::operator==(const Vector2i &p_vec2) const {
-	return x == p_vec2.x && y == p_vec2.y;
-}
-
-bool Vector2i::operator!=(const Vector2i &p_vec2) const {
-	return x != p_vec2.x || y != p_vec2.y;
-}
-
-Vector2i::operator String() const {
-	return "(" + itos(x) + ", " + itos(y) + ")";
+Vector2::operator Vector2i() const { // Conversion to the integer vector type.
+	return Vector2i(x, y); // Built from this vector's x and y components.
 }
diff --git a/core/math/vector2.h b/core/math/vector2.h
index af40b9e68d..9edaaebf89 100644
--- a/core/math/vector2.h
+++ b/core/math/vector2.h
@@ -32,8 +32,8 @@
 #define VECTOR2_H
 
 #include "core/math/math_funcs.h"
-#include "core/string/ustring.h"
 
+class String;
 struct Vector2i;
 
 struct _NO_DISCARD_ Vector2 {
@@ -167,6 +167,7 @@ struct _NO_DISCARD_ Vector2 {
 	real_t aspect() const { return width / height; }
 
 	operator String() const;
+	operator Vector2i() const;
 
 	_FORCE_INLINE_ Vector2() {}
 	_FORCE_INLINE_ Vector2(const real_t p_x, const real_t p_y) {
@@ -282,113 +283,4 @@ Vector2 Vector2::direction_to(const Vector2 &p_to) const {
 typedef Vector2 Size2;
 typedef Vector2 Point2;
 
-/* INTEGER STUFF */
-
-struct _NO_DISCARD_ Vector2i {
-	enum Axis {
-		AXIS_X,
-		AXIS_Y,
-	};
-
-	union {
-		int32_t x = 0;
-		int32_t width;
-	};
-	union {
-		int32_t y = 0;
-		int32_t height;
-	};
-
-	_FORCE_INLINE_ int32_t &operator[](int p_idx) {
-		return p_idx ? y : x;
-	}
-	_FORCE_INLINE_ const int32_t &operator[](int p_idx) const {
-		return p_idx ? y : x;
-	}
-
-	_FORCE_INLINE_ Vector2i::Axis min_axis_index() const {
-		return x < y ? Vector2i::AXIS_X : Vector2i::AXIS_Y;
-	}
-
-	_FORCE_INLINE_ Vector2i::Axis max_axis_index() const {
-		return x < y ? Vector2i::AXIS_Y : Vector2i::AXIS_X;
-	}
-
-	Vector2i min(const Vector2i &p_vector2i) const {
-		return Vector2(MIN(x, p_vector2i.x), MIN(y, p_vector2i.y));
-	}
-
-	Vector2i max(const Vector2i &p_vector2i) const {
-		return Vector2(MAX(x, p_vector2i.x), MAX(y, p_vector2i.y));
-	}
-
-	Vector2i operator+(const Vector2i &p_v) const;
-	void operator+=(const Vector2i &p_v);
-	Vector2i operator-(const Vector2i &p_v) const;
-	void operator-=(const Vector2i &p_v);
-	Vector2i operator*(const Vector2i &p_v1) const;
-
-	Vector2i operator*(const int32_t &rvalue) const;
-	void operator*=(const int32_t &rvalue);
-
-	Vector2i operator/(const Vector2i &p_v1) const;
-	Vector2i operator/(const int32_t &rvalue) const;
-	void operator/=(const int32_t &rvalue);
-
-	Vector2i operator%(const Vector2i &p_v1) const;
-	Vector2i operator%(const int32_t &rvalue) const;
-	void operator%=(const int32_t &rvalue);
-
-	Vector2i operator-() const;
-	bool operator<(const Vector2i &p_vec2) const { return (x == p_vec2.x) ? (y < p_vec2.y) : (x < p_vec2.x); }
-	bool operator>(const Vector2i &p_vec2) const { return (x == p_vec2.x) ? (y > p_vec2.y) : (x > p_vec2.x); }
-
-	bool operator<=(const Vector2i &p_vec2) const { return x == p_vec2.x ? (y <= p_vec2.y) : (x < p_vec2.x); }
-	bool operator>=(const Vector2i &p_vec2) const { return x == p_vec2.x ? (y >= p_vec2.y) : (x > p_vec2.x); }
-
-	bool operator==(const Vector2i &p_vec2) const;
-	bool operator!=(const Vector2i &p_vec2) const;
-
-	int64_t length_squared() const;
-	double length() const;
-
-	real_t aspect() const { return width / (real_t)height; }
-	Vector2i sign() const { return Vector2i(SIGN(x), SIGN(y)); }
-	Vector2i abs() const { return Vector2i(ABS(x), ABS(y)); }
-	Vector2i clamp(const Vector2i &p_min, const Vector2i &p_max) const;
-
-	operator String() const;
-
-	operator Vector2() const { return Vector2(x, y); }
-
-	inline Vector2i() {}
-	inline Vector2i(const Vector2 &p_vec2) {
-		x = (int32_t)p_vec2.x;
-		y = (int32_t)p_vec2.y;
-	}
-	inline Vector2i(const int32_t p_x, const int32_t p_y) {
-		x = p_x;
-		y = p_y;
-	}
-};
-
-_FORCE_INLINE_ Vector2i operator*(const int32_t &p_scalar, const Vector2i &p_vector) {
-	return p_vector * p_scalar;
-}
-
-_FORCE_INLINE_ Vector2i operator*(const int64_t &p_scalar, const Vector2i &p_vector) {
-	return p_vector * p_scalar;
-}
-
-_FORCE_INLINE_ Vector2i operator*(const float &p_scalar, const Vector2i &p_vector) {
-	return p_vector * p_scalar;
-}
-
-_FORCE_INLINE_ Vector2i operator*(const double &p_scalar, const Vector2i &p_vector) {
-	return p_vector * p_scalar;
-}
-
-typedef Vector2i Size2i;
-typedef Vector2i Point2i;
-
 #endif // VECTOR2_H
diff --git a/core/math/vector2i.cpp b/core/math/vector2i.cpp
new file mode 100644
index 0000000000..dfed42e4d6
--- /dev/null
+++ b/core/math/vector2i.cpp
@@ -0,0 +1,125 @@
+/*************************************************************************/
+/*  vector2i.cpp                                                         */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "vector2i.h"
+
+#include "core/math/vector2.h"
+#include "core/string/ustring.h"
+
+Vector2i Vector2i::clamp(const Vector2i &p_min, const Vector2i &p_max) const { // Applies CLAMP to x and y independently.
+	return Vector2i(
+			CLAMP(x, p_min.x, p_max.x),
+			CLAMP(y, p_min.y, p_max.y));
+}
+
+int64_t Vector2i::length_squared() const { // Sum of the squared components.
+	return x * (int64_t)x + y * (int64_t)y; // The int64_t casts make both products 64-bit.
+}
+
+double Vector2i::length() const { // Square root of length_squared(), taken as a double.
+	return Math::sqrt((double)length_squared());
+}
+
+Vector2i Vector2i::operator+(const Vector2i &p_v) const { // Component-wise sum.
+	return Vector2i(x + p_v.x, y + p_v.y);
+}
+
+void Vector2i::operator+=(const Vector2i &p_v) { // Adds p_v component-wise in place.
+	x += p_v.x;
+	y += p_v.y;
+}
+
+Vector2i Vector2i::operator-(const Vector2i &p_v) const { // Component-wise difference.
+	return Vector2i(x - p_v.x, y - p_v.y);
+}
+
+void Vector2i::operator-=(const Vector2i &p_v) { // Subtracts p_v component-wise in place.
+	x -= p_v.x;
+	y -= p_v.y;
+}
+
+Vector2i Vector2i::operator*(const Vector2i &p_v1) const { // Component-wise product.
+	return Vector2i(x * p_v1.x, y * p_v1.y);
+}
+
+Vector2i Vector2i::operator*(const int32_t &rvalue) const { // Multiplies both components by rvalue.
+	return Vector2i(x * rvalue, y * rvalue);
+}
+
+void Vector2i::operator*=(const int32_t &rvalue) { // Multiplies both components by rvalue in place.
+	x *= rvalue;
+	y *= rvalue;
+}
+
+Vector2i Vector2i::operator/(const Vector2i &p_v1) const { // Component-wise integer division; p_v1 components are not checked for zero here.
+	return Vector2i(x / p_v1.x, y / p_v1.y);
+}
+
+Vector2i Vector2i::operator/(const int32_t &rvalue) const { // Divides both components by rvalue; rvalue is not checked for zero here.
+	return Vector2i(x / rvalue, y / rvalue);
+}
+
+void Vector2i::operator/=(const int32_t &rvalue) { // Divides both components by rvalue in place; rvalue is not checked for zero here.
+	x /= rvalue;
+	y /= rvalue;
+}
+
+Vector2i Vector2i::operator%(const Vector2i &p_v1) const { // Component-wise remainder; p_v1 components are not checked for zero here.
+	return Vector2i(x % p_v1.x, y % p_v1.y);
+}
+
+Vector2i Vector2i::operator%(const int32_t &rvalue) const { // Remainder of both components by rvalue; rvalue is not checked for zero here.
+	return Vector2i(x % rvalue, y % rvalue);
+}
+
+void Vector2i::operator%=(const int32_t &rvalue) { // In-place remainder of both components by rvalue; rvalue is not checked for zero here.
+	x %= rvalue;
+	y %= rvalue;
+}
+
+Vector2i Vector2i::operator-() const { // Negates both components.
+	return Vector2i(-x, -y);
+}
+
+bool Vector2i::operator==(const Vector2i &p_vec2) const { // Equal only when both components match.
+	return x == p_vec2.x && y == p_vec2.y;
+}
+
+bool Vector2i::operator!=(const Vector2i &p_vec2) const { // Unequal when either component differs.
+	return x != p_vec2.x || y != p_vec2.y;
+}
+
+Vector2i::operator String() const { // Text form: "(x, y)", with each component passed through itos().
+	return "(" + itos(x) + ", " + itos(y) + ")";
+}
+
+Vector2i::operator Vector2() const { // Conversion to the Vector2 type.
+	return Vector2((int32_t)x, (int32_t)y); // NOTE(review): the int32_t casts look redundant if x and y are already int32_t - confirm against vector2i.h.
+}
diff --git a/core/math/vector2i.h b/core/math/vector2i.h
new file mode 100644
index 0000000000..446e05f5dd
--- /dev/null
+++ b/core/math/vector2i.h
@@ -0,0 +1,141 @@
+/*************************************************************************/
+/*  vector2i.h                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef VECTOR2I_H
+#define VECTOR2I_H
+
+#include "core/math/math_funcs.h"
+
+class String;
+struct Vector2;
+
+struct _NO_DISCARD_ Vector2i { // 2D vector with 32-bit signed integer components.
+	enum Axis {
+		AXIS_X,
+		AXIS_Y,
+	};
+
+	union {
+		int32_t x = 0; // First component, zero by default.
+		int32_t width; // Shares storage with x.
+	};
+	union {
+		int32_t y = 0; // Second component, zero by default.
+		int32_t height; // Shares storage with y.
+	};
+
+	_FORCE_INLINE_ int32_t &operator[](int p_idx) {
+		return p_idx ? y : x; // 0 selects x; any non-zero index selects y (no range check).
+	}
+	_FORCE_INLINE_ const int32_t &operator[](int p_idx) const {
+		return p_idx ? y : x; // 0 selects x; any non-zero index selects y (no range check).
+	}
+
+	_FORCE_INLINE_ Vector2i::Axis min_axis_index() const {
+		return x < y ? Vector2i::AXIS_X : Vector2i::AXIS_Y; // Ties (x == y) yield AXIS_Y.
+	}
+
+	_FORCE_INLINE_ Vector2i::Axis max_axis_index() const {
+		return x < y ? Vector2i::AXIS_Y : Vector2i::AXIS_X; // Ties (x == y) yield AXIS_X.
+	}
+
+	Vector2i min(const Vector2i &p_vector2i) const {
+		return Vector2i(MIN(x, p_vector2i.x), MIN(y, p_vector2i.y)); // Component-wise minimum.
+	}
+
+	Vector2i max(const Vector2i &p_vector2i) const {
+		return Vector2i(MAX(x, p_vector2i.x), MAX(y, p_vector2i.y)); // Component-wise maximum.
+	}
+
+	Vector2i operator+(const Vector2i &p_v) const;
+	void operator+=(const Vector2i &p_v);
+	Vector2i operator-(const Vector2i &p_v) const;
+	void operator-=(const Vector2i &p_v);
+	Vector2i operator*(const Vector2i &p_v1) const;
+
+	Vector2i operator*(const int32_t &rvalue) const;
+	void operator*=(const int32_t &rvalue);
+
+	Vector2i operator/(const Vector2i &p_v1) const;
+	Vector2i operator/(const int32_t &rvalue) const;
+	void operator/=(const int32_t &rvalue);
+
+	Vector2i operator%(const Vector2i &p_v1) const;
+	Vector2i operator%(const int32_t &rvalue) const;
+	void operator%=(const int32_t &rvalue);
+
+	Vector2i operator-() const;
+	bool operator<(const Vector2i &p_vec2) const { return (x == p_vec2.x) ? (y < p_vec2.y) : (x < p_vec2.x); } // Lexicographic order: x first, then y.
+	bool operator>(const Vector2i &p_vec2) const { return (x == p_vec2.x) ? (y > p_vec2.y) : (x > p_vec2.x); } // Lexicographic order: x first, then y.
+
+	bool operator<=(const Vector2i &p_vec2) const { return x == p_vec2.x ? (y <= p_vec2.y) : (x < p_vec2.x); } // Lexicographic order: x first, then y.
+	bool operator>=(const Vector2i &p_vec2) const { return x == p_vec2.x ? (y >= p_vec2.y) : (x > p_vec2.x); } // Lexicographic order: x first, then y.
+
+	bool operator==(const Vector2i &p_vec2) const;
+	bool operator!=(const Vector2i &p_vec2) const;
+
+	int64_t length_squared() const;
+	double length() const;
+
+	real_t aspect() const { return width / (real_t)height; } // Floating-point width/height ratio; height == 0 is not checked.
+	Vector2i sign() const { return Vector2i(SIGN(x), SIGN(y)); } // SIGN applied to each component.
+	Vector2i abs() const { return Vector2i(ABS(x), ABS(y)); } // ABS applied to each component.
+	Vector2i clamp(const Vector2i &p_min, const Vector2i &p_max) const;
+
+	operator String() const;
+	operator Vector2() const;
+
+	inline Vector2i() {} // Components keep their in-class default of 0.
+	inline Vector2i(const int32_t p_x, const int32_t p_y) {
+		x = p_x;
+		y = p_y;
+	}
+};
+
+_FORCE_INLINE_ Vector2i operator*(const int32_t &p_scalar, const Vector2i &p_vector) {
+	return p_vector * p_scalar; // Scalar-on-the-left form; forwards to Vector2i::operator*(int32_t).
+}
+
+_FORCE_INLINE_ Vector2i operator*(const int64_t &p_scalar, const Vector2i &p_vector) {
+	return p_vector * p_scalar; // The only scalar member overload takes int32_t, so p_scalar is implicitly narrowed to 32 bits.
+}
+
+_FORCE_INLINE_ Vector2i operator*(const float &p_scalar, const Vector2i &p_vector) {
+	return p_vector * p_scalar; // The only scalar member overload takes int32_t, so p_scalar is implicitly converted (fraction dropped).
+}
+
+_FORCE_INLINE_ Vector2i operator*(const double &p_scalar, const Vector2i &p_vector) {
+	return p_vector * p_scalar; // The only scalar member overload takes int32_t, so p_scalar is implicitly converted (fraction dropped).
+}
+
+typedef Vector2i Size2i;
+typedef Vector2i Point2i;
+
+#endif // VECTOR2I_H
diff --git a/core/math/vector3.h b/core/math/vector3.h
index b62edef40f..79ba5c4f15 100644
--- a/core/math/vector3.h
+++ b/core/math/vector3.h
@@ -35,7 +35,8 @@
 #include "core/math/vector2.h"
 #include "core/math/vector3i.h"
 #include "core/string/ustring.h"
-class Basis;
+
+struct Basis;
 
 struct _NO_DISCARD_ Vector3 {
 	static const int AXIS_COUNT = 3;
diff --git a/core/multiplayer/multiplayer_api.cpp b/core/multiplayer/multiplayer_api.cpp
index 627825246a..41d6d14696 100644
--- a/core/multiplayer/multiplayer_api.cpp
+++ b/core/multiplayer/multiplayer_api.cpp
@@ -32,7 +32,6 @@
 
 #include "core/debugger/engine_debugger.h"
 #include "core/io/marshalls.h"
-#include "core/multiplayer/multiplayer_replicator.h"
 #include "core/multiplayer/rpc_manager.h"
 #include "scene/main/node.h"
 
@@ -42,6 +41,8 @@
 #include "core/os/os.h"
 #endif
 
+MultiplayerReplicationInterface *(*MultiplayerAPI::create_default_replication_interface)(MultiplayerAPI *p_multiplayer) = nullptr; // Optional factory hook; when set, the MultiplayerAPI constructor uses it to create its replicator.
+
 #ifdef DEBUG_ENABLED
 void MultiplayerAPI::profile_bandwidth(const String &p_inout, int p_size) {
 	if (EngineDebugger::is_profiling("multiplayer")) {
@@ -74,7 +75,7 @@ void MultiplayerAPI::poll() {
 		Error err = multiplayer_peer->get_packet(&packet, len);
 		if (err != OK) {
 			ERR_PRINT("Error getting packet!");
-			break; // Something is wrong!
+			return; // Something is wrong!
 		}
 
 		remote_sender_id = sender;
@@ -82,16 +83,13 @@ void MultiplayerAPI::poll() {
 		remote_sender_id = 0;
 
 		if (!multiplayer_peer.is_valid()) {
-			break; // It's also possible that a packet or RPC caused a disconnection, so also check here.
+			return; // It's also possible that a packet or RPC caused a disconnection, so also check here.
 		}
 	}
-	if (multiplayer_peer.is_valid() && multiplayer_peer->get_connection_status() == MultiplayerPeer::CONNECTION_CONNECTED) {
-		replicator->poll();
-	}
+	replicator->on_network_process();
 }
 
 void MultiplayerAPI::clear() {
-	replicator->clear();
 	connected_peers.clear();
 	path_get_cache.clear();
 	path_send_cache.clear();
@@ -133,6 +131,7 @@ void MultiplayerAPI::set_multiplayer_peer(const Ref<MultiplayerPeer> &p_peer) {
 		multiplayer_peer->connect("connection_failed", callable_mp(this, &MultiplayerAPI::_connection_failed));
 		multiplayer_peer->connect("server_disconnected", callable_mp(this, &MultiplayerAPI::_server_disconnected));
 	}
+	replicator->on_reset();
 }
 
 Ref<MultiplayerPeer> MultiplayerAPI::get_multiplayer_peer() const {
@@ -167,13 +166,13 @@ void MultiplayerAPI::_process_packet(int p_from, const uint8_t *p_packet, int p_
 			_process_raw(p_from, p_packet, p_packet_len);
 		} break;
 		case NETWORK_COMMAND_SPAWN: {
-			replicator->process_spawn_despawn(p_from, p_packet, p_packet_len, true);
+			replicator->on_spawn_receive(p_from, p_packet, p_packet_len);
 		} break;
 		case NETWORK_COMMAND_DESPAWN: {
-			replicator->process_spawn_despawn(p_from, p_packet, p_packet_len, false);
+			replicator->on_despawn_receive(p_from, p_packet, p_packet_len);
 		} break;
 		case NETWORK_COMMAND_SYNC: {
-			replicator->process_sync(p_from, p_packet, p_packet_len);
+			replicator->on_sync_receive(p_from, p_packet, p_packet_len);
 		} break;
 	}
 }
@@ -324,7 +323,7 @@ bool MultiplayerAPI::_send_confirm_path(Node *p_node, NodePath p_path, PathSentC
 #define ENCODE_16 1 << 5
 #define ENCODE_32 2 << 5
 #define ENCODE_64 3 << 5
-Error MultiplayerAPI::encode_and_compress_variant(const Variant &p_variant, uint8_t *r_buffer, int &r_len) {
+Error MultiplayerAPI::encode_and_compress_variant(const Variant &p_variant, uint8_t *r_buffer, int &r_len, bool p_allow_object_decoding) {
 	// Unreachable because `VARIANT_MAX` == 27 and `ENCODE_VARIANT_MASK` == 31
 	CRASH_COND(p_variant.get_type() > VARIANT_META_TYPE_MASK);
 
@@ -385,7 +384,7 @@ Error MultiplayerAPI::encode_and_compress_variant(const Variant &p_variant, uint
 		} break;
 		default:
 			// Any other case is not yet compressed.
-			Error err = encode_variant(p_variant, r_buffer, r_len, allow_object_decoding);
+			Error err = encode_variant(p_variant, r_buffer, r_len, p_allow_object_decoding);
 			if (err != OK) {
 				return err;
 			}
@@ -399,7 +398,7 @@ Error MultiplayerAPI::encode_and_compress_variant(const Variant &p_variant, uint
 	return OK;
 }
 
-Error MultiplayerAPI::decode_and_decompress_variant(Variant &r_variant, const uint8_t *p_buffer, int p_len, int *r_len) {
+Error MultiplayerAPI::decode_and_decompress_variant(Variant &r_variant, const uint8_t *p_buffer, int p_len, int *r_len, bool p_allow_object_decoding) {
 	const uint8_t *buf = p_buffer;
 	int len = p_len;
 
@@ -458,7 +457,7 @@ Error MultiplayerAPI::decode_and_decompress_variant(Variant &r_variant, const ui
 			}
 		} break;
 		default:
-			Error err = decode_variant(r_variant, p_buffer, p_len, r_len, allow_object_decoding);
+			Error err = decode_variant(r_variant, p_buffer, p_len, r_len, p_allow_object_decoding);
 			if (err != OK) {
 				return err;
 			}
@@ -467,17 +466,84 @@ Error MultiplayerAPI::decode_and_decompress_variant(Variant &r_variant, const ui
 	return OK;
 }
 
+Error MultiplayerAPI::encode_and_compress_variants(const Variant **p_variants, int p_count, uint8_t *p_buffer, int &r_len, bool *r_raw, bool p_allow_object_decoding) {
+	// Encodes p_count variants back to back into p_buffer, or only measures the required
+	// size when p_buffer is null; r_len receives the byte count. When r_raw is given it is
+	// set to true if the payload is raw bytes (no arguments, or a single PackedByteArray).
+	r_len = 0;
+	if (r_raw) {
+		// Written for every p_count so callers never read an unset flag.
+		*r_raw = p_count == 0;
+	}
+	if (p_count == 0) {
+		return OK;
+	}
+
+	// Try raw encoding optimization.
+	if (r_raw && p_count == 1 && p_variants[0]->get_type() == Variant::PACKED_BYTE_ARRAY) {
+		// A lone PackedByteArray is copied verbatim.
+		*r_raw = true;
+		const PackedByteArray pba = *(p_variants[0]);
+		if (p_buffer && pba.size() > 0) {
+			memcpy(p_buffer, pba.ptr(), pba.size());
+		}
+		r_len = pba.size();
+		return OK;
+	}
+
+	// Regular encoding.
+	for (int i = 0; i < p_count; i++) {
+		int size = 0;
+		// Stop at the first failure instead of reporting OK for an incomplete payload.
+		Error err = encode_and_compress_variant(*(p_variants[i]), p_buffer ? p_buffer + r_len : nullptr, size, p_allow_object_decoding);
+		if (err != OK) {
+			return err;
+		}
+		r_len += size;
+	}
+	return OK;
+}
+
+Error MultiplayerAPI::decode_and_decompress_variants(Vector<Variant> &r_variants, const uint8_t *p_buffer, int p_len, int &r_len, bool p_raw, bool p_allow_object_decoding) {
+	// Decodes r_variants.size() values from p_buffer into r_variants and stores the number
+	// of bytes consumed in r_len. With p_raw the whole buffer becomes one PackedByteArray.
+	r_len = 0;
+	int argc = r_variants.size();
+	if (argc == 0 && p_raw) {
+		return OK;
+	}
+	ERR_FAIL_COND_V(p_raw && argc != 1, ERR_INVALID_DATA);
+	if (p_raw) {
+		r_len = p_len;
+		PackedByteArray pba;
+		pba.resize(p_len);
+		if (p_len > 0) {
+			memcpy(pba.ptrw(), p_buffer, p_len);
+		}
+		r_variants.write[0] = pba;
+		return OK;
+	}
+
+	for (int i = 0; i < argc; i++) {
+		ERR_FAIL_COND_V_MSG(r_len >= p_len, ERR_INVALID_DATA, "Invalid packet received. Size too small.");
+
+		int vlen = 0; // Defined even if the decoder returns without writing it.
+		Error err = MultiplayerAPI::decode_and_decompress_variant(r_variants.write[i], &p_buffer[r_len], p_len - r_len, &vlen, p_allow_object_decoding);
+		ERR_FAIL_COND_V_MSG(err != OK, err, "Invalid packet received. Unable to decode state variable.");
+		r_len += vlen;
+	}
+	return OK;
+}
+
 void MultiplayerAPI::_add_peer(int p_id) {
 	connected_peers.insert(p_id);
 	path_get_cache.insert(p_id, PathGetCache());
-	if (is_server()) {
-		replicator->spawn_all(p_id);
-	}
+	replicator->on_peer_change(p_id, true); // Notify the replication interface before "peer_connected" is emitted.
 	emit_signal(SNAME("peer_connected"), p_id);
 }
 
 void MultiplayerAPI::_del_peer(int p_id) {
-	connected_peers.erase(p_id);
+	replicator->on_peer_change(p_id, false);
 	// Cleanup get cache.
 	path_get_cache.erase(p_id);
 	// Cleanup sent cache.
@@ -488,6 +554,7 @@ void MultiplayerAPI::_del_peer(int p_id) {
 		PathSentCache *psc = path_send_cache.getptr(E);
 		psc->confirmed_peers.erase(p_id);
 	}
+	connected_peers.erase(p_id);
 	emit_signal(SNAME("peer_disconnected"), p_id);
 }
 
@@ -500,6 +567,7 @@ void MultiplayerAPI::_connection_failed() {
 }
 
 void MultiplayerAPI::_server_disconnected() {
+	replicator->on_reset(); // Invoke the replicator's reset hook before "server_disconnected" is emitted.
 	emit_signal(SNAME("server_disconnected"));
 }
 
@@ -612,14 +680,26 @@ bool MultiplayerAPI::is_object_decoding_allowed() const {
 	return allow_object_decoding;
 }
 
-void MultiplayerAPI::scene_enter_exit_notify(const String &p_scene, Node *p_node, bool p_enter) {
-	replicator->scene_enter_exit_notify(p_scene, p_node, p_enter);
-}
-
 void MultiplayerAPI::rpcp(Node *p_node, int p_peer_id, const StringName &p_method, const Variant **p_arg, int p_argcount) {
 	rpc_manager->rpcp(p_node, p_peer_id, p_method, p_arg, p_argcount);
 }
 
+Error MultiplayerAPI::spawn(Object *p_object, Variant p_config) {
+	return replicator->on_spawn(p_object, p_config); // Forwards to the replication interface and returns its result unchanged.
+}
+
+Error MultiplayerAPI::despawn(Object *p_object, Variant p_config) {
+	return replicator->on_despawn(p_object, p_config); // Forwards to the replication interface and returns its result unchanged.
+}
+
+Error MultiplayerAPI::replication_start(Object *p_object, Variant p_config) {
+	return replicator->on_replication_start(p_object, p_config); // Forwards to the replication interface and returns its result unchanged.
+}
+
+Error MultiplayerAPI::replication_stop(Object *p_object, Variant p_config) {
+	return replicator->on_replication_stop(p_object, p_config); // Forwards to the replication interface and returns its result unchanged.
+}
+
 void MultiplayerAPI::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_root_node", "node"), &MultiplayerAPI::set_root_node);
 	ClassDB::bind_method(D_METHOD("get_root_node"), &MultiplayerAPI::get_root_node);
@@ -638,14 +718,12 @@ void MultiplayerAPI::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("is_refusing_new_connections"), &MultiplayerAPI::is_refusing_new_connections);
 	ClassDB::bind_method(D_METHOD("set_allow_object_decoding", "enable"), &MultiplayerAPI::set_allow_object_decoding);
 	ClassDB::bind_method(D_METHOD("is_object_decoding_allowed"), &MultiplayerAPI::is_object_decoding_allowed);
-	ClassDB::bind_method(D_METHOD("get_replicator"), &MultiplayerAPI::get_replicator);
 
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "allow_object_decoding"), "set_allow_object_decoding", "is_object_decoding_allowed");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "refuse_new_connections"), "set_refuse_new_connections", "is_refusing_new_connections");
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "multiplayer_peer", PROPERTY_HINT_RESOURCE_TYPE, "MultiplayerPeer", PROPERTY_USAGE_NONE), "set_multiplayer_peer", "get_multiplayer_peer");
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "root_node", PROPERTY_HINT_RESOURCE_TYPE, "Node", PROPERTY_USAGE_NONE), "set_root_node", "get_root_node");
 	ADD_PROPERTY_DEFAULT("refuse_new_connections", false);
-	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "replicator", PROPERTY_HINT_RESOURCE_TYPE, "MultiplayerReplicator", PROPERTY_USAGE_NONE), "", "get_replicator");
 
 	ADD_SIGNAL(MethodInfo("peer_connected", PropertyInfo(Variant::INT, "id")));
 	ADD_SIGNAL(MethodInfo("peer_disconnected", PropertyInfo(Variant::INT, "id")));
@@ -656,13 +734,16 @@ void MultiplayerAPI::_bind_methods() {
 }
 
 MultiplayerAPI::MultiplayerAPI() {
-	replicator = memnew(MultiplayerReplicator(this));
+	if (create_default_replication_interface) { // A replication factory has been registered.
+		replicator = Ref<MultiplayerReplicationInterface>(create_default_replication_interface(this));
+	} else { // No factory: instantiate MultiplayerReplicationInterface itself.
+		replicator.instantiate();
+	}
 	rpc_manager = memnew(RPCManager(this));
 	clear();
 }
 
 MultiplayerAPI::~MultiplayerAPI() {
 	clear();
-	memdelete(replicator);
 	memdelete(rpc_manager);
 }
diff --git a/core/multiplayer/multiplayer_api.h b/core/multiplayer/multiplayer_api.h
index 713035428d..f4fdafc323 100644
--- a/core/multiplayer/multiplayer_api.h
+++ b/core/multiplayer/multiplayer_api.h
@@ -35,7 +35,26 @@
 #include "core/multiplayer/multiplayer_peer.h"
 #include "core/object/ref_counted.h"
 
-class MultiplayerReplicator;
+class MultiplayerAPI;
+
+class MultiplayerReplicationInterface : public RefCounted { // Replication hooks invoked by MultiplayerAPI; every default is a no-op or returns ERR_UNAVAILABLE.
+	GDCLASS(MultiplayerReplicationInterface, RefCounted);
+
+public:
+	virtual void on_peer_change(int p_id, bool p_connected) {} // Called by MultiplayerAPI::_add_peer (true) and _del_peer (false).
+	virtual void on_reset() {} // Called by MultiplayerAPI::set_multiplayer_peer and _server_disconnected.
+	virtual Error on_spawn_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) { return ERR_UNAVAILABLE; } // Receives a NETWORK_COMMAND_SPAWN packet.
+	virtual Error on_despawn_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) { return ERR_UNAVAILABLE; } // Receives a NETWORK_COMMAND_DESPAWN packet.
+	virtual Error on_sync_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) { return ERR_UNAVAILABLE; } // Receives a NETWORK_COMMAND_SYNC packet.
+	virtual Error on_spawn(Object *p_obj, Variant p_config) { return ERR_UNAVAILABLE; } // Backs MultiplayerAPI::spawn.
+	virtual Error on_despawn(Object *p_obj, Variant p_config) { return ERR_UNAVAILABLE; } // Backs MultiplayerAPI::despawn.
+	virtual Error on_replication_start(Object *p_obj, Variant p_config) { return ERR_UNAVAILABLE; } // Backs MultiplayerAPI::replication_start.
+	virtual Error on_replication_stop(Object *p_obj, Variant p_config) { return ERR_UNAVAILABLE; } // Backs MultiplayerAPI::replication_stop.
+	virtual void on_network_process() {} // Called by MultiplayerAPI::poll() after the packet loop; skipped when poll() returns early.
+
+	MultiplayerReplicationInterface() {}
+};
+
 class RPCManager;
 
 class MultiplayerAPI : public RefCounted {
@@ -95,7 +114,7 @@ private:
 	Node *root_node = nullptr;
 	bool allow_object_decoding = false;
 
-	MultiplayerReplicator *replicator = nullptr;
+	Ref<MultiplayerReplicationInterface> replicator;
 	RPCManager *rpc_manager = nullptr;
 
 protected:
@@ -108,6 +127,13 @@ protected:
 	void _process_raw(int p_from, const uint8_t *p_packet, int p_packet_len);
 
 public:
+	static MultiplayerReplicationInterface *(*create_default_replication_interface)(MultiplayerAPI *p_multiplayer);
+
+	static Error encode_and_compress_variant(const Variant &p_variant, uint8_t *p_buffer, int &r_len, bool p_allow_object_decoding);
+	static Error decode_and_decompress_variant(Variant &r_variant, const uint8_t *p_buffer, int p_len, int *r_len, bool p_allow_object_decoding);
+	static Error encode_and_compress_variants(const Variant **p_variants, int p_count, uint8_t *p_buffer, int &r_len, bool *r_raw = nullptr, bool p_allow_object_decoding = false);
+	static Error decode_and_decompress_variants(Vector<Variant> &r_variants, const uint8_t *p_buffer, int p_len, int &r_len, bool p_raw = false, bool p_allow_object_decoding = false);
+
 	void poll();
 	void clear();
 	void set_root_node(Node *p_node);
@@ -117,13 +143,13 @@ public:
 
 	Error send_bytes(Vector<uint8_t> p_data, int p_to = MultiplayerPeer::TARGET_PEER_BROADCAST, Multiplayer::TransferMode p_mode = Multiplayer::TRANSFER_MODE_RELIABLE, int p_channel = 0);
 
-	Error encode_and_compress_variant(const Variant &p_variant, uint8_t *p_buffer, int &r_len);
-	Error decode_and_decompress_variant(Variant &r_variant, const uint8_t *p_buffer, int p_len, int *r_len);
-
 	// Called by Node.rpc
 	void rpcp(Node *p_node, int p_peer_id, const StringName &p_method, const Variant **p_arg, int p_argcount);
-	// Called by Node._notification
-	void scene_enter_exit_notify(const String &p_scene, Node *p_node, bool p_enter);
+	// Replication API
+	Error spawn(Object *p_object, Variant p_config);
+	Error despawn(Object *p_object, Variant p_config);
+	Error replication_start(Object *p_object, Variant p_config);
+	Error replication_stop(Object *p_object, Variant p_config);
 	// Called by replicator
 	bool send_confirm_path(Node *p_node, NodePath p_path, int p_target, int &p_id);
 	Node *get_cached_node(int p_from, uint32_t p_node_id);
@@ -148,7 +174,6 @@ public:
 	void set_allow_object_decoding(bool p_enable);
 	bool is_object_decoding_allowed() const;
 
-	MultiplayerReplicator *get_replicator() const { return replicator; }
 	RPCManager *get_rpc_manager() const { return rpc_manager; }
 
 #ifdef DEBUG_ENABLED
diff --git a/core/multiplayer/multiplayer_replicator.cpp b/core/multiplayer/multiplayer_replicator.cpp
deleted file mode 100644
index e7de8219c7..0000000000
--- a/core/multiplayer/multiplayer_replicator.cpp
+++ /dev/null
@@ -1,791 +0,0 @@
-/*************************************************************************/
-/*  multiplayer_replicator.cpp                                           */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#include "core/multiplayer/multiplayer_replicator.h"
-
-#include "core/io/marshalls.h"
-#include "scene/main/node.h"
-#include "scene/resources/packed_scene.h"
-
-#define MAKE_ROOM(m_amount)             \
-	if (packet_cache.size() < m_amount) \
-		packet_cache.resize(m_amount);
-
-Error MultiplayerReplicator::_sync_all_default(const ResourceUID::ID &p_scene_id, int p_peer) {
-	ERR_FAIL_COND_V(!replications.has(p_scene_id), ERR_INVALID_PARAMETER);
-	SceneConfig &cfg = replications[p_scene_id];
-	int full_size = 0;
-	bool same_size = true;
-	int last_size = 0;
-	bool all_raw = true;
-	struct EncodeInfo {
-		int size = 0;
-		bool raw = false;
-		List<Variant> state;
-	};
-	Map<ObjectID, struct EncodeInfo> state;
-	if (tracked_objects.has(p_scene_id)) {
-		for (const ObjectID &obj_id : tracked_objects[p_scene_id]) {
-			Object *obj = ObjectDB::get_instance(obj_id);
-			if (obj) {
-				struct EncodeInfo info;
-				Error err = _get_state(cfg.sync_properties, obj, info.state);
-				ERR_CONTINUE(err);
-				err = _encode_state(info.state, nullptr, info.size, &info.raw);
-				ERR_CONTINUE(err);
-				state[obj_id] = info;
-				full_size += info.size;
-				if (last_size && info.size != last_size) {
-					same_size = false;
-				}
-				all_raw = all_raw && info.raw;
-				last_size = info.size;
-			}
-		}
-	}
-	// Default implementation do not send empty updates.
-	if (!full_size) {
-		return OK;
-	}
-#ifdef DEBUG_ENABLED
-	if (full_size > 4096 && cfg.sync_interval) {
-		WARN_PRINT_ONCE(vformat("The timed state update for scene %d is big (%d bytes) consider optimizing it", p_scene_id));
-	}
-#endif
-	if (same_size) {
-		// This is fast and small. Should we allow more than 256 objects per type?
-		// This costs us 1 byte.
-		MAKE_ROOM(SYNC_CMD_OFFSET + 1 + 2 + 2 + full_size);
-	} else {
-		MAKE_ROOM(SYNC_CMD_OFFSET + 1 + 2 + state.size() * 2 + full_size);
-	}
-	int ofs = 0;
-	uint8_t *ptr = packet_cache.ptrw();
-	ptr[0] = MultiplayerAPI::NETWORK_COMMAND_SYNC | (same_size ? BYTE_OR_ZERO_FLAG : 0);
-	ofs = 1;
-	ofs += encode_uint64(p_scene_id, &ptr[ofs]);
-	ptr[ofs] = cfg.sync_recv++;
-	ofs += 1;
-	ofs += encode_uint16(state.size(), &ptr[ofs]);
-	if (same_size) {
-		ofs += encode_uint16(last_size + (all_raw ? 1 << 15 : 0), &ptr[ofs]);
-	}
-	for (const ObjectID &obj_id : tracked_objects[p_scene_id]) {
-		if (!state.has(obj_id)) {
-			continue;
-		}
-		struct EncodeInfo &info = state[obj_id];
-		Object *obj = ObjectDB::get_instance(obj_id);
-		ERR_CONTINUE(!obj);
-		int size = 0;
-		if (!same_size) {
-			// We need to encode the size of every object.
-			ofs += encode_uint16(info.size + (info.raw ? 1 << 15 : 0), &ptr[ofs]);
-		}
-		Error err = _encode_state(info.state, &ptr[ofs], size, &info.raw);
-		ERR_CONTINUE(err);
-		ofs += size;
-	}
-	Ref<MultiplayerPeer> peer = multiplayer->get_multiplayer_peer();
-	peer->set_target_peer(p_peer);
-	peer->set_transfer_channel(0);
-	peer->set_transfer_mode(Multiplayer::TRANSFER_MODE_UNRELIABLE);
-	return peer->put_packet(ptr, ofs);
-}
-
-void MultiplayerReplicator::_process_default_sync(const ResourceUID::ID &p_id, const uint8_t *p_packet, int p_packet_len) {
-	ERR_FAIL_COND_MSG(p_packet_len < SYNC_CMD_OFFSET + 5, "Invalid spawn packet received");
-	ERR_FAIL_COND_MSG(!replications.has(p_id), "Invalid spawn ID received " + itos(p_id));
-	SceneConfig &cfg = replications[p_id];
-	ERR_FAIL_COND_MSG(cfg.mode != REPLICATION_MODE_SERVER || multiplayer->is_server(), "The default implementation only allows sync packets from the server");
-	const bool same_size = p_packet[0] & BYTE_OR_ZERO_FLAG;
-	int ofs = SYNC_CMD_OFFSET;
-	int time = p_packet[ofs];
-	// Skip old update.
-	if (time < cfg.sync_recv && cfg.sync_recv - time < 127) {
-		return;
-	}
-	cfg.sync_recv = time;
-	ofs += 1;
-	int count = decode_uint16(&p_packet[ofs]);
-	ofs += 2;
-#ifdef DEBUG_ENABLED
-	ERR_FAIL_COND(!tracked_objects.has(p_id) || tracked_objects[p_id].size() != count);
-#else
-	if (!tracked_objects.has(p_id) || tracked_objects[p_id].size() != count) {
-		return;
-	}
-#endif
-	int data_size = 0;
-	bool raw = false;
-	if (same_size) {
-		// This is fast and optimized.
-		data_size = decode_uint16(&p_packet[ofs]);
-		raw = (data_size & (1 << 15)) != 0;
-		data_size = data_size & ~(1 << 15);
-		ofs += 2;
-		ERR_FAIL_COND(p_packet_len - ofs < data_size * count);
-	}
-	for (const ObjectID &obj_id : tracked_objects[p_id]) {
-		Object *obj = ObjectDB::get_instance(obj_id);
-		ERR_CONTINUE(!obj);
-		if (!same_size) {
-			// This is slow and wasteful.
-			data_size = decode_uint16(&p_packet[ofs]);
-			raw = (data_size & (1 << 15)) != 0;
-			data_size = data_size & ~(1 << 15);
-			ofs += 2;
-			ERR_FAIL_COND(p_packet_len - ofs < data_size);
-		}
-		int size = 0;
-		Error err = _decode_state(cfg.sync_properties, obj, &p_packet[ofs], data_size, size, raw);
-		ofs += data_size;
-		ERR_CONTINUE(err);
-		ERR_CONTINUE(size != data_size);
-	}
-}
-
-Error MultiplayerReplicator::_send_default_spawn_despawn(int p_peer_id, const ResourceUID::ID &p_scene_id, Object *p_obj, const NodePath &p_path, bool p_spawn) {
-	ERR_FAIL_COND_V(p_spawn && !p_obj, ERR_INVALID_PARAMETER);
-	ERR_FAIL_COND_V(!replications.has(p_scene_id), ERR_INVALID_PARAMETER);
-	Error err;
-	// Prepare state
-	List<Variant> state_variants;
-	int state_len = 0;
-	const SceneConfig &cfg = replications[p_scene_id];
-	if (p_spawn) {
-		if ((err = _get_state(cfg.properties, p_obj, state_variants)) != OK) {
-			return err;
-		}
-	}
-
-	bool is_raw = false;
-	if (state_variants.size() == 1 && state_variants[0].get_type() == Variant::PACKED_BYTE_ARRAY) {
-		is_raw = true;
-		const PackedByteArray pba = state_variants[0];
-		state_len = pba.size();
-	} else if (state_variants.size()) {
-		err = _encode_state(state_variants, nullptr, state_len);
-		ERR_FAIL_COND_V(err, err);
-	} else {
-		is_raw = true;
-	}
-
-	int ofs = 0;
-
-	// Prepare simplified path
-	const Node *root_node = multiplayer->get_root_node();
-	ERR_FAIL_COND_V(!root_node, ERR_UNCONFIGURED);
-	NodePath rel_path = (root_node->get_path()).rel_path_to(p_path);
-	const Vector<StringName> names = rel_path.get_names();
-	ERR_FAIL_COND_V(names.size() < 2, ERR_INVALID_PARAMETER);
-
-	NodePath parent = NodePath(names.slice(0, names.size() - 1), false);
-	ERR_FAIL_COND_V_MSG(!root_node->has_node(parent), ERR_INVALID_PARAMETER, "Path not found: " + parent);
-
-	int path_id = 0;
-	multiplayer->send_confirm_path(root_node->get_node(parent), parent, p_peer_id, path_id);
-
-	// Encode name and parent ID.
-	CharString cname = String(names[names.size() - 1]).utf8();
-	int nlen = encode_cstring(cname.get_data(), nullptr);
-	MAKE_ROOM(SPAWN_CMD_OFFSET + 4 + 4 + nlen + state_len);
-	uint8_t *ptr = packet_cache.ptrw();
-	ptr[0] = (p_spawn ? MultiplayerAPI::NETWORK_COMMAND_SPAWN : MultiplayerAPI::NETWORK_COMMAND_DESPAWN) | (is_raw ? BYTE_OR_ZERO_FLAG : 0);
-	ofs = 1;
-	ofs += encode_uint64(p_scene_id, &ptr[ofs]);
-	ofs += encode_uint32(path_id, &ptr[ofs]);
-	ofs += encode_uint32(nlen, &ptr[ofs]);
-	ofs += encode_cstring(cname.get_data(), &ptr[ofs]);
-
-	// Encode state.
-	if (!is_raw) {
-		_encode_state(state_variants, &ptr[ofs], state_len);
-	} else if (state_len) {
-		PackedByteArray pba = state_variants[0];
-		memcpy(&ptr[ofs], pba.ptr(), state_len);
-	}
-
-	Ref<MultiplayerPeer> peer = multiplayer->get_multiplayer_peer();
-	peer->set_target_peer(p_peer_id);
-	peer->set_transfer_channel(0);
-	peer->set_transfer_mode(Multiplayer::TRANSFER_MODE_RELIABLE);
-	return peer->put_packet(ptr, ofs + state_len);
-}
-
-void MultiplayerReplicator::_process_default_spawn_despawn(int p_from, const ResourceUID::ID &p_scene_id, const uint8_t *p_packet, int p_packet_len, bool p_spawn) {
-	ERR_FAIL_COND_MSG(p_packet_len < SPAWN_CMD_OFFSET + 9, "Invalid spawn packet received");
-	int ofs = SPAWN_CMD_OFFSET;
-	uint32_t node_target = decode_uint32(&p_packet[ofs]);
-	Node *parent = multiplayer->get_cached_node(p_from, node_target);
-	ofs += 4;
-	ERR_FAIL_COND_MSG(parent == nullptr, "Invalid packet received. Requested node was not found.");
-
-	uint32_t name_len = decode_uint32(&p_packet[ofs]);
-	ofs += 4;
-	ERR_FAIL_COND_MSG(name_len > uint32_t(p_packet_len - ofs), vformat("Invalid spawn packet size: %d, wants: %d", p_packet_len, ofs + name_len));
-	ERR_FAIL_COND_MSG(name_len < 1, "Zero spawn name size.");
-
-	const String name = String::utf8((const char *)&p_packet[ofs], name_len);
-	// We need to make sure no trickery happens here (e.g. despawning a subpath), but we want to allow autogenerated ("@") node names.
-	ERR_FAIL_COND_MSG(name.validate_node_name() != name.replace("@", ""), vformat("Invalid node name received: '%s'", name));
-	ofs += name_len;
-
-	const SceneConfig &cfg = replications[p_scene_id];
-	if (cfg.mode == REPLICATION_MODE_SERVER && p_from == 1) {
-		String scene_path = ResourceUID::get_singleton()->get_id_path(p_scene_id);
-		if (p_spawn) {
-			const bool is_raw = ((p_packet[0] & BYTE_OR_ZERO_FLAG) >> BYTE_OR_ZERO_SHIFT) == 1;
-
-			ERR_FAIL_COND_MSG(parent->has_node(name), vformat("Unable to spawn node. Node already exists: %s/%s", parent->get_path(), name));
-			RES res = ResourceLoader::load(scene_path);
-			ERR_FAIL_COND_MSG(!res.is_valid(), "Unable to load scene to spawn at path: " + scene_path);
-			PackedScene *scene = Object::cast_to<PackedScene>(res.ptr());
-			ERR_FAIL_COND(!scene);
-			Node *node = scene->instantiate();
-			ERR_FAIL_COND(!node);
-			replicated_nodes[node->get_instance_id()] = p_scene_id;
-			_track(p_scene_id, node);
-			int size;
-			_decode_state(cfg.properties, node, &p_packet[ofs], p_packet_len - ofs, size, is_raw);
-			parent->_add_child_nocheck(node, name);
-			emit_signal(SNAME("spawned"), p_scene_id, node);
-		} else {
-			ERR_FAIL_COND_MSG(!parent->has_node(name), vformat("Path not found: %s/%s", parent->get_path(), name));
-			Node *node = parent->get_node(name);
-			ERR_FAIL_COND_MSG(!replicated_nodes.has(node->get_instance_id()), vformat("Trying to despawn a Node that was not replicated: %s/%s", parent->get_path(), name));
-			emit_signal(SNAME("despawned"), p_scene_id, node);
-			_untrack(p_scene_id, node);
-			replicated_nodes.erase(node->get_instance_id());
-			node->queue_delete();
-		}
-	} else {
-		PackedByteArray data;
-		if (p_packet_len > ofs) {
-			data.resize(p_packet_len - ofs);
-			memcpy(data.ptrw(), &p_packet[ofs], data.size());
-		}
-		if (p_spawn) {
-			emit_signal(SNAME("spawn_requested"), p_from, p_scene_id, parent, name, data);
-		} else {
-			emit_signal(SNAME("despawn_requested"), p_from, p_scene_id, parent, name, data);
-		}
-	}
-}
-
-void MultiplayerReplicator::process_spawn_despawn(int p_from, const uint8_t *p_packet, int p_packet_len, bool p_spawn) {
-	ERR_FAIL_COND_MSG(p_packet_len < SPAWN_CMD_OFFSET, "Invalid spawn packet received");
-	ResourceUID::ID id = decode_uint64(&p_packet[1]);
-	ERR_FAIL_COND_MSG(!replications.has(id), "Invalid spawn ID received " + itos(id));
-
-	const SceneConfig &cfg = replications[id];
-	if (cfg.on_spawn_despawn_receive.is_valid()) {
-		int ofs = SPAWN_CMD_OFFSET;
-		bool is_raw = ((p_packet[0] & BYTE_OR_ZERO_FLAG) >> BYTE_OR_ZERO_SHIFT) == 1;
-		Variant data;
-		int left = p_packet_len - ofs;
-		if (is_raw && left) {
-			PackedByteArray pba;
-			pba.resize(left);
-			memcpy(pba.ptrw(), &p_packet[ofs], pba.size());
-			data = pba;
-		} else if (left) {
-			ERR_FAIL_COND(decode_variant(data, &p_packet[ofs], left) != OK);
-		}
-
-		Variant args[4];
-		args[0] = p_from;
-		args[1] = id;
-		args[2] = data;
-		args[3] = p_spawn;
-		const Variant *argp[] = { &args[0], &args[1], &args[2], &args[3] };
-		Callable::CallError ce;
-		Variant ret;
-		cfg.on_spawn_despawn_receive.call(argp, 4, ret, ce);
-		ERR_FAIL_COND_MSG(ce.error != Callable::CallError::CALL_OK, "Custom receive function failed");
-	} else {
-		_process_default_spawn_despawn(p_from, id, p_packet, p_packet_len, p_spawn);
-	}
-}
-
-void MultiplayerReplicator::process_sync(int p_from, const uint8_t *p_packet, int p_packet_len) {
-	ERR_FAIL_COND_MSG(p_packet_len < SPAWN_CMD_OFFSET, "Invalid spawn packet received");
-	ResourceUID::ID id = decode_uint64(&p_packet[1]);
-	ERR_FAIL_COND_MSG(!replications.has(id), "Invalid spawn ID received " + itos(id));
-	const SceneConfig &cfg = replications[id];
-	if (cfg.on_sync_receive.is_valid()) {
-		Array objs;
-		if (tracked_objects.has(id)) {
-			objs.resize(tracked_objects[id].size());
-			int idx = 0;
-			for (const ObjectID &obj_id : tracked_objects[id]) {
-				objs[idx++] = ObjectDB::get_instance(obj_id);
-			}
-		}
-		PackedByteArray pba;
-		pba.resize(p_packet_len - SYNC_CMD_OFFSET);
-		if (pba.size()) {
-			memcpy(pba.ptrw(), p_packet + SYNC_CMD_OFFSET, p_packet_len - SYNC_CMD_OFFSET);
-		}
-		Variant args[4] = { p_from, id, objs, pba };
-		Variant *argp[4] = { args, &args[1], &args[2], &args[3] };
-		Callable::CallError ce;
-		Variant ret;
-		cfg.on_sync_receive.call((const Variant **)argp, 4, ret, ce);
-		ERR_FAIL_COND_MSG(ce.error != Callable::CallError::CALL_OK, "Custom sync function failed");
-	} else {
-		ERR_FAIL_COND_MSG(p_from != 1, "Default sync implementation only allow syncing from server to client");
-		_process_default_sync(id, p_packet, p_packet_len);
-	}
-}
-
-Error MultiplayerReplicator::_get_state(const List<StringName> &p_properties, const Object *p_obj, List<Variant> &r_variant) {
-	ERR_FAIL_COND_V_MSG(!p_obj, ERR_INVALID_PARAMETER, "Cannot encode null object");
-	for (const StringName &prop : p_properties) {
-		bool valid = false;
-		const Variant v = p_obj->get(prop, &valid);
-		ERR_FAIL_COND_V_MSG(!valid, ERR_INVALID_DATA, vformat("Property '%s' not found.", prop));
-		r_variant.push_back(v);
-	}
-	return OK;
-}
-
-Error MultiplayerReplicator::_encode_state(const List<Variant> &p_variants, uint8_t *p_buffer, int &r_len, bool *r_raw) {
-	r_len = 0;
-	int size = 0;
-
-	// Try raw encoding optimization.
-	if (r_raw && p_variants.size() == 1) {
-		*r_raw = false;
-		const Variant v = p_variants[0];
-		if (v.get_type() == Variant::PACKED_BYTE_ARRAY) {
-			*r_raw = true;
-			const PackedByteArray pba = v;
-			if (p_buffer) {
-				memcpy(p_buffer, pba.ptr(), pba.size());
-			}
-			r_len += pba.size();
-		} else {
-			multiplayer->encode_and_compress_variant(v, p_buffer, size);
-			r_len += size;
-		}
-		return OK;
-	}
-
-	// Regular encoding.
-	for (const Variant &v : p_variants) {
-		multiplayer->encode_and_compress_variant(v, p_buffer ? p_buffer + r_len : nullptr, size);
-		r_len += size;
-	}
-	return OK;
-}
-
-Error MultiplayerReplicator::_decode_state(const List<StringName> &p_properties, Object *p_obj, const uint8_t *p_buffer, int p_len, int &r_len, bool p_raw) {
-	r_len = 0;
-	int argc = p_properties.size();
-	if (argc == 0 && p_raw) {
-		ERR_FAIL_COND_V_MSG(p_len != 0, ERR_INVALID_DATA, "Buffer has trailing bytes.");
-		return OK;
-	}
-	ERR_FAIL_COND_V(p_raw && argc != 1, ERR_INVALID_DATA);
-	if (p_raw) {
-		r_len = p_len;
-		PackedByteArray pba;
-		pba.resize(p_len);
-		memcpy(pba.ptrw(), p_buffer, p_len);
-		p_obj->set(p_properties[0], pba);
-		return OK;
-	}
-
-	Vector<Variant> args;
-	Vector<const Variant *> argp;
-	args.resize(argc);
-
-	for (int i = 0; i < argc; i++) {
-		ERR_FAIL_COND_V_MSG(r_len >= p_len, ERR_INVALID_DATA, "Invalid packet received. Size too small.");
-
-		int vlen;
-		Error err = multiplayer->decode_and_decompress_variant(args.write[i], &p_buffer[r_len], p_len - r_len, &vlen);
-		ERR_FAIL_COND_V_MSG(err != OK, err, "Invalid packet received. Unable to decode state variable.");
-		r_len += vlen;
-	}
-	ERR_FAIL_COND_V_MSG(p_len - r_len != 0, ERR_INVALID_DATA, "Buffer has trailing bytes.");
-
-	int i = 0;
-	for (const StringName &prop : p_properties) {
-		p_obj->set(prop, args[i]);
-		i += 1;
-	}
-	return OK;
-}
-
-Error MultiplayerReplicator::spawn_config(const ResourceUID::ID &p_id, ReplicationMode p_mode, const TypedArray<StringName> &p_props, const Callable &p_on_send, const Callable &p_on_recv) {
-	ERR_FAIL_COND_V(p_mode < REPLICATION_MODE_NONE || p_mode > REPLICATION_MODE_CUSTOM, ERR_INVALID_PARAMETER);
-	ERR_FAIL_COND_V(!ResourceUID::get_singleton()->has_id(p_id), ERR_INVALID_PARAMETER);
-	ERR_FAIL_COND_V_MSG(p_on_send.is_valid() != p_on_recv.is_valid(), ERR_INVALID_PARAMETER, "Send and receive custom callables must be both valid or both empty");
-#ifdef TOOLS_ENABLED
-	if (!p_on_send.is_valid()) {
-		// We allow non scene spawning with custom callables.
-		String path = ResourceUID::get_singleton()->get_id_path(p_id);
-		RES res = ResourceLoader::load(path);
-		ERR_FAIL_COND_V(!res->is_class("PackedScene"), ERR_INVALID_PARAMETER);
-	}
-#endif
-	if (p_mode == REPLICATION_MODE_NONE) {
-		if (replications.has(p_id)) {
-			replications.erase(p_id);
-		}
-	} else {
-		SceneConfig cfg;
-		cfg.mode = p_mode;
-		for (int i = 0; i < p_props.size(); i++) {
-			cfg.properties.push_back(p_props[i]);
-		}
-		cfg.on_spawn_despawn_send = p_on_send;
-		cfg.on_spawn_despawn_receive = p_on_recv;
-		replications[p_id] = cfg;
-	}
-	return OK;
-}
-
-Error MultiplayerReplicator::sync_config(const ResourceUID::ID &p_id, uint64_t p_interval, const TypedArray<StringName> &p_props, const Callable &p_on_send, const Callable &p_on_recv) {
-	ERR_FAIL_COND_V(!ResourceUID::get_singleton()->has_id(p_id), ERR_INVALID_PARAMETER);
-	ERR_FAIL_COND_V_MSG(p_on_send.is_valid() != p_on_recv.is_valid(), ERR_INVALID_PARAMETER, "Send and receive custom callables must be both valid or both empty");
-	ERR_FAIL_COND_V(!replications.has(p_id), ERR_UNCONFIGURED);
-	SceneConfig &cfg = replications[p_id];
-	ERR_FAIL_COND_V_MSG(p_interval && cfg.mode != REPLICATION_MODE_SERVER && !p_on_send.is_valid(), ERR_INVALID_PARAMETER, "Timed updates in custom mode are only allowed if custom callbacks are also specified");
-	for (int i = 0; i < p_props.size(); i++) {
-		cfg.sync_properties.push_back(p_props[i]);
-	}
-	cfg.on_sync_send = p_on_send;
-	cfg.on_sync_receive = p_on_recv;
-	cfg.sync_interval = p_interval * 1000;
-	return OK;
-}
-
-Error MultiplayerReplicator::_send_spawn_despawn(int p_peer_id, const ResourceUID::ID &p_scene_id, const Variant &p_data, bool p_spawn) {
-	int data_size = 0;
-	int is_raw = false;
-	if (p_data.get_type() == Variant::PACKED_BYTE_ARRAY) {
-		const PackedByteArray pba = p_data;
-		is_raw = true;
-		data_size = p_data.operator PackedByteArray().size();
-	} else if (p_data.get_type() == Variant::NIL) {
-		is_raw = true;
-	} else {
-		Error err = encode_variant(p_data, nullptr, data_size);
-		ERR_FAIL_COND_V(err, err);
-	}
-	MAKE_ROOM(SPAWN_CMD_OFFSET + data_size);
-	uint8_t *ptr = packet_cache.ptrw();
-	ptr[0] = (p_spawn ? MultiplayerAPI::NETWORK_COMMAND_SPAWN : MultiplayerAPI::NETWORK_COMMAND_DESPAWN) + ((is_raw ? 1 : 0) << BYTE_OR_ZERO_SHIFT);
-	encode_uint64(p_scene_id, &ptr[1]);
-	if (p_data.get_type() == Variant::PACKED_BYTE_ARRAY) {
-		const PackedByteArray pba = p_data;
-		memcpy(&ptr[SPAWN_CMD_OFFSET], pba.ptr(), pba.size());
-	} else if (data_size) {
-		encode_variant(p_data, &ptr[SPAWN_CMD_OFFSET], data_size);
-	}
-	Ref<MultiplayerPeer> peer = multiplayer->get_multiplayer_peer();
-	peer->set_target_peer(p_peer_id);
-	peer->set_transfer_channel(0);
-	peer->set_transfer_mode(Multiplayer::TRANSFER_MODE_RELIABLE);
-	return peer->put_packet(ptr, SPAWN_CMD_OFFSET + data_size);
-}
-
-Error MultiplayerReplicator::send_despawn(int p_peer_id, const ResourceUID::ID &p_scene_id, const Variant &p_data, const NodePath &p_path) {
-	ERR_FAIL_COND_V(!multiplayer->has_multiplayer_peer(), ERR_UNCONFIGURED);
-	ERR_FAIL_COND_V_MSG(!replications.has(p_scene_id), ERR_INVALID_PARAMETER, vformat("Spawnable not found: %d", p_scene_id));
-	const SceneConfig &cfg = replications[p_scene_id];
-	if (cfg.on_spawn_despawn_send.is_valid()) {
-		return _send_spawn_despawn(p_peer_id, p_scene_id, p_data, true);
-	} else {
-		ERR_FAIL_COND_V_MSG(cfg.mode == REPLICATION_MODE_SERVER && multiplayer->is_server(), ERR_UNAVAILABLE, "Manual despawn is restricted in default server mode implementation. Use custom mode if you desire control over server spawn requests.");
-		NodePath path = p_path;
-		Object *obj = p_data.get_type() == Variant::OBJECT ? p_data.get_validated_object() : nullptr;
-		if (path.is_empty() && obj) {
-			Node *node = Object::cast_to<Node>(obj);
-			if (node && node->is_inside_tree()) {
-				path = node->get_path();
-			}
-		}
-		ERR_FAIL_COND_V_MSG(path.is_empty(), ERR_INVALID_PARAMETER, "Despawn default implementation requires a despawn path, or the data to be a node inside the SceneTree");
-		return _send_default_spawn_despawn(p_peer_id, p_scene_id, obj, path, false);
-	}
-}
-
-Error MultiplayerReplicator::send_spawn(int p_peer_id, const ResourceUID::ID &p_scene_id, const Variant &p_data, const NodePath &p_path) {
-	ERR_FAIL_COND_V(!multiplayer->has_multiplayer_peer(), ERR_UNCONFIGURED);
-	ERR_FAIL_COND_V_MSG(!replications.has(p_scene_id), ERR_INVALID_PARAMETER, vformat("Spawnable not found: %d", p_scene_id));
-	const SceneConfig &cfg = replications[p_scene_id];
-	if (cfg.on_spawn_despawn_send.is_valid()) {
-		return _send_spawn_despawn(p_peer_id, p_scene_id, p_data, false);
-	} else {
-		ERR_FAIL_COND_V_MSG(cfg.mode == REPLICATION_MODE_SERVER && multiplayer->is_server(), ERR_UNAVAILABLE, "Manual spawn is restricted in default server mode implementation. Use custom mode if you desire control over server spawn requests.");
-		NodePath path = p_path;
-		Object *obj = p_data.get_type() == Variant::OBJECT ? p_data.get_validated_object() : nullptr;
-		ERR_FAIL_COND_V_MSG(!obj, ERR_INVALID_PARAMETER, "Spawn default implementation requires the data to be an object.");
-		if (path.is_empty()) {
-			Node *node = Object::cast_to<Node>(obj);
-			if (node && node->is_inside_tree()) {
-				path = node->get_path();
-			}
-		}
-		ERR_FAIL_COND_V_MSG(path.is_empty(), ERR_INVALID_PARAMETER, "Spawn default implementation requires a spawn path, or the data to be a node inside the SceneTree");
-		return _send_default_spawn_despawn(p_peer_id, p_scene_id, obj, path, true);
-	}
-}
-
-Error MultiplayerReplicator::_spawn_despawn(ResourceUID::ID p_scene_id, Object *p_obj, int p_peer, bool p_spawn) {
-	ERR_FAIL_COND_V_MSG(!replications.has(p_scene_id), ERR_INVALID_PARAMETER, vformat("Spawnable not found: %d", p_scene_id));
-
-	const SceneConfig &cfg = replications[p_scene_id];
-	if (cfg.on_spawn_despawn_send.is_valid()) {
-		Variant args[4];
-		args[0] = p_peer;
-		args[1] = p_scene_id;
-		args[2] = p_obj;
-		args[3] = p_spawn;
-		const Variant *argp[] = { &args[0], &args[1], &args[2], &args[3] };
-		Callable::CallError ce;
-		Variant ret;
-		cfg.on_spawn_despawn_send.call(argp, 4, ret, ce);
-		ERR_FAIL_COND_V_MSG(ce.error != Callable::CallError::CALL_OK, FAILED, "Custom send function failed");
-		return OK;
-	} else {
-		Node *node = Object::cast_to<Node>(p_obj);
-		ERR_FAIL_COND_V_MSG(!p_obj, ERR_INVALID_PARAMETER, "Only nodes can be replicated by the default implementation");
-		return _send_default_spawn_despawn(p_peer, p_scene_id, node, node->get_path(), p_spawn);
-	}
-}
-
-Error MultiplayerReplicator::spawn(ResourceUID::ID p_scene_id, Object *p_obj, int p_peer) {
-	return _spawn_despawn(p_scene_id, p_obj, p_peer, true);
-}
-
-Error MultiplayerReplicator::despawn(ResourceUID::ID p_scene_id, Object *p_obj, int p_peer) {
-	return _spawn_despawn(p_scene_id, p_obj, p_peer, false);
-}
-
-PackedByteArray MultiplayerReplicator::encode_state(const ResourceUID::ID &p_scene_id, const Object *p_obj, bool p_initial) {
-	PackedByteArray state;
-	ERR_FAIL_COND_V_MSG(!replications.has(p_scene_id), state, vformat("Spawnable not found: %d", p_scene_id));
-	const SceneConfig &cfg = replications[p_scene_id];
-	int len = 0;
-	List<Variant> state_vars;
-	const List<StringName> props = p_initial ? cfg.properties : cfg.sync_properties;
-	Error err = _get_state(props, p_obj, state_vars);
-	ERR_FAIL_COND_V_MSG(err != OK, state, "Unable to retrieve object state.");
-	err = _encode_state(state_vars, nullptr, len);
-	ERR_FAIL_COND_V_MSG(err != OK, state, "Unable to encode object state.");
-	state.resize(len);
-	_encode_state(state_vars, state.ptrw(), len);
-	return state;
-}
-
-Error MultiplayerReplicator::decode_state(const ResourceUID::ID &p_scene_id, Object *p_obj, const PackedByteArray p_data, bool p_initial) {
-	ERR_FAIL_COND_V_MSG(!replications.has(p_scene_id), ERR_INVALID_PARAMETER, vformat("Spawnable not found: %d", p_scene_id));
-	const SceneConfig &cfg = replications[p_scene_id];
-	const List<StringName> props = p_initial ? cfg.properties : cfg.sync_properties;
-	int size;
-	return _decode_state(props, p_obj, p_data.ptr(), p_data.size(), size);
-}
-
-void MultiplayerReplicator::scene_enter_exit_notify(const String &p_scene, Node *p_node, bool p_enter) {
-	if (!multiplayer->has_multiplayer_peer()) {
-		return;
-	}
-	Node *root_node = multiplayer->get_root_node();
-	ERR_FAIL_COND(!p_node || !p_node->get_parent() || !root_node);
-	NodePath path = (root_node->get_path()).rel_path_to(p_node->get_parent()->get_path());
-	if (path.is_empty()) {
-		return;
-	}
-	ResourceUID::ID id = ResourceLoader::get_resource_uid(p_scene);
-	if (!replications.has(id)) {
-		return;
-	}
-	const SceneConfig &cfg = replications[id];
-	if (p_enter) {
-		if (cfg.mode == REPLICATION_MODE_SERVER && multiplayer->is_server()) {
-			replicated_nodes[p_node->get_instance_id()] = id;
-			_track(id, p_node);
-			spawn(id, p_node, 0);
-		}
-		emit_signal(SNAME("replicated_instance_added"), id, p_node);
-	} else {
-		if (cfg.mode == REPLICATION_MODE_SERVER && multiplayer->is_server() && replicated_nodes.has(p_node->get_instance_id())) {
-			replicated_nodes.erase(p_node->get_instance_id());
-			_untrack(id, p_node);
-			despawn(id, p_node, 0);
-		}
-		emit_signal(SNAME("replicated_instance_removed"), id, p_node);
-	}
-}
-
-void MultiplayerReplicator::spawn_all(int p_peer) {
-	for (const KeyValue<ObjectID, ResourceUID::ID> &E : replicated_nodes) {
-		// Only server mode adds to replicated_nodes, no need to check it.
-		Object *obj = ObjectDB::get_instance(E.key);
-		ERR_CONTINUE(!obj);
-		Node *node = Object::cast_to<Node>(obj);
-		ERR_CONTINUE(!node);
-		spawn(E.value, node, p_peer);
-	}
-}
-
-void MultiplayerReplicator::poll() {
-	for (KeyValue<ResourceUID::ID, SceneConfig> &E : replications) {
-		if (!E.value.sync_interval) {
-			continue;
-		}
-		if (E.value.mode == REPLICATION_MODE_SERVER && !multiplayer->is_server()) {
-			continue;
-		}
-		uint64_t time = OS::get_singleton()->get_ticks_usec();
-		if (E.value.sync_last + E.value.sync_interval <= time) {
-			sync_all(E.key, 0);
-			E.value.sync_last = time;
-		}
-		// Handle wrapping.
-		if (E.value.sync_last > time) {
-			E.value.sync_last = time;
-		}
-	}
-}
-
-void MultiplayerReplicator::track(const ResourceUID::ID &p_scene_id, Object *p_obj) {
-	ERR_FAIL_COND(!replications.has(p_scene_id));
-	const SceneConfig &cfg = replications[p_scene_id];
-	ERR_FAIL_COND_MSG(cfg.mode == REPLICATION_MODE_SERVER, "Manual object tracking is not allowed in server mode.");
-	_track(p_scene_id, p_obj);
-}
-
-void MultiplayerReplicator::_track(const ResourceUID::ID &p_scene_id, Object *p_obj) {
-	ERR_FAIL_COND(!p_obj);
-	ERR_FAIL_COND(!replications.has(p_scene_id));
-	if (!tracked_objects.has(p_scene_id)) {
-		tracked_objects[p_scene_id] = List<ObjectID>();
-	}
-	tracked_objects[p_scene_id].push_back(p_obj->get_instance_id());
-}
-
-void MultiplayerReplicator::untrack(const ResourceUID::ID &p_scene_id, Object *p_obj) {
-	ERR_FAIL_COND(!replications.has(p_scene_id));
-	const SceneConfig &cfg = replications[p_scene_id];
-	ERR_FAIL_COND_MSG(cfg.mode == REPLICATION_MODE_SERVER, "Manual object tracking is not allowed in server mode.");
-	_untrack(p_scene_id, p_obj);
-}
-
-void MultiplayerReplicator::_untrack(const ResourceUID::ID &p_scene_id, Object *p_obj) {
-	ERR_FAIL_COND(!p_obj);
-	ERR_FAIL_COND(!replications.has(p_scene_id));
-	if (tracked_objects.has(p_scene_id)) {
-		tracked_objects[p_scene_id].erase(p_obj->get_instance_id());
-	}
-}
-
-Error MultiplayerReplicator::sync_all(const ResourceUID::ID &p_scene_id, int p_peer) {
-	ERR_FAIL_COND_V(!replications.has(p_scene_id), ERR_INVALID_PARAMETER);
-	if (!tracked_objects.has(p_scene_id)) {
-		return OK;
-	}
-	const SceneConfig &cfg = replications[p_scene_id];
-	if (cfg.on_sync_send.is_valid()) {
-		Array objs;
-		if (tracked_objects.has(p_scene_id)) {
-			objs.resize(tracked_objects[p_scene_id].size());
-			int idx = 0;
-			for (const ObjectID &obj_id : tracked_objects[p_scene_id]) {
-				objs[idx++] = ObjectDB::get_instance(obj_id);
-			}
-		}
-		Variant args[3] = { p_scene_id, objs, p_peer };
-		Variant *argp[3] = { args, &args[1], &args[2] };
-		Callable::CallError ce;
-		Variant ret;
-		cfg.on_sync_send.call((const Variant **)argp, 3, ret, ce);
-		ERR_FAIL_COND_V_MSG(ce.error != Callable::CallError::CALL_OK, FAILED, "Custom sync function failed");
-		return OK;
-	} else if (cfg.sync_properties.size()) {
-		return _sync_all_default(p_scene_id, p_peer);
-	}
-	return OK;
-}
-
-Error MultiplayerReplicator::send_sync(int p_peer_id, const ResourceUID::ID &p_scene_id, PackedByteArray p_data, Multiplayer::TransferMode p_transfer_mode, int p_channel) {
-	ERR_FAIL_COND_V(!multiplayer->has_multiplayer_peer(), ERR_UNCONFIGURED);
-	ERR_FAIL_COND_V(!replications.has(p_scene_id), ERR_INVALID_PARAMETER);
-	const SceneConfig &cfg = replications[p_scene_id];
-	ERR_FAIL_COND_V_MSG(!cfg.on_sync_send.is_valid(), ERR_UNCONFIGURED, "Sending raw sync messages is only available with custom functions");
-	MAKE_ROOM(SYNC_CMD_OFFSET + p_data.size());
-	uint8_t *ptr = packet_cache.ptrw();
-	ptr[0] = MultiplayerAPI::NETWORK_COMMAND_SYNC;
-	encode_uint64(p_scene_id, &ptr[1]);
-	if (p_data.size()) {
-		memcpy(&ptr[SYNC_CMD_OFFSET], p_data.ptr(), p_data.size());
-	}
-	Ref<MultiplayerPeer> peer = multiplayer->get_multiplayer_peer();
-	peer->set_target_peer(p_peer_id);
-	peer->set_transfer_channel(p_channel);
-	peer->set_transfer_mode(p_transfer_mode);
-	return peer->put_packet(ptr, SYNC_CMD_OFFSET + p_data.size());
-}
-
-void MultiplayerReplicator::clear() {
-	tracked_objects.clear();
-	replicated_nodes.clear();
-}
-
-void MultiplayerReplicator::_bind_methods() {
-	ClassDB::bind_method(D_METHOD("spawn_config", "scene_id", "spawn_mode", "properties", "custom_send", "custom_receive"), &MultiplayerReplicator::spawn_config, DEFVAL(TypedArray<StringName>()), DEFVAL(Callable()), DEFVAL(Callable()));
-	ClassDB::bind_method(D_METHOD("sync_config", "scene_id", "interval", "properties", "custom_send", "custom_receive"), &MultiplayerReplicator::sync_config, DEFVAL(TypedArray<StringName>()), DEFVAL(Callable()), DEFVAL(Callable()));
-	ClassDB::bind_method(D_METHOD("despawn", "scene_id", "object", "peer_id"), &MultiplayerReplicator::despawn, DEFVAL(0));
-	ClassDB::bind_method(D_METHOD("spawn", "scene_id", "object", "peer_id"), &MultiplayerReplicator::spawn, DEFVAL(0));
-	ClassDB::bind_method(D_METHOD("send_despawn", "peer_id", "scene_id", "data", "path"), &MultiplayerReplicator::send_despawn, DEFVAL(Variant()), DEFVAL(NodePath()));
-	ClassDB::bind_method(D_METHOD("send_spawn", "peer_id", "scene_id", "data", "path"), &MultiplayerReplicator::send_spawn, DEFVAL(Variant()), DEFVAL(NodePath()));
-	ClassDB::bind_method(D_METHOD("send_sync", "peer_id", "scene_id", "data", "transfer_mode", "channel"), &MultiplayerReplicator::send_sync, DEFVAL(Multiplayer::TRANSFER_MODE_RELIABLE), DEFVAL(0));
-	ClassDB::bind_method(D_METHOD("sync_all", "scene_id", "peer_id"), &MultiplayerReplicator::sync_all, DEFVAL(0));
-	ClassDB::bind_method(D_METHOD("track", "scene_id", "object"), &MultiplayerReplicator::track);
-	ClassDB::bind_method(D_METHOD("untrack", "scene_id", "object"), &MultiplayerReplicator::untrack);
-	ClassDB::bind_method(D_METHOD("encode_state", "scene_id", "object", "initial"), &MultiplayerReplicator::encode_state, DEFVAL(true));
-	ClassDB::bind_method(D_METHOD("decode_state", "scene_id", "object", "data", "initial"), &MultiplayerReplicator::decode_state, DEFVAL(true));
-
-	ADD_SIGNAL(MethodInfo("despawned", PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node")));
-	ADD_SIGNAL(MethodInfo("spawned", PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node")));
-	ADD_SIGNAL(MethodInfo("despawn_requested", PropertyInfo(Variant::INT, "id"), PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "parent", PROPERTY_HINT_RESOURCE_TYPE, "Node"), PropertyInfo(Variant::STRING, "name"), PropertyInfo(Variant::PACKED_BYTE_ARRAY, "data")));
-	ADD_SIGNAL(MethodInfo("spawn_requested", PropertyInfo(Variant::INT, "id"), PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "parent", PROPERTY_HINT_RESOURCE_TYPE, "Node"), PropertyInfo(Variant::STRING, "name"), PropertyInfo(Variant::PACKED_BYTE_ARRAY, "data")));
-	ADD_SIGNAL(MethodInfo("replicated_instance_added", PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node")));
-	ADD_SIGNAL(MethodInfo("replicated_instance_removed", PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node")));
-
-	BIND_ENUM_CONSTANT(REPLICATION_MODE_NONE);
-	BIND_ENUM_CONSTANT(REPLICATION_MODE_SERVER);
-	BIND_ENUM_CONSTANT(REPLICATION_MODE_CUSTOM);
-}
diff --git a/core/multiplayer/multiplayer_replicator.h b/core/multiplayer/multiplayer_replicator.h
deleted file mode 100644
index a9cd6e211e..0000000000
--- a/core/multiplayer/multiplayer_replicator.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*************************************************************************/
-/*  multiplayer_replicator.h                                             */
-/*************************************************************************/
-/*                       This file is part of:                           */
-/*                           GODOT ENGINE                                */
-/*                      https://godotengine.org                          */
-/*************************************************************************/
-/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
-/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
-/*                                                                       */
-/* Permission is hereby granted, free of charge, to any person obtaining */
-/* a copy of this software and associated documentation files (the       */
-/* "Software"), to deal in the Software without restriction, including   */
-/* without limitation the rights to use, copy, modify, merge, publish,   */
-/* distribute, sublicense, and/or sell copies of the Software, and to    */
-/* permit persons to whom the Software is furnished to do so, subject to */
-/* the following conditions:                                             */
-/*                                                                       */
-/* The above copyright notice and this permission notice shall be        */
-/* included in all copies or substantial portions of the Software.       */
-/*                                                                       */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
-/*************************************************************************/
-
-#ifndef MULTIPLAYER_REPLICATOR_H
-#define MULTIPLAYER_REPLICATOR_H
-
-#include "core/multiplayer/multiplayer_api.h"
-
-#include "core/io/resource_uid.h"
-#include "core/templates/hash_map.h"
-#include "core/variant/typed_array.h"
-
-class MultiplayerReplicator : public Object {
-	GDCLASS(MultiplayerReplicator, Object);
-
-public:
-	enum {
-		SPAWN_CMD_OFFSET = 9,
-		SYNC_CMD_OFFSET = 9,
-	};
-
-	enum ReplicationMode {
-		REPLICATION_MODE_NONE,
-		REPLICATION_MODE_SERVER,
-		REPLICATION_MODE_CUSTOM,
-	};
-
-	struct SceneConfig {
-		ReplicationMode mode;
-		uint64_t sync_interval = 0;
-		uint64_t sync_last = 0;
-		uint8_t sync_recv = 0;
-		List<StringName> properties;
-		List<StringName> sync_properties;
-		Callable on_spawn_despawn_send;
-		Callable on_spawn_despawn_receive;
-		Callable on_sync_send;
-		Callable on_sync_receive;
-	};
-
-protected:
-	static void _bind_methods();
-
-private:
-	enum {
-		BYTE_OR_ZERO_SHIFT = MultiplayerAPI::CMD_FLAG_0_SHIFT,
-	};
-
-	enum {
-		BYTE_OR_ZERO_FLAG = 1 << BYTE_OR_ZERO_SHIFT,
-	};
-
-	MultiplayerAPI *multiplayer = nullptr;
-	Vector<uint8_t> packet_cache;
-	Map<ResourceUID::ID, SceneConfig> replications;
-	Map<ObjectID, ResourceUID::ID> replicated_nodes;
-	HashMap<ResourceUID::ID, List<ObjectID>> tracked_objects;
-
-	// Encoding
-	Error _get_state(const List<StringName> &p_properties, const Object *p_obj, List<Variant> &r_variant);
-	Error _encode_state(const List<Variant> &p_variants, uint8_t *p_buffer, int &r_len, bool *r_raw = nullptr);
-	Error _decode_state(const List<StringName> &p_cfg, Object *p_obj, const uint8_t *p_buffer, int p_len, int &r_len, bool p_raw = false);
-
-	// Spawn
-	Error _spawn_despawn(ResourceUID::ID p_scene_id, Object *p_obj, int p_peer, bool p_spawn);
-	Error _send_spawn_despawn(int p_peer_id, const ResourceUID::ID &p_scene_id, const Variant &p_data, bool p_spawn);
-	void _process_default_spawn_despawn(int p_from, const ResourceUID::ID &p_scene_id, const uint8_t *p_packet, int p_packet_len, bool p_spawn);
-	Error _send_default_spawn_despawn(int p_peer_id, const ResourceUID::ID &p_scene_id, Object *p_obj, const NodePath &p_path, bool p_spawn);
-
-	// Sync
-	void _process_default_sync(const ResourceUID::ID &p_id, const uint8_t *p_packet, int p_packet_len);
-	Error _sync_all_default(const ResourceUID::ID &p_scene_id, int p_peer);
-	void _track(const ResourceUID::ID &p_scene_id, Object *p_object);
-	void _untrack(const ResourceUID::ID &p_scene_id, Object *p_object);
-
-public:
-	void clear();
-
-	// Encoding
-	PackedByteArray encode_state(const ResourceUID::ID &p_scene_id, const Object *p_node, bool p_initial);
-	Error decode_state(const ResourceUID::ID &p_scene_id, Object *p_node, PackedByteArray p_data, bool p_initial);
-
-	// Spawn
-	Error spawn_config(const ResourceUID::ID &p_id, ReplicationMode p_mode, const TypedArray<StringName> &p_props = TypedArray<StringName>(), const Callable &p_on_send = Callable(), const Callable &p_on_recv = Callable());
-	Error spawn(ResourceUID::ID p_scene_id, Object *p_obj, int p_peer = 0);
-	Error despawn(ResourceUID::ID p_scene_id, Object *p_obj, int p_peer = 0);
-	Error send_despawn(int p_peer_id, const ResourceUID::ID &p_scene_id, const Variant &p_data = Variant(), const NodePath &p_path = NodePath());
-	Error send_spawn(int p_peer_id, const ResourceUID::ID &p_scene_id, const Variant &p_data = Variant(), const NodePath &p_path = NodePath());
-
-	// Sync
-	Error sync_config(const ResourceUID::ID &p_id, uint64_t p_interval, const TypedArray<StringName> &p_props = TypedArray<StringName>(), const Callable &p_on_send = Callable(), const Callable &p_on_recv = Callable());
-	Error sync_all(const ResourceUID::ID &p_scene_id, int p_peer);
-	Error send_sync(int p_peer_id, const ResourceUID::ID &p_scene_id, PackedByteArray p_data, Multiplayer::TransferMode p_mode, int p_channel);
-	void track(const ResourceUID::ID &p_scene_id, Object *p_object);
-	void untrack(const ResourceUID::ID &p_scene_id, Object *p_object);
-
-	// Used by MultiplayerAPI
-	void spawn_all(int p_peer);
-	void process_spawn_despawn(int p_from, const uint8_t *p_packet, int p_packet_len, bool p_spawn);
-	void process_sync(int p_from, const uint8_t *p_packet, int p_packet_len);
-	void scene_enter_exit_notify(const String &p_scene, Node *p_node, bool p_enter);
-	void poll();
-
-	MultiplayerReplicator(MultiplayerAPI *p_multiplayer) {
-		multiplayer = p_multiplayer;
-	}
-};
-
-VARIANT_ENUM_CAST(MultiplayerReplicator::ReplicationMode);
-
-#endif // MULTIPLAYER_REPLICATOR_H
diff --git a/core/multiplayer/rpc_manager.cpp b/core/multiplayer/rpc_manager.cpp
index 7736637349..1e6d2108be 100644
--- a/core/multiplayer/rpc_manager.cpp
+++ b/core/multiplayer/rpc_manager.cpp
@@ -235,16 +235,12 @@ void RPCManager::_process_rpc(Node *p_node, const uint16_t p_rpc_method_id, int
 	ERR_FAIL_COND_MSG(!can_call, "RPC '" + String(config.name) + "' is not allowed on node " + p_node->get_path() + " from: " + itos(p_from) + ". Mode is " + itos((int)config.rpc_mode) + ", authority is " + itos(p_node->get_multiplayer_authority()) + ".");
 
 	int argc = 0;
-	bool byte_only = false;
 
 	const bool byte_only_or_no_args = p_packet[0] & BYTE_ONLY_OR_NO_ARGS_FLAG;
 	if (byte_only_or_no_args) {
 		if (p_offset < p_packet_len) {
 			// This packet contains only bytes.
 			argc = 1;
-			byte_only = true;
-		} else {
-			// This rpc calls a method without parameters.
 		}
 	} else {
 		// Normal variant, takes the argument count from the packet.
@@ -262,25 +258,10 @@ void RPCManager::_process_rpc(Node *p_node, const uint16_t p_rpc_method_id, int
 	_profile_node_data("in_rpc", p_node->get_instance_id());
 #endif
 
-	if (byte_only) {
-		Vector<uint8_t> pure_data;
-		const int len = p_packet_len - p_offset;
-		pure_data.resize(len);
-		memcpy(pure_data.ptrw(), &p_packet[p_offset], len);
-		args.write[0] = pure_data;
-		argp.write[0] = &args[0];
-		p_offset += len;
-	} else {
-		for (int i = 0; i < argc; i++) {
-			ERR_FAIL_COND_MSG(p_offset >= p_packet_len, "Invalid packet received. Size too small.");
-
-			int vlen;
-			Error err = multiplayer->decode_and_decompress_variant(args.write[i], &p_packet[p_offset], p_packet_len - p_offset, &vlen);
-			ERR_FAIL_COND_MSG(err != OK, "Invalid packet received. Unable to decode RPC argument.");
-
-			argp.write[i] = &args[i];
-			p_offset += vlen;
-		}
+	int out;
+	MultiplayerAPI::decode_and_decompress_variants(args, &p_packet[p_offset], p_packet_len - p_offset, out, byte_only_or_no_args, multiplayer->is_object_decoding_allowed());
+	for (int i = 0; i < argc; i++) {
+		argp.write[i] = &args[i];
 	}
 
 	Callable::CallError ce;
@@ -380,28 +361,19 @@ void RPCManager::_send_rpc(Node *p_from, int p_to, uint16_t p_rpc_id, const Mult
 		ofs += 2;
 	}
 
-	if (p_argcount == 0) {
-		byte_only_or_no_args = true;
-	} else if (p_argcount == 1 && p_arg[0]->get_type() == Variant::PACKED_BYTE_ARRAY) {
-		byte_only_or_no_args = true;
-		// Special optimization when only the byte vector is sent.
-		const Vector<uint8_t> data = *p_arg[0];
-		MAKE_ROOM(ofs + data.size());
-		memcpy(&(packet_cache.write[ofs]), data.ptr(), sizeof(uint8_t) * data.size());
-		ofs += data.size();
+	int len;
+	Error err = MultiplayerAPI::encode_and_compress_variants(p_arg, p_argcount, nullptr, len, &byte_only_or_no_args, multiplayer->is_object_decoding_allowed());
+	ERR_FAIL_COND_MSG(err != OK, "Unable to encode RPC arguments. THIS IS LIKELY A BUG IN THE ENGINE!");
+	if (byte_only_or_no_args) {
+		MAKE_ROOM(ofs + len);
 	} else {
-		// Arguments
-		MAKE_ROOM(ofs + 1);
+		MAKE_ROOM(ofs + 1 + len);
 		packet_cache.write[ofs] = p_argcount;
 		ofs += 1;
-		for (int i = 0; i < p_argcount; i++) {
-			int len(0);
-			Error err = multiplayer->encode_and_compress_variant(*p_arg[i], nullptr, len);
-			ERR_FAIL_COND_MSG(err != OK, "Unable to encode RPC argument. THIS IS LIKELY A BUG IN THE ENGINE!");
-			MAKE_ROOM(ofs + len);
-			multiplayer->encode_and_compress_variant(*p_arg[i], &(packet_cache.write[ofs]), len);
-			ofs += len;
-		}
+	}
+	if (len) {
+		MultiplayerAPI::encode_and_compress_variants(p_arg, p_argcount, &packet_cache.write[ofs], len, &byte_only_or_no_args, multiplayer->is_object_decoding_allowed());
+		ofs += len;
 	}
 
 	ERR_FAIL_COND(command_type > 7);
diff --git a/core/register_core_types.cpp b/core/register_core_types.cpp
index a18ec4d6ad..388368d181 100644
--- a/core/register_core_types.cpp
+++ b/core/register_core_types.cpp
@@ -69,7 +69,6 @@
 #include "core/math/triangle_mesh.h"
 #include "core/multiplayer/multiplayer_api.h"
 #include "core/multiplayer/multiplayer_peer.h"
-#include "core/multiplayer/multiplayer_replicator.h"
 #include "core/object/class_db.h"
 #include "core/object/undo_redo.h"
 #include "core/os/main_loop.h"
@@ -200,7 +199,6 @@ void register_core_types() {
 
 	GDREGISTER_VIRTUAL_CLASS(MultiplayerPeer);
 	GDREGISTER_CLASS(MultiplayerPeerExtension);
-	GDREGISTER_VIRTUAL_CLASS(MultiplayerReplicator);
 	GDREGISTER_CLASS(MultiplayerAPI);
 	GDREGISTER_CLASS(MainLoop);
 	GDREGISTER_CLASS(Translation);
diff --git a/core/string/char_utils.h b/core/string/char_utils.h
new file mode 100644
index 0000000000..0afd058f01
--- /dev/null
+++ b/core/string/char_utils.h
@@ -0,0 +1,92 @@
+/*************************************************************************/
+/*  char_utils.h                                                         */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef CHAR_UTILS_H
+#define CHAR_UTILS_H
+
+#include "core/typedefs.h"
+
+static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) {
+	return ('A' <= c && c <= 'Z'); // ASCII 'A'..'Z' only.
+}
+
+static _FORCE_INLINE_ bool is_ascii_lower_case(char32_t c) {
+	return ('a' <= c && c <= 'z'); // ASCII 'a'..'z' only.
+}
+
+static _FORCE_INLINE_ bool is_digit(char32_t c) {
+	return ('0' <= c && c <= '9'); // ASCII decimal digits only.
+}
+
+static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
+	return is_digit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); // 0-9, a-f, A-F.
+}
+
+static _FORCE_INLINE_ bool is_binary_digit(char32_t c) {
+	return ('0' <= c && c <= '1'); // '0' and '1' are adjacent code points.
+}
+
+static _FORCE_INLINE_ bool is_ascii_char(char32_t c) {
+	return is_ascii_lower_case(c) || is_ascii_upper_case(c); // ASCII letters only (no digits or symbols).
+}
+
+static _FORCE_INLINE_ bool is_ascii_alphanumeric_char(char32_t c) {
+	return is_ascii_char(c) || is_digit(c); // ASCII letters and decimal digits.
+}
+
+static _FORCE_INLINE_ bool is_ascii_identifier_char(char32_t c) {
+	return is_ascii_alphanumeric_char(c) || c == '_'; // ASCII letters, decimal digits and underscore.
+}
+
+static _FORCE_INLINE_ bool is_symbol(char32_t c) {
+	return c != '_' && (c == '\t' || (c >= ' ' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~')); // Tab, space and ASCII punctuation, minus '_'.
+}
+
+static _FORCE_INLINE_ bool is_control(char32_t p_char) {
+	return (p_char <= 0x001f) || (0x007f <= p_char && p_char <= 0x009f); // U+0000..U+001F and U+007F..U+009F.
+}
+
+static _FORCE_INLINE_ bool is_whitespace(char32_t p_char) {
+	return (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == ' ') || (p_char == 0x0085) || (p_char == 0x00a0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000); // Listed in ascending code point order.
+}
+
+static _FORCE_INLINE_ bool is_linebreak(char32_t p_char) {
+	return (0x000a <= p_char && p_char <= 0x000d) || p_char == 0x0085 || p_char == 0x2028 || p_char == 0x2029; // U+000A..U+000D, U+0085, U+2028, U+2029.
+}
+
+static _FORCE_INLINE_ bool is_punct(char32_t p_char) {
+	return (' ' <= p_char && p_char <= '/') || (':' <= p_char && p_char <= '@') || ('[' <= p_char && p_char <= '^') || (p_char == '`') || ('{' <= p_char && p_char <= '~') || (0x2000 <= p_char && p_char <= 0x206f) || (0x3000 <= p_char && p_char <= 0x303f); // Note: '_' is deliberately not matched ('['..'^' stops before it).
+}
+
+static _FORCE_INLINE_ bool is_underscore(char32_t p_char) {
+	return p_char == '_'; // Single code point test, kept as a named helper.
+}
+
+#endif // CHAR_UTILS_H
diff --git a/core/string/translation.cpp b/core/string/translation.cpp
index 355ee238e8..7cc41df9ef 100644
--- a/core/string/translation.cpp
+++ b/core/string/translation.cpp
@@ -213,14 +213,6 @@ static _character_accent_pair _character_to_accented[] = {
 	{ 'z', U"ź" },
 };
 
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
-	return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
-	return (c >= 'a' && c <= 'z');
-}
-
 Vector<TranslationServer::LocaleScriptInfo> TranslationServer::locale_script_info;
 
 Map<String, String> TranslationServer::language_map;
@@ -309,15 +301,15 @@ String TranslationServer::standardize_locale(const String &p_locale) const {
 	Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
 	lang = locale_elements[0];
 	if (locale_elements.size() >= 2) {
-		if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) {
+		if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
 			script = locale_elements[1];
 		}
-		if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) {
+		if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
 			country = locale_elements[1];
 		}
 	}
 	if (locale_elements.size() >= 3) {
-		if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) {
+		if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
 			country = locale_elements[2];
 		} else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang) {
 			variant = locale_elements[2].to_lower();
@@ -434,15 +426,15 @@ String TranslationServer::get_locale_name(const String &p_locale) const {
 	Vector<String> locale_elements = locale.split("_");
 	lang = locale_elements[0];
 	if (locale_elements.size() >= 2) {
-		if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) {
+		if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
 			script = locale_elements[1];
 		}
-		if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) {
+		if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
 			country = locale_elements[1];
 		}
 	}
 	if (locale_elements.size() >= 3) {
-		if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) {
+		if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
 			country = locale_elements[2];
 		}
 	}
@@ -911,7 +903,7 @@ String TranslationServer::add_padding(String &p_message, int p_length) const {
 }
 
 const char32_t *TranslationServer::get_accented_version(char32_t p_character) const {
-	if (!((p_character >= 'a' && p_character <= 'z') || (p_character >= 'A' && p_character <= 'Z'))) {
+	if (!is_ascii_char(p_character)) {
 		return nullptr;
 	}
 
diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp
index 6e0a7c7022..c4edc8c086 100644
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -54,34 +54,14 @@
 
 static const int MAX_DECIMALS = 32;
 
-static _FORCE_INLINE_ bool is_digit(char32_t c) {
-	return (c >= '0' && c <= '9');
-}
-
-static _FORCE_INLINE_ bool is_hex_digit(char32_t c) {
-	return (is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
-	return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
-	return (c >= 'a' && c <= 'z');
-}
-
 static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
-	return (is_upper_case(c) ? (c + ('a' - 'A')) : c);
+	return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
 }
 
 const char CharString::_null = 0;
 const char16_t Char16String::_null = 0;
 const char32_t String::_null = 0;
 
-bool is_symbol(char32_t c) {
-	return c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t' || c == ' ');
-}
-
 bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
 	const String &s = p_s;
 	int beg = CLAMP(p_col, 0, s.length());
@@ -974,21 +954,21 @@ String String::camelcase_to_underscore(bool lowercase) const {
 	int start_index = 0;
 
 	for (int i = 1; i < this->size(); i++) {
-		bool is_upper = is_upper_case(cstr[i]);
+		bool is_upper = is_ascii_upper_case(cstr[i]);
 		bool is_number = is_digit(cstr[i]);
 
 		bool are_next_2_lower = false;
 		bool is_next_lower = false;
 		bool is_next_number = false;
-		bool was_precedent_upper = is_upper_case(cstr[i - 1]);
+		bool was_precedent_upper = is_ascii_upper_case(cstr[i - 1]);
 		bool was_precedent_number = is_digit(cstr[i - 1]);
 
 		if (i + 2 < this->size()) {
-			are_next_2_lower = is_lower_case(cstr[i + 1]) && is_lower_case(cstr[i + 2]);
+			are_next_2_lower = is_ascii_lower_case(cstr[i + 1]) && is_ascii_lower_case(cstr[i + 2]);
 		}
 
 		if (i + 1 < this->size()) {
-			is_next_lower = is_lower_case(cstr[i + 1]);
+			is_next_lower = is_ascii_lower_case(cstr[i + 1]);
 			is_next_number = is_digit(cstr[i + 1]);
 		}
 
@@ -2212,7 +2192,7 @@ bool String::is_numeric() const {
 				return false;
 			}
 			dot = true;
-		} else if (c < '0' || c > '9') {
+		} else if (!is_digit(c)) {
 			return false;
 		}
 	}
@@ -3691,7 +3671,7 @@ bool String::is_valid_identifier() const {
 			}
 		}
 
-		bool valid_char = is_digit(str[i]) || is_lower_case(str[i]) || is_upper_case(str[i]) || str[i] == '_';
+		bool valid_char = is_ascii_identifier_char(str[i]);
 
 		if (!valid_char) {
 			return false;
@@ -3716,7 +3696,7 @@ String String::uri_encode() const {
 	String res;
 	for (int i = 0; i < temp.length(); ++i) {
 		char ord = temp[i];
-		if (ord == '.' || ord == '-' || ord == '_' || ord == '~' || is_lower_case(ord) || is_upper_case(ord) || is_digit(ord)) {
+		if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
 			res += ord;
 		} else {
 			char h_Val[3];
@@ -3738,9 +3718,9 @@ String String::uri_decode() const {
 	for (int i = 0; i < src.length(); ++i) {
 		if (src[i] == '%' && i + 2 < src.length()) {
 			char ord1 = src[i + 1];
-			if (is_digit(ord1) || is_upper_case(ord1)) {
+			if (is_digit(ord1) || is_ascii_upper_case(ord1)) {
 				char ord2 = src[i + 2];
-				if (is_digit(ord2) || is_upper_case(ord2)) {
+				if (is_digit(ord2) || is_ascii_upper_case(ord2)) {
 					char bytes[3] = { (char)ord1, (char)ord2, 0 };
 					res += (char)strtol(bytes, nullptr, 16);
 					i += 2;
@@ -3867,7 +3847,7 @@ static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, ch
 					for (int i = 2; i < p_src_len; i++) {
 						eat = i + 1;
 						char32_t ct = p_src[i];
-						if (ct == ';' || ct < '0' || ct > '9') {
+						if (ct == ';' || !is_digit(ct)) {
 							break;
 						}
 					}
@@ -3997,7 +3977,7 @@ String String::pad_zeros(int p_digits) const {
 
 	int begin = 0;
 
-	while (begin < end && (s[begin] < '0' || s[begin] > '9')) {
+	while (begin < end && !is_digit(s[begin])) {
 		begin++;
 	}
 
@@ -4042,7 +4022,7 @@ bool String::is_valid_int() const {
 	}
 
 	for (int i = from; i < len; i++) {
-		if (operator[](i) < '0' || operator[](i) > '9') {
+		if (!is_digit(operator[](i))) {
 			return false; // no start with number plz
 		}
 	}
diff --git a/core/string/ustring.h b/core/string/ustring.h
index b685e3929f..1d302b65a7 100644
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -32,6 +32,7 @@
 #define USTRING_GODOT_H
 // Note: Renamed to avoid conflict with ICU header with the same name.
 
+#include "core/string/char_utils.h"
 #include "core/templates/cowdata.h"
 #include "core/templates/vector.h"
 #include "core/typedefs.h"
@@ -533,7 +534,6 @@ String DTRN(const String &p_text, const String &p_text_plural, int p_n, const St
 String RTR(const String &p_text, const String &p_context = "");
 String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context = "");
 
-bool is_symbol(char32_t c);
 bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end);
 
 _FORCE_INLINE_ void sarray_add_str(Vector<String> &arr) {
diff --git a/core/templates/rid_owner.h b/core/templates/rid_owner.h
index 3ed81e76fd..95632cdec2 100644
--- a/core/templates/rid_owner.h
+++ b/core/templates/rid_owner.h
@@ -292,43 +292,32 @@ public:
 	_FORCE_INLINE_ uint32_t get_rid_count() const {
 		return alloc_count;
 	}
-
-	_FORCE_INLINE_ T *get_ptr_by_index(uint32_t p_index) {
-		ERR_FAIL_UNSIGNED_INDEX_V(p_index, alloc_count, nullptr);
+	void get_owned_list(List<RID> *p_owned) {
 		if (THREAD_SAFE) {
 			spin_lock.lock();
 		}
-		uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk];
-		T *ptr = &chunks[idx / elements_in_chunk][idx % elements_in_chunk];
-		if (THREAD_SAFE) {
-			spin_lock.unlock();
-		}
-		return ptr;
-	}
-
-	_FORCE_INLINE_ RID get_rid_by_index(uint32_t p_index) {
-		ERR_FAIL_INDEX_V(p_index, alloc_count, RID());
-		if (THREAD_SAFE) {
-			spin_lock.lock();
+		for (size_t i = 0; i < max_alloc; i++) {
+			uint64_t validator = validator_chunks[i / elements_in_chunk][i % elements_in_chunk];
+			if (validator != 0xFFFFFFFF) {
+				p_owned->push_back(_make_from_id((validator << 32) | i));
+			}
 		}
-		uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk];
-		uint64_t validator = validator_chunks[idx / elements_in_chunk][idx % elements_in_chunk];
-
-		RID rid = _make_from_id((validator << 32) | idx);
 		if (THREAD_SAFE) {
 			spin_lock.unlock();
 		}
-		return rid;
 	}
 
-	void get_owned_list(List<RID> *p_owned) {
+	// Used for fast iteration over the elements or RIDs.
+	void fill_owned_buffer(RID *p_rid_buffer) {
 		if (THREAD_SAFE) {
 			spin_lock.lock();
 		}
+		uint32_t idx = 0;
 		for (size_t i = 0; i < max_alloc; i++) {
 			uint64_t validator = validator_chunks[i / elements_in_chunk][i % elements_in_chunk];
 			if (validator != 0xFFFFFFFF) {
-				p_owned->push_back(_make_from_id((validator << 32) | i));
+				p_rid_buffer[idx] = _make_from_id((validator << 32) | i);
+				idx++;
 			}
 		}
 		if (THREAD_SAFE) {
@@ -425,18 +414,14 @@ public:
 		return alloc.get_rid_count();
 	}
 
-	_FORCE_INLINE_ RID get_rid_by_index(uint32_t p_index) {
-		return alloc.get_rid_by_index(p_index);
-	}
-
-	_FORCE_INLINE_ T *get_ptr_by_index(uint32_t p_index) {
-		return *alloc.get_ptr_by_index(p_index);
-	}
-
 	_FORCE_INLINE_ void get_owned_list(List<RID> *p_owned) {
 		return alloc.get_owned_list(p_owned);
 	}
 
+	void fill_owned_buffer(RID *p_rid_buffer) { // Forwards to alloc.fill_owned_buffer(). NOTE(review): no size is passed, so the caller must pre-size p_rid_buffer (presumably get_rid_count() entries; confirm).
+		alloc.fill_owned_buffer(p_rid_buffer);
+	}
+
 	void set_description(const char *p_descrption) {
 		alloc.set_description(p_descrption);
 	}
@@ -485,17 +470,12 @@ public:
 		return alloc.get_rid_count();
 	}
 
-	_FORCE_INLINE_ RID get_rid_by_index(uint32_t p_index) {
-		return alloc.get_rid_by_index(p_index);
-	}
-
-	_FORCE_INLINE_ T *get_ptr_by_index(uint32_t p_index) {
-		return alloc.get_ptr_by_index(p_index);
-	}
-
 	_FORCE_INLINE_ void get_owned_list(List<RID> *p_owned) {
 		return alloc.get_owned_list(p_owned);
 	}
+	void fill_owned_buffer(RID *p_rid_buffer) { // Forwards to alloc.fill_owned_buffer(). NOTE(review): no size is passed, so the caller must pre-size p_rid_buffer (presumably get_rid_count() entries; confirm).
+		alloc.fill_owned_buffer(p_rid_buffer);
+	}
 
 	void set_description(const char *p_descrption) {
 		alloc.set_description(p_descrption);
diff --git a/core/variant/method_ptrcall.h b/core/variant/method_ptrcall.h
index 75a93ac4c8..d0acf60c22 100644
--- a/core/variant/method_ptrcall.h
+++ b/core/variant/method_ptrcall.h
@@ -31,7 +31,6 @@
 #ifndef METHOD_PTRCALL_H
 #define METHOD_PTRCALL_H
 
-#include "core/math/transform_2d.h"
 #include "core/object/object_id.h"
 #include "core/typedefs.h"
 #include "core/variant/variant.h"
diff --git a/core/variant/variant.h b/core/variant/variant.h
index 36fa755647..b75882a87c 100644
--- a/core/variant/variant.h
+++ b/core/variant/variant.h
@@ -39,8 +39,12 @@
 #include "core/math/face3.h"
 #include "core/math/plane.h"
 #include "core/math/quaternion.h"
+#include "core/math/rect2.h"
+#include "core/math/rect2i.h"
 #include "core/math/transform_2d.h"
 #include "core/math/transform_3d.h"
+#include "core/math/vector2.h"
+#include "core/math/vector2i.h"
 #include "core/math/vector3.h"
 #include "core/math/vector3i.h"
 #include "core/object/object_id.h"
diff --git a/core/variant/variant_parser.cpp b/core/variant/variant_parser.cpp
index 55fc9212b7..e889a1bb40 100644
--- a/core/variant/variant_parser.cpp
+++ b/core/variant/variant_parser.cpp
@@ -188,7 +188,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
 					if (p_stream->is_eof()) {
 						r_token.type = TK_EOF;
 						return OK;
-					} else if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')) {
+					} else if (is_hex_digit(ch)) {
 						color_str += ch;
 
 					} else {
@@ -265,13 +265,13 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
 										r_token.type = TK_ERROR;
 										return ERR_PARSE_ERROR;
 									}
-									if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+									if (!is_hex_digit(c)) {
 										r_err_str = "Malformed hex constant in string";
 										r_token.type = TK_ERROR;
 										return ERR_PARSE_ERROR;
 									}
 									char32_t v;
-									if (c >= '0' && c <= '9') {
+									if (is_digit(c)) {
 										v = c - '0';
 									} else if (c >= 'a' && c <= 'f') {
 										v = c - 'a';
@@ -381,7 +381,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
 					while (true) {
 						switch (reading) {
 							case READING_INT: {
-								if (c >= '0' && c <= '9') {
+								if (is_digit(c)) {
 									//pass
 								} else if (c == '.') {
 									reading = READING_DEC;
@@ -395,7 +395,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
 
 							} break;
 							case READING_DEC: {
-								if (c >= '0' && c <= '9') {
+								if (is_digit(c)) {
 								} else if (c == 'e') {
 									reading = READING_EXP;
 								} else {
@@ -404,7 +404,7 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
 
 							} break;
 							case READING_EXP: {
-								if (c >= '0' && c <= '9') {
+								if (is_digit(c)) {
 									exp_beg = true;
 
 								} else if ((c == '-' || c == '+') && !exp_sign && !exp_beg) {
@@ -433,11 +433,11 @@ Error VariantParser::get_token(Stream *p_stream, Token &r_token, int &line, Stri
 						r_token.value = num.as_int();
 					}
 					return OK;
-				} else if ((cchar >= 'A' && cchar <= 'Z') || (cchar >= 'a' && cchar <= 'z') || cchar == '_') {
+				} else if (is_ascii_char(cchar) || is_underscore(cchar)) {
 					StringBuffer<> id;
 					bool first = true;
 
-					while ((cchar >= 'A' && cchar <= 'Z') || (cchar >= 'a' && cchar <= 'z') || cchar == '_' || (!first && cchar >= '0' && cchar <= '9')) {
+					while (is_ascii_char(cchar) || is_underscore(cchar) || (!first && is_digit(cchar))) {
 						id += cchar;
 						cchar = p_stream->get_char();
 						first = false;
diff --git a/doc/classes/CanvasLayer.xml b/doc/classes/CanvasLayer.xml
index 9ee5ce0dcb..614bd558e8 100644
--- a/doc/classes/CanvasLayer.xml
+++ b/doc/classes/CanvasLayer.xml
@@ -44,5 +44,16 @@
 		<member name="transform" type="Transform2D" setter="set_transform" getter="get_transform" default="Transform2D(1, 0, 0, 1, 0, 0)">
 			The layer's transform.
 		</member>
+		<member name="visible" type="bool" setter="set_visible" getter="is_visible" default="true">
+			If [code]false[/code], any [CanvasItem] under this [CanvasLayer] will be hidden.
+			Unlike [member CanvasItem.visible], visibility of a [CanvasLayer] isn't propagated to underlying layers.
+		</member>
 	</members>
+	<signals>
+		<signal name="visibility_changed">
+			<description>
+				Emitted when visibility of the layer is changed. See [member visible].
+			</description>
+		</signal>
+	</signals>
 </class>
diff --git a/doc/classes/DisplayServer.xml b/doc/classes/DisplayServer.xml
index 8be944b105..281c218d0d 100644
--- a/doc/classes/DisplayServer.xml
+++ b/doc/classes/DisplayServer.xml
@@ -399,6 +399,14 @@
 			<description>
 			</description>
 		</method>
+		<method name="screen_get_refresh_rate" qualifiers="const">
+			<return type="float" />
+			<argument index="0" name="screen" type="int" default="-1" />
+			<description>
+				Returns the current refresh rate of the specified screen. If [code]screen[/code] is [code]SCREEN_OF_MAIN_WINDOW[/code] (the default value), the screen containing the main window will be used.
+				[b]Note:[/b] Returns [code]60.0[/code] if the DisplayServer fails to find the refresh rate for the specified screen. On HTML5, [method screen_get_refresh_rate] will always return [code]60.0[/code] as there is no way to retrieve the refresh rate on that platform.
+			</description>
+		</method>
 		<method name="screen_get_scale" qualifiers="const">
 			<return type="float" />
 			<argument index="0" name="screen" type="int" default="-1" />
@@ -900,6 +908,11 @@
 			Fullscreen window mode. Note that this is not [i]exclusive[/i] fullscreen. On Windows and Linux, a borderless window is used to emulate fullscreen. On macOS, a new desktop is used to display the running project.
 			Regardless of the platform, enabling fullscreen will change the window size to match the monitor's size. Therefore, make sure your project supports [url=$DOCS_URL/tutorials/rendering/multiple_resolutions.html]multiple resolutions[/url] when enabling fullscreen mode.
 		</constant>
+		<constant name="WINDOW_MODE_EXCLUSIVE_FULLSCREEN" value="4" enum="WindowMode">
+			Exclusive fullscreen window mode. This mode is implemented on Windows only. On other platforms, it is equivalent to [constant WINDOW_MODE_FULLSCREEN].
+			Only one window in exclusive fullscreen mode can be visible on a given screen at a time. If multiple windows are in exclusive fullscreen mode for the same screen, the last one being set to this mode takes precedence.
+			Regardless of the platform, enabling fullscreen will change the window size to match the monitor's size. Therefore, make sure your project supports [url=$DOCS_URL/tutorials/rendering/multiple_resolutions.html]multiple resolutions[/url] when enabling fullscreen mode.
+		</constant>
 		<constant name="WINDOW_FLAG_RESIZE_DISABLED" value="0" enum="WindowFlags">
 		</constant>
 		<constant name="WINDOW_FLAG_BORDERLESS" value="1" enum="WindowFlags">
diff --git a/doc/classes/Image.xml b/doc/classes/Image.xml
index 60d4b664d2..2f4a0079c9 100644
--- a/doc/classes/Image.xml
+++ b/doc/classes/Image.xml
@@ -88,6 +88,15 @@
 			<description>
 			</description>
 		</method>
+		<method name="compute_image_metrics">
+			<return type="Dictionary" />
+			<argument index="0" name="compared_image" type="Image" />
+			<argument index="1" name="use_luma" type="bool" />
+			<description>
+				Compute image metrics on the current image and the compared image.
+				The dictionary contains [code]max[/code], [code]mean[/code], [code]mean_squared[/code], [code]root_mean_squared[/code] and [code]peak_snr[/code].
+			</description>
+		</method>
 		<method name="convert">
 			<return type="void" />
 			<argument index="0" name="format" type="int" enum="Image.Format" />
diff --git a/doc/classes/MultiplayerAPI.xml b/doc/classes/MultiplayerAPI.xml
index e0da08f5bd..426d902983 100644
--- a/doc/classes/MultiplayerAPI.xml
+++ b/doc/classes/MultiplayerAPI.xml
@@ -79,8 +79,6 @@
 		<member name="refuse_new_connections" type="bool" setter="set_refuse_new_connections" getter="is_refusing_new_connections" default="false">
 			If [code]true[/code], the MultiplayerAPI's [member multiplayer_peer] refuses new incoming connections.
 		</member>
-		<member name="replicator" type="MultiplayerReplicator" setter="" getter="get_replicator">
-		</member>
 		<member name="root_node" type="Node" setter="set_root_node" getter="get_root_node">
 			The root node to use for RPCs. Instead of an absolute path, a relative path will be used to find the node upon which the RPC should be executed.
 			This effectively allows to have different branches of the scene tree to be managed by different MultiplayerAPI, allowing for example to run both client and server in the same scene.
diff --git a/doc/classes/MultiplayerReplicator.xml b/doc/classes/MultiplayerReplicator.xml
deleted file mode 100644
index c2e93ddeab..0000000000
--- a/doc/classes/MultiplayerReplicator.xml
+++ /dev/null
@@ -1,191 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<class name="MultiplayerReplicator" inherits="Object" version="4.0">
-	<brief_description>
-	</brief_description>
-	<description>
-	</description>
-	<tutorials>
-	</tutorials>
-	<methods>
-		<method name="decode_state">
-			<return type="int" enum="Error" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="object" type="Object" />
-			<argument index="2" name="data" type="PackedByteArray" />
-			<argument index="3" name="initial" type="bool" default="true" />
-			<description>
-				Decode the given [code]data[/code] representing a spawnable state into [code]object[/code] using the configuration associated with the provided [code]scene_id[/code]. This function is called automatically when a client receives a server spawn for a scene with [constant REPLICATION_MODE_SERVER]. See [method spawn_config].
-				Tip: You may find this function useful in servers when parsing spawn requests from clients, or when implementing your own logic with [constant REPLICATION_MODE_CUSTOM].
-			</description>
-		</method>
-		<method name="despawn">
-			<return type="int" enum="Error" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="object" type="Object" />
-			<argument index="2" name="peer_id" type="int" default="0" />
-			<description>
-				Request a despawn for the scene identified by [code]scene_id[/code] to the given [code]peer_id[/code]. This will either trigger the default behavior, or invoke the custom spawn/despawn callables specified in [method spawn_config]. See [method send_despawn] for the default behavior.
-			</description>
-		</method>
-		<method name="encode_state">
-			<return type="PackedByteArray" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="object" type="Object" />
-			<argument index="2" name="initial" type="bool" default="true" />
-			<description>
-				Encode the given [code]object[/code] using the configuration associated with the provided [code]scene_id[/code]. This function is called automatically when the server spawns scenes with [constant REPLICATION_MODE_SERVER]. See [method spawn_config].
-				Tip: You may find this function useful when requesting spawns from clients to server, or when implementing your own logic with [constant REPLICATION_MODE_CUSTOM].
-			</description>
-		</method>
-		<method name="send_despawn">
-			<return type="int" enum="Error" />
-			<argument index="0" name="peer_id" type="int" />
-			<argument index="1" name="scene_id" type="int" />
-			<argument index="2" name="data" type="Variant" default="null" />
-			<argument index="3" name="path" type="NodePath" default="NodePath(&quot;&quot;)" />
-			<description>
-				Sends a despawn request for the scene identified by [code]scene_id[/code] to the given [code]peer_id[/code] (see [method MultiplayerPeer.set_target_peer]). If the scene is configured as [constant REPLICATION_MODE_SERVER] (see [method spawn_config]) and the request is sent by the server (see [method MultiplayerAPI.is_server]), the receiving peer(s) will automatically queue for deletion the node at [code]path[/code] and emit the signal [signal despawned]. In all other cases no deletion happens, and the signal [signal despawn_requested] is emitted instead.
-			</description>
-		</method>
-		<method name="send_spawn">
-			<return type="int" enum="Error" />
-			<argument index="0" name="peer_id" type="int" />
-			<argument index="1" name="scene_id" type="int" />
-			<argument index="2" name="data" type="Variant" default="null" />
-			<argument index="3" name="path" type="NodePath" default="NodePath(&quot;&quot;)" />
-			<description>
-				Sends a spawn request for the scene identified by [code]scene_id[/code] to the given [code]peer_id[/code] (see [method MultiplayerPeer.set_target_peer]). If the scene is configured as [constant REPLICATION_MODE_SERVER] (see [method spawn_config]) and the request is sent by the server (see [method MultiplayerAPI.is_server]), the receiving peer(s) will automatically instantiate that scene, add it to the [SceneTree] at the given [code]path[/code] and emit the signal [signal spawned]. In all other cases no instantiation happens, and the signal [signal spawn_requested] is emitted instead.
-			</description>
-		</method>
-		<method name="send_sync">
-			<return type="int" enum="Error" />
-			<argument index="0" name="peer_id" type="int" />
-			<argument index="1" name="scene_id" type="int" />
-			<argument index="2" name="data" type="PackedByteArray" />
-			<argument index="3" name="transfer_mode" type="int" enum="TransferMode" default="2" />
-			<argument index="4" name="channel" type="int" default="0" />
-			<description>
-				Sends a sync request for the instances of the scene identified by [code]scene_id[/code] to the given [code]peer_id[/code] (see [method MultiplayerPeer.set_target_peer]). This function can only be called manually when overriding the send and receive sync functions (see [method sync_config]).
-			</description>
-		</method>
-		<method name="spawn">
-			<return type="int" enum="Error" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="object" type="Object" />
-			<argument index="2" name="peer_id" type="int" default="0" />
-			<description>
-				Request a spawn for the scene identified by [code]scene_id[/code] to the given [code]peer_id[/code]. This will either trigger the default behavior, or invoke the custom spawn/despawn callables specified in [method spawn_config]. See [method send_spawn] for the default behavior.
-			</description>
-		</method>
-		<method name="spawn_config">
-			<return type="int" enum="Error" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="spawn_mode" type="int" enum="MultiplayerReplicator.ReplicationMode" />
-			<argument index="2" name="properties" type="StringName[]" default="[]" />
-			<argument index="3" name="custom_send" type="Callable" />
-			<argument index="4" name="custom_receive" type="Callable" />
-			<description>
-				Configures the MultiplayerReplicator to track instances of the [PackedScene] identified by [code]scene_id[/code] (see [method ResourceLoader.get_resource_uid]) for the purpose of network replication. When [code]mode[/code] is [constant REPLICATION_MODE_SERVER], the specified [code]properties[/code] will also be replicated to clients during the initial spawn. You can optionally specify a [code]custom_send[/code] and a [code]custom_receive[/code] to override the default behavior and customize the spawn/despawn proecess.
-				Tip: You can use a custom property in the scene main script to return a customly optimized state representation.
-			</description>
-		</method>
-		<method name="sync_all">
-			<return type="int" enum="Error" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="peer_id" type="int" default="0" />
-			<description>
-				Manually request a sync for all the instances of the scene identified by [code]scene_id[/code]. This function will trigger the default sync behavior, or call your send custom send callable if specified in [method sync_config].
-				[b]Note:[/b] The default implementation only allow syncing from server to clients.
-			</description>
-		</method>
-		<method name="sync_config">
-			<return type="int" enum="Error" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="interval" type="int" />
-			<argument index="2" name="properties" type="StringName[]" default="[]" />
-			<argument index="3" name="custom_send" type="Callable" />
-			<argument index="4" name="custom_receive" type="Callable" />
-			<description>
-				Configures the MultiplayerReplicator to sync instances of the [PackedScene] identified by [code]scene_id[/code] (see [method ResourceLoader.get_resource_uid]) for the purpose of network replication at the desired [code]interval[/code] (in milliseconds). The specified [code]properties[/code] will be part of the state sync. You can optionally specify a [code]custom_send[/code] and a [code]custom_receive[/code] to override the default behavior and customize the synchronization proecess.
-				Tip: You can use a custom property in the scene main script to return a customly optimized state representation (having a single property that returns a PackedByteArray is highly recommended when dealing with many instances).
-			</description>
-		</method>
-		<method name="track">
-			<return type="void" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="object" type="Object" />
-			<description>
-				Track the given [code]object[/code] as an instance of the scene identified by [code]scene_id[/code]. This object will be passed to your custom sync callables (see [method sync_config]). Tracking and untracking is automatic in [constant REPLICATION_MODE_SERVER].
-			</description>
-		</method>
-		<method name="untrack">
-			<return type="void" />
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="object" type="Object" />
-			<description>
-				Untrack the given [code]object[/code]. This object will no longer be passed to your custom sync callables (see [method sync_config]). Tracking and untracking is automatic in [constant REPLICATION_MODE_SERVER].
-			</description>
-		</method>
-	</methods>
-	<signals>
-		<signal name="despawn_requested">
-			<argument index="0" name="id" type="int" />
-			<argument index="1" name="scene_id" type="int" />
-			<argument index="2" name="parent" type="Node" />
-			<argument index="3" name="name" type="String" />
-			<argument index="4" name="data" type="PackedByteArray" />
-			<description>
-				Emitted when a network despawn request has been received from a client, or for a [PackedScene] that has been configured as [constant REPLICATION_MODE_CUSTOM].
-			</description>
-		</signal>
-		<signal name="despawned">
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="node" type="Node" />
-			<description>
-				Emitted on a client before deleting a local Node upon receiving a despawn request from the server.
-			</description>
-		</signal>
-		<signal name="replicated_instance_added">
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="node" type="Node" />
-			<description>
-				Emitted when an instance of a [PackedScene] that has been configured for networking enters the [SceneTree]. See [method spawn_config].
-			</description>
-		</signal>
-		<signal name="replicated_instance_removed">
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="node" type="Node" />
-			<description>
-				Emitted when an instance of a [PackedScene] that has been configured for networking leaves the [SceneTree]. See [method spawn_config].
-			</description>
-		</signal>
-		<signal name="spawn_requested">
-			<argument index="0" name="id" type="int" />
-			<argument index="1" name="scene_id" type="int" />
-			<argument index="2" name="parent" type="Node" />
-			<argument index="3" name="name" type="String" />
-			<argument index="4" name="data" type="PackedByteArray" />
-			<description>
-				Emitted when a network spawn request has been received from a client, or for a [PackedScene] that has been configured as [constant REPLICATION_MODE_CUSTOM].
-			</description>
-		</signal>
-		<signal name="spawned">
-			<argument index="0" name="scene_id" type="int" />
-			<argument index="1" name="node" type="Node" />
-			<description>
-				Emitted on a client after a new Node is instantiated locally and added to the SceneTree upon receiving a spawn request from the server.
-			</description>
-		</signal>
-	</signals>
-	<constants>
-		<constant name="REPLICATION_MODE_NONE" value="0" enum="ReplicationMode">
-			Used with [method spawn_config] to identify a [PackedScene] that should not be replicated.
-		</constant>
-		<constant name="REPLICATION_MODE_SERVER" value="1" enum="ReplicationMode">
-			Used with [method spawn_config] to identify a [PackedScene] that should be automatically replicated from server to clients.
-		</constant>
-		<constant name="REPLICATION_MODE_CUSTOM" value="2" enum="ReplicationMode">
-			Used with [method spawn_config] to identify a [PackedScene] that can be manually replicated among peers.
-		</constant>
-	</constants>
-</class>
diff --git a/doc/classes/MultiplayerSpawner.xml b/doc/classes/MultiplayerSpawner.xml
new file mode 100644
index 0000000000..8bfecfce41
--- /dev/null
+++ b/doc/classes/MultiplayerSpawner.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<class name="MultiplayerSpawner" inherits="Node" version="4.0">
+	<brief_description>
+	</brief_description>
+	<description>
+	</description>
+	<tutorials>
+	</tutorials>
+	<methods>
+		<method name="_spawn_custom" qualifiers="virtual">
+			<return type="Object" />
+			<argument index="0" name="data" type="Variant" />
+			<description>
+			</description>
+		</method>
+		<method name="spawn">
+			<return type="Node" />
+			<argument index="0" name="data" type="Variant" default="null" />
+			<description>
+			</description>
+		</method>
+	</methods>
+	<members>
+		<member name="auto_spawn" type="bool" setter="set_auto_spawning" getter="is_auto_spawning" default="false">
+		</member>
+		<member name="replication" type="PackedScene[]" setter="set_spawnable_scenes" getter="get_spawnable_scenes" default="[]">
+		</member>
+		<member name="spawn_limit" type="int" setter="set_spawn_limit" getter="get_spawn_limit" default="0">
+		</member>
+		<member name="spawn_path" type="NodePath" setter="set_spawn_path" getter="get_spawn_path" default="NodePath(&quot;&quot;)">
+		</member>
+	</members>
+	<signals>
+		<signal name="despawned">
+			<argument index="0" name="scene_id" type="int" />
+			<argument index="1" name="node" type="Node" />
+			<description>
+			</description>
+		</signal>
+		<signal name="spawned">
+			<argument index="0" name="scene_id" type="int" />
+			<argument index="1" name="node" type="Node" />
+			<description>
+			</description>
+		</signal>
+	</signals>
+</class>
diff --git a/doc/classes/MultiplayerSynchronizer.xml b/doc/classes/MultiplayerSynchronizer.xml
new file mode 100644
index 0000000000..242d4589a4
--- /dev/null
+++ b/doc/classes/MultiplayerSynchronizer.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<class name="MultiplayerSynchronizer" inherits="Node" version="4.0">
+	<brief_description>
+	</brief_description>
+	<description>
+	</description>
+	<tutorials>
+	</tutorials>
+	<members>
+		<member name="replication_interval" type="float" setter="set_replication_interval" getter="get_replication_interval" default="0.0">
+		</member>
+		<member name="resource" type="SceneReplicationConfig" setter="set_replication_config" getter="get_replication_config">
+		</member>
+		<member name="root_path" type="NodePath" setter="set_root_path" getter="get_root_path" default="NodePath(&quot;&quot;)">
+		</member>
+	</members>
+</class>
diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml
index 9c37fe8639..89bc905e69 100644
--- a/doc/classes/Node.xml
+++ b/doc/classes/Node.xml
@@ -79,7 +79,7 @@
 			<return type="void" />
 			<description>
 				Called when the node is "ready", i.e. when both the node and its children have entered the scene tree. If the node has children, their [method _ready] callbacks get triggered first, and the parent node will receive the ready notification afterwards.
-				Corresponds to the [constant NOTIFICATION_READY] notification in [method Object._notification]. See also the [code]onready[/code] keyword for variables.
+				Corresponds to the [constant NOTIFICATION_READY] notification in [method Object._notification]. See also the [code]@onready[/code] annotation for variables.
 				Usually used for initialization. For even earlier initialization, [method Object._init] may be used. See also [method _enter_tree].
 				[b]Note:[/b] [method _ready] may be called only once for each node. After removing a node from the scene tree and adding it again, [code]_ready[/code] will not be called a second time. This can be bypassed by requesting another call with [method request_ready], which may be called anywhere before adding the node again.
 			</description>
diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
index dc40d2fd1b..ed124d1d15 100644
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -356,6 +356,8 @@
 		<member name="debug/gdscript/warnings/incompatible_ternary" type="bool" setter="" getter="" default="true">
 			If [code]true[/code], enables warnings when a ternary operator may emit values with incompatible types.
 		</member>
+		<member name="debug/gdscript/warnings/int_assigned_to_enum" type="bool" setter="" getter="" default="true">
+		</member>
 		<member name="debug/gdscript/warnings/integer_division" type="bool" setter="" getter="" default="true">
 			If [code]true[/code], enables warnings when dividing an integer by another integer (the decimal part will be discarded).
 		</member>
diff --git a/doc/classes/SceneReplicationConfig.xml b/doc/classes/SceneReplicationConfig.xml
new file mode 100644
index 0000000000..e846740dd3
--- /dev/null
+++ b/doc/classes/SceneReplicationConfig.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<class name="SceneReplicationConfig" inherits="Resource" version="4.0">
+	<brief_description>
+	</brief_description>
+	<description>
+	</description>
+	<tutorials>
+	</tutorials>
+	<methods>
+		<method name="add_property">
+			<return type="void" />
+			<argument index="0" name="path" type="NodePath" />
+			<argument index="1" name="index" type="int" default="-1" />
+			<description>
+			</description>
+		</method>
+		<method name="get_properties" qualifiers="const">
+			<return type="NodePath[]" />
+			<description>
+			</description>
+		</method>
+		<method name="property_get_index" qualifiers="const">
+			<return type="int" />
+			<argument index="0" name="path" type="NodePath" />
+			<description>
+			</description>
+		</method>
+		<method name="property_get_spawn">
+			<return type="bool" />
+			<argument index="0" name="path" type="NodePath" />
+			<description>
+			</description>
+		</method>
+		<method name="property_get_sync">
+			<return type="bool" />
+			<argument index="0" name="path" type="NodePath" />
+			<description>
+			</description>
+		</method>
+		<method name="property_set_spawn">
+			<return type="void" />
+			<argument index="0" name="path" type="NodePath" />
+			<argument index="1" name="enabled" type="bool" />
+			<description>
+			</description>
+		</method>
+		<method name="property_set_sync">
+			<return type="void" />
+			<argument index="0" name="path" type="NodePath" />
+			<argument index="1" name="enabled" type="bool" />
+			<description>
+			</description>
+		</method>
+		<method name="remove_property">
+			<return type="void" />
+			<argument index="0" name="path" type="NodePath" />
+			<description>
+			</description>
+		</method>
+	</methods>
+</class>
diff --git a/doc/classes/TextureButton.xml b/doc/classes/TextureButton.xml
index 476ab2d1bf..5f081b95f5 100644
--- a/doc/classes/TextureButton.xml
+++ b/doc/classes/TextureButton.xml
@@ -12,17 +12,17 @@
 		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
 	</tutorials>
 	<members>
-		<member name="expand" type="bool" setter="set_expand" getter="get_expand" default="false">
-			If [code]true[/code], the texture stretches to the edges of the node's bounding rectangle using the [member stretch_mode]. If [code]false[/code], the texture will not scale with the node.
-		</member>
 		<member name="flip_h" type="bool" setter="set_flip_h" getter="is_flipped_h" default="false">
 			If [code]true[/code], texture is flipped horizontally.
 		</member>
 		<member name="flip_v" type="bool" setter="set_flip_v" getter="is_flipped_v" default="false">
 			If [code]true[/code], texture is flipped vertically.
 		</member>
-		<member name="stretch_mode" type="int" setter="set_stretch_mode" getter="get_stretch_mode" enum="TextureButton.StretchMode" default="0">
-			Controls the texture's behavior when you resize the node's bounding rectangle, [b]only if[/b] [member expand] is [code]true[/code]. Set it to one of the [enum StretchMode] constants. See the constants to learn more.
+		<member name="ignore_texture_size" type="bool" setter="set_ignore_texture_size" getter="get_ignore_texture_size" default="false">
+			If [code]true[/code], the size of the texture won't be considered for minimum size calculation, so the [TextureButton] can be shrunk down past the texture size.
+		</member>
+		<member name="stretch_mode" type="int" setter="set_stretch_mode" getter="get_stretch_mode" enum="TextureButton.StretchMode" default="2">
+			Controls the texture's behavior when you resize the node's bounding rectangle. See the [enum StretchMode] constants for available options.
 		</member>
 		<member name="texture_click_mask" type="BitMap" setter="set_click_mask" getter="get_click_mask">
 			Pure black and white [BitMap] image to use for click detection. On the mask, white pixels represent the button's clickable area. Use it to create buttons with curved shapes.
diff --git a/doc/classes/Theme.xml b/doc/classes/Theme.xml
index d1a48fda55..b1367be263 100644
--- a/doc/classes/Theme.xml
+++ b/doc/classes/Theme.xml
@@ -1,20 +1,22 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="Theme" inherits="Resource" version="4.0">
 	<brief_description>
-		Theme for controls.
+		Theme resource for styling/skinning [Control]s and [Window]s.
 	</brief_description>
 	<description>
-		A theme for skinning controls. Controls can be skinned individually, but for complex applications, it's more practical to just create a global theme that defines everything. This theme can be applied to any [Control]; the Control and its children will automatically use it.
-		Theme resources can alternatively be loaded by writing them in a [code].theme[/code] file, see the documentation for more information.
+		A theme resource is used for styling/skinning [Control] and [Window] nodes. While individual controls can be styled using their local theme overrides (see [method Control.add_theme_color_override]), theme resources allow you to store and apply the same settings across all controls sharing the same type (e.g. style all [Button]s the same). One theme resource can be used for the entire project, but you can also set a separate theme resource to a branch of control nodes. A theme resource assigned to a control node applies to the control itself, as well as all of its direct and indirect children (as long as a chain of controls is uninterrupted).
+		Use [member ProjectSettings.gui/theme/custom] to set up a project-scope theme that will be available to every control in your project.
+		Use [member Control.theme] of any control node to set up a theme that will be available to that control and all of its direct and indirect children.
 	</description>
 	<tutorials>
 		<link title="GUI skinning">$DOCS_URL/tutorials/ui/gui_skinning.html</link>
+		<link title="Using the theme editor">$DOCS_URL/tutorials/ui/gui_using_theme_editor.html</link>
 	</tutorials>
 	<methods>
 		<method name="clear">
 			<return type="void" />
 			<description>
-				Clears all values on the theme.
+				Removes all the theme properties defined on the theme resource.
 			</description>
 		</method>
 		<method name="clear_color">
@@ -22,7 +24,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Clears the [Color] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the [Color] property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_color] to check for existence.
 			</description>
 		</method>
 		<method name="clear_constant">
@@ -30,7 +33,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Clears the constant at [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the constant property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_constant] to check for existence.
 			</description>
 		</method>
 		<method name="clear_font">
@@ -38,7 +42,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Clears the [Font] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the [Font] property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_font] to check for existence.
 			</description>
 		</method>
 		<method name="clear_font_size">
@@ -46,7 +51,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Clears the font size [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the font size property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_font_size] to check for existence.
 			</description>
 		</method>
 		<method name="clear_icon">
@@ -54,7 +60,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Clears the icon at [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the icon property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_icon] to check for existence.
 			</description>
 		</method>
 		<method name="clear_stylebox">
@@ -62,7 +69,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Clears [StyleBox] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the [StyleBox] property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_stylebox] to check for existence.
 			</description>
 		</method>
 		<method name="clear_theme_item">
@@ -71,14 +79,16 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Clears the theme item of [code]data_type[/code] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Removes the theme property of [code]data_type[/code] defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Fails if it doesn't exist. Use [method has_theme_item] to check for existence.
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="clear_type_variation">
 			<return type="void" />
 			<argument index="0" name="theme_type" type="StringName" />
 			<description>
-				Unmarks [code]theme_type[/code] as being a variation of any other type.
+				Unmarks [code]theme_type[/code] as being a variation of another theme type. See [method set_type_variation].
 			</description>
 		</method>
 		<method name="get_color" qualifiers="const">
@@ -86,20 +96,21 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns the [Color] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Returns the [Color] property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns the default color value if the property doesn't exist. Use [method has_color] to check for existence.
 			</description>
 		</method>
 		<method name="get_color_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="theme_type" type="String" />
 			<description>
-				Returns all the [Color]s as a [PackedStringArray] filled with each [Color]'s name, for use in [method get_color], if the theme has [code]theme_type[/code].
+				Returns a list of names for [Color] properties defined with [code]theme_type[/code]. Use [method get_color_type_list] to get a list of possible theme type names.
 			</description>
 		</method>
 		<method name="get_color_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the [Color] types as a [PackedStringArray] filled with unique type names, for use in [method get_color] and/or [method get_color_list].
+				Returns a list of all unique theme type names for [Color] properties. Use [method get_type_list] to get a list of all unique theme types.
 			</description>
 		</method>
 		<method name="get_constant" qualifiers="const">
@@ -107,20 +118,21 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns the constant at [code]name[/code] if the theme has [code]theme_type[/code].
+				Returns the constant property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns [code]0[/code] if the property doesn't exist. Use [method has_constant] to check for existence.
 			</description>
 		</method>
 		<method name="get_constant_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="theme_type" type="String" />
 			<description>
-				Returns all the constants as a [PackedStringArray] filled with each constant's name, for use in [method get_constant], if the theme has [code]theme_type[/code].
+				Returns a list of names for constant properties defined with [code]theme_type[/code]. Use [method get_constant_type_list] to get a list of possible theme type names.
 			</description>
 		</method>
 		<method name="get_constant_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the constant types as a [PackedStringArray] filled with unique type names, for use in [method get_constant] and/or [method get_constant_list].
+				Returns a list of all unique theme type names for constant properties. Use [method get_type_list] to get a list of all unique theme types.
 			</description>
 		</method>
 		<method name="get_font" qualifiers="const">
@@ -128,14 +140,16 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns the [Font] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Returns the [Font] property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns the default theme font if the property doesn't exist and the default theme font is set up (see [member default_font]). Use [method has_font] to check for existence of the property and [method has_default_font] to check for existence of the default theme font.
+				Returns the engine fallback font value if neither exists.
 			</description>
 		</method>
 		<method name="get_font_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="theme_type" type="String" />
 			<description>
-				Returns all the [Font]s as a [PackedStringArray] filled with each [Font]'s name, for use in [method get_font], if the theme has [code]theme_type[/code].
+				Returns a list of names for [Font] properties defined with [code]theme_type[/code]. Use [method get_font_type_list] to get a list of possible theme type names.
 			</description>
 		</method>
 		<method name="get_font_size" qualifiers="const">
@@ -143,26 +157,28 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns the font size at [code]name[/code] if the theme has [code]theme_type[/code].
+				Returns the font size property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns the default theme font size if the property doesn't exist and the default theme font size is set up (see [member default_font_size]). Use [method has_font_size] to check for existence of the property and [method has_default_font_size] to check for existence of the default theme font size.
+				Returns the engine fallback font size value, if neither exists.
 			</description>
 		</method>
 		<method name="get_font_size_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="theme_type" type="String" />
 			<description>
-				Returns all the font sizes as a [PackedStringArray] filled with each font size name, for use in [method get_font_size], if the theme has [code]theme_type[/code].
+				Returns a list of names for font size properties defined with [code]theme_type[/code]. Use [method get_font_size_type_list] to get a list of possible theme type names.
 			</description>
 		</method>
 		<method name="get_font_size_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the font size types as a [PackedStringArray] filled with unique type names, for use in [method get_font_size] and/or [method get_font_size_list].
+				Returns a list of all unique theme type names for font size properties. Use [method get_type_list] to get a list of all unique theme types.
 			</description>
 		</method>
 		<method name="get_font_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the [Font] types as a [PackedStringArray] filled with unique type names, for use in [method get_font] and/or [method get_font_list].
+				Returns a list of all unique theme type names for [Font] properties. Use [method get_type_list] to get a list of all unique theme types.
 			</description>
 		</method>
 		<method name="get_icon" qualifiers="const">
@@ -170,20 +186,21 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns the icon [Texture2D] at [code]name[/code] if the theme has [code]theme_type[/code].
+				Returns the icon property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns the engine fallback icon value if the property doesn't exist. Use [method has_icon] to check for existence.
 			</description>
 		</method>
 		<method name="get_icon_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="theme_type" type="String" />
 			<description>
-				Returns all the icons as a [PackedStringArray] filled with each [Texture2D]'s name, for use in [method get_icon], if the theme has [code]theme_type[/code].
+				Returns a list of names for icon properties defined with [code]theme_type[/code]. Use [method get_icon_type_list] to get a list of possible theme type names.
 			</description>
 		</method>
 		<method name="get_icon_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the icon types as a [PackedStringArray] filled with unique type names, for use in [method get_icon] and/or [method get_icon_list].
+				Returns a list of all unique theme type names for icon properties. Use [method get_type_list] to get a list of all unique theme types.
 			</description>
 		</method>
 		<method name="get_stylebox" qualifiers="const">
@@ -191,22 +208,21 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns the [StyleBox] at [code]name[/code] if the theme has [code]theme_type[/code].
-				Valid [code]name[/code]s may be found using [method get_stylebox_list]. Valid [code]theme_type[/code]s may be found using [method get_stylebox_type_list].
+				Returns the [StyleBox] property defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns the engine fallback stylebox value if the property doesn't exist. Use [method has_stylebox] to check for existence.
 			</description>
 		</method>
 		<method name="get_stylebox_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="theme_type" type="String" />
 			<description>
-				Returns all the [StyleBox]s as a [PackedStringArray] filled with each [StyleBox]'s name, for use in [method get_stylebox], if the theme has [code]theme_type[/code].
-				Valid [code]theme_type[/code]s may be found using [method get_stylebox_type_list].
+				Returns a list of names for [StyleBox] properties defined with [code]theme_type[/code]. Use [method get_stylebox_type_list] to get a list of possible theme type names.
 			</description>
 		</method>
 		<method name="get_stylebox_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the [StyleBox] types as a [PackedStringArray] filled with unique type names, for use in [method get_stylebox] and/or [method get_stylebox_list].
+				Returns a list of all unique theme type names for [StyleBox] properties. Use [method get_type_list] to get a list of all unique theme types.
 			</description>
 		</method>
 		<method name="get_theme_item" qualifiers="const">
@@ -215,8 +231,9 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Returns the theme item of [code]data_type[/code] at [code]name[/code] if the theme has [code]theme_type[/code].
-				Valid [code]name[/code]s may be found using [method get_theme_item_list] or a data type specific method. Valid [code]theme_type[/code]s may be found using [method get_theme_item_type_list] or a data type specific method.
+				Returns the theme property of [code]data_type[/code] defined by [code]name[/code] and [code]theme_type[/code], if it exists.
+				Returns the engine fallback value if the property doesn't exist. Use [method has_theme_item] to check for existence.
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="get_theme_item_list" qualifiers="const">
@@ -224,35 +241,36 @@
 			<argument index="0" name="data_type" type="int" enum="Theme.DataType" />
 			<argument index="1" name="theme_type" type="String" />
 			<description>
-				Returns all the theme items of [code]data_type[/code] as a [PackedStringArray] filled with each theme items's name, for use in [method get_theme_item] or a data type specific method, if the theme has [code]theme_type[/code].
-				Valid [code]theme_type[/code]s may be found using [method get_theme_item_type_list] or a data type specific method.
+				Returns a list of names for properties of [code]data_type[/code] defined with [code]theme_type[/code]. Use [method get_theme_item_type_list] to get a list of possible theme type names.
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="get_theme_item_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="data_type" type="int" enum="Theme.DataType" />
 			<description>
-				Returns all the theme items of [code]data_type[/code] types as a [PackedStringArray] filled with unique type names, for use in [method get_theme_item], [method get_theme_item_list] or data type specific methods.
+				Returns a list of all unique theme type names for [code]data_type[/code] properties. Use [method get_type_list] to get a list of all unique theme types.
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="get_type_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<description>
-				Returns all the theme types as a [PackedStringArray] filled with unique type names, for use in other [code]get_*[/code] functions of this theme.
+				Returns a list of all unique theme type names. Use the appropriate [code]get_*_type_list[/code] method to get a list of unique theme types for a single data type.
 			</description>
 		</method>
 		<method name="get_type_variation_base" qualifiers="const">
 			<return type="StringName" />
 			<argument index="0" name="theme_type" type="StringName" />
 			<description>
-				Returns the base theme type if [code]theme_type[/code] is a valid variation type. Returns an empty string otherwise.
+				Returns the name of the base theme type if [code]theme_type[/code] is a valid variation type. Returns an empty string otherwise.
 			</description>
 		</method>
 		<method name="get_type_variation_list" qualifiers="const">
 			<return type="PackedStringArray" />
 			<argument index="0" name="base_type" type="StringName" />
 			<description>
-				Returns a list of all variation for the given [code]base_type[/code].
+				Returns a list of all type variations for the given [code]base_type[/code].
 			</description>
 		</method>
 		<method name="has_color" qualifiers="const">
@@ -260,8 +278,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if [Color] with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the [Color] property defined by [code]name[/code] and [code]theme_type[/code] exists.
+				Returns [code]false[/code] if it doesn't exist. Use [method set_color] to define it.
 			</description>
 		</method>
 		<method name="has_constant" qualifiers="const">
@@ -269,26 +287,29 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if constant with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the constant property defined by [code]name[/code] and [code]theme_type[/code] exists.
+				Returns [code]false[/code] if it doesn't exist. Use [method set_constant] to define it.
 			</description>
 		</method>
 		<method name="has_default_base_scale" qualifiers="const">
 			<return type="bool" />
 			<description>
-				Returns [code]true[/code] if this theme has a valid [member default_base_scale] value.
+				Returns [code]true[/code] if [member default_base_scale] has a valid value.
+				Returns [code]false[/code] if it doesn't. The value must be greater than [code]0.0[/code] to be considered valid.
 			</description>
 		</method>
 		<method name="has_default_font" qualifiers="const">
 			<return type="bool" />
 			<description>
-				Returns [code]true[/code] if this theme has a valid [member default_font] value.
+				Returns [code]true[/code] if [member default_font] has a valid value.
+				Returns [code]false[/code] if it doesn't.
 			</description>
 		</method>
 		<method name="has_default_font_size" qualifiers="const">
 			<return type="bool" />
 			<description>
-				Returns [code]true[/code] if this theme has a valid [member default_font_size] value.
+				Returns [code]true[/code] if [member default_font_size] has a valid value.
+				Returns [code]false[/code] if it doesn't. The value must be greater than [code]0[/code] to be considered valid.
 			</description>
 		</method>
 		<method name="has_font" qualifiers="const">
@@ -296,8 +317,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if [Font] with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the [Font] property defined by [code]name[/code] and [code]theme_type[/code] exists, or if the default theme font is set up (see [method has_default_font]).
+				Returns [code]false[/code] if neither exists. Use [method set_font] to define the property.
 			</description>
 		</method>
 		<method name="has_font_size" qualifiers="const">
@@ -305,8 +326,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if font size with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the font size property defined by [code]name[/code] and [code]theme_type[/code] exists, or if the default theme font size is set up (see [method has_default_font_size]).
+				Returns [code]false[/code] if neither exists. Use [method set_font_size] to define the property.
 			</description>
 		</method>
 		<method name="has_icon" qualifiers="const">
@@ -314,8 +335,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if icon [Texture2D] with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the icon property defined by [code]name[/code] and [code]theme_type[/code] exists.
+				Returns [code]false[/code] if it doesn't exist. Use [method set_icon] to define it.
 			</description>
 		</method>
 		<method name="has_stylebox" qualifiers="const">
@@ -323,8 +344,8 @@
 			<argument index="0" name="name" type="StringName" />
 			<argument index="1" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if [StyleBox] with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the [StyleBox] property defined by [code]name[/code] and [code]theme_type[/code] exists.
+				Returns [code]false[/code] if it doesn't exist. Use [method set_stylebox] to define it.
 			</description>
 		</method>
 		<method name="has_theme_item" qualifiers="const">
@@ -333,8 +354,9 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if a theme item of [code]data_type[/code] with [code]name[/code] is in [code]theme_type[/code].
-				Returns [code]false[/code] if the theme does not have [code]theme_type[/code].
+				Returns [code]true[/code] if the theme property of [code]data_type[/code] defined by [code]name[/code] and [code]theme_type[/code] exists.
+				Returns [code]false[/code] if it doesn't exist. Use [method set_theme_item] to define it.
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="is_type_variation" qualifiers="const">
@@ -342,14 +364,14 @@
 			<argument index="0" name="theme_type" type="StringName" />
 			<argument index="1" name="base_type" type="StringName" />
 			<description>
-				Returns [code]true[/code] if [code]theme_type[/code] is marked as a variation of [code]base_type[/code] in this theme.
+				Returns [code]true[/code] if [code]theme_type[/code] is marked as a variation of [code]base_type[/code].
 			</description>
 		</method>
 		<method name="merge_with">
 			<return type="void" />
 			<argument index="0" name="other" type="Theme" />
 			<description>
-				Adds missing and overrides existing definitions with values from the [code]other[/code] [Theme].
+				Adds missing and overrides existing definitions with values from the [code]other[/code] theme resource.
 				[b]Note:[/b] This modifies the current theme. If you want to merge two themes together without modifying either one, create a new empty theme and merge the other two into it one after another.
 			</description>
 		</method>
@@ -359,7 +381,8 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Renames the [Color] at [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the [Color] property defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_color] to check for existence, and [method clear_color] to remove the existing property.
 			</description>
 		</method>
 		<method name="rename_constant">
@@ -368,7 +391,8 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Renames the constant at [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the constant property defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_constant] to check for existence, and [method clear_constant] to remove the existing property.
 			</description>
 		</method>
 		<method name="rename_font">
@@ -377,7 +401,8 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Renames the [Font] at [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the [Font] property defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_font] to check for existence, and [method clear_font] to remove the existing property.
 			</description>
 		</method>
 		<method name="rename_font_size">
@@ -386,7 +411,8 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Renames the font size [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the font size property defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_font_size] to check for existence, and [method clear_font_size] to remove the existing property.
 			</description>
 		</method>
 		<method name="rename_icon">
@@ -395,7 +421,8 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Renames the icon at [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the icon property defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_icon] to check for existence, and [method clear_icon] to remove the existing property.
 			</description>
 		</method>
 		<method name="rename_stylebox">
@@ -404,7 +431,8 @@
 			<argument index="1" name="name" type="StringName" />
 			<argument index="2" name="theme_type" type="StringName" />
 			<description>
-				Renames [StyleBox] at [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the [StyleBox] property defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_stylebox] to check for existence, and [method clear_stylebox] to remove the existing property.
 			</description>
 		</method>
 		<method name="rename_theme_item">
@@ -414,7 +442,9 @@
 			<argument index="2" name="name" type="StringName" />
 			<argument index="3" name="theme_type" type="StringName" />
 			<description>
-				Renames the theme item of [code]data_type[/code] at [code]old_name[/code] to [code]name[/code] if the theme has [code]theme_type[/code]. If [code]name[/code] is already taken, this method fails.
+				Renames the theme property of [code]data_type[/code] defined by [code]old_name[/code] and [code]theme_type[/code] to [code]name[/code], if it exists.
+				Fails if it doesn't exist, or if a similar property with the new name already exists. Use [method has_theme_item] to check for existence, and [method clear_theme_item] to remove the existing property.
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="set_color">
@@ -423,8 +453,7 @@
 			<argument index="1" name="theme_type" type="StringName" />
 			<argument index="2" name="color" type="Color" />
 			<description>
-				Sets the theme's [Color] to [code]color[/code] at [code]name[/code] in [code]theme_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the [Color] property defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_color] to remove the property.
 			</description>
 		</method>
 		<method name="set_constant">
@@ -433,8 +462,7 @@
 			<argument index="1" name="theme_type" type="StringName" />
 			<argument index="2" name="constant" type="int" />
 			<description>
-				Sets the theme's constant to [code]constant[/code] at [code]name[/code] in [code]theme_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the constant property defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_constant] to remove the property.
 			</description>
 		</method>
 		<method name="set_font">
@@ -443,8 +471,7 @@
 			<argument index="1" name="theme_type" type="StringName" />
 			<argument index="2" name="font" type="Font" />
 			<description>
-				Sets the theme's [Font] to [code]font[/code] at [code]name[/code] in [code]theme_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the [Font] property defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_font] to remove the property.
 			</description>
 		</method>
 		<method name="set_font_size">
@@ -453,8 +480,7 @@
 			<argument index="1" name="theme_type" type="StringName" />
 			<argument index="2" name="font_size" type="int" />
 			<description>
-				Sets the theme's font size to [code]font_size[/code] at [code]name[/code] in [code]theme_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the font size property defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_font_size] to remove the property.
 			</description>
 		</method>
 		<method name="set_icon">
@@ -463,8 +489,7 @@
 			<argument index="1" name="theme_type" type="StringName" />
 			<argument index="2" name="texture" type="Texture2D" />
 			<description>
-				Sets the theme's icon [Texture2D] to [code]texture[/code] at [code]name[/code] in [code]theme_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the icon property defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_icon] to remove the property.
 			</description>
 		</method>
 		<method name="set_stylebox">
@@ -473,8 +498,7 @@
 			<argument index="1" name="theme_type" type="StringName" />
 			<argument index="2" name="texture" type="StyleBox" />
 			<description>
-				Sets theme's [StyleBox] to [code]stylebox[/code] at [code]name[/code] in [code]theme_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the [StyleBox] property defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_stylebox] to remove the property.
 			</description>
 		</method>
 		<method name="set_theme_item">
@@ -484,9 +508,9 @@
 			<argument index="2" name="theme_type" type="StringName" />
 			<argument index="3" name="value" type="Variant" />
 			<description>
-				Sets the theme item of [code]data_type[/code] to [code]value[/code] at [code]name[/code] in [code]theme_type[/code].
-				Does nothing if the [code]value[/code] type does not match [code]data_type[/code].
-				Creates [code]theme_type[/code] if the theme does not have it.
+				Creates or changes the value of the theme property of [code]data_type[/code] defined by [code]name[/code] and [code]theme_type[/code]. Use [method clear_theme_item] to remove the property.
+				Fails if the [code]value[/code] type is not accepted by [code]data_type[/code].
+				[b]Note:[/b] This method is analogous to calling the corresponding data type specific method, but can be used for more generalized logic.
 			</description>
 		</method>
 		<method name="set_type_variation">
@@ -494,25 +518,25 @@
 			<argument index="0" name="theme_type" type="StringName" />
 			<argument index="1" name="base_type" type="StringName" />
 			<description>
-				Marks [code]theme_type[/code] as being a variation of [code]base_type[/code].
+				Marks [code]theme_type[/code] as a variation of [code]base_type[/code].
 				This adds [code]theme_type[/code] as a suggested option for [member Control.theme_type_variation] on a [Control] that is of the [code]base_type[/code] class.
-				Variations can also be nested, i.e. [code]base_type[/code] can be another variation. If a chain of variations ends with a [code]base_type[/code] matching a class of a [Control], the whole chain is going to be suggested as options.
-				[b]Note:[/b] Suggestions only show up if this [Theme] is set as the project default theme. See [member ProjectSettings.gui/theme/custom].
+				Variations can also be nested, i.e. [code]base_type[/code] can be another variation. If a chain of variations ends with a [code]base_type[/code] matching the class of the [Control], the whole chain is going to be suggested as options.
+				[b]Note:[/b] Suggestions only show up if this theme resource is set as the project default theme. See [member ProjectSettings.gui/theme/custom].
 			</description>
 		</method>
 	</methods>
 	<members>
 		<member name="default_base_scale" type="float" setter="set_default_base_scale" getter="get_default_base_scale" default="0.0">
-			The default base scale factor of this [Theme] resource. Used by some controls to scale their visual properties based on a global scale factor. If this value is set to [code]0.0[/code], the global scale factor is used.
+			The default base scale factor of this theme resource. Used by some controls to scale their visual properties based on the global scale factor. If this value is set to [code]0.0[/code], the global scale factor is used.
 			Use [method has_default_base_scale] to check if this value is valid.
 		</member>
 		<member name="default_font" type="Font" setter="set_default_font" getter="get_default_font">
-			The default font of this [Theme] resource. Used as a fallback value for font items defined in this theme, but having invalid values. If this value is also invalid, the global default value is used.
+			The default font of this theme resource. Used as the default value when trying to fetch a font resource that doesn't exist in this theme or is in an invalid state. If the default font is also missing or invalid, the engine fallback value is used.
 			Use [method has_default_font] to check if this value is valid.
 		</member>
 		<member name="default_font_size" type="int" setter="set_default_font_size" getter="get_default_font_size" default="-1">
-			The default font size of this [Theme] resource. Used as a fallback value for font size items defined in this theme, but having invalid values. If this value is set to [code]-1[/code], the global default value is used.
-			Use [method has_default_font_size] to check if this value is valid.
+			The default font size of this theme resource. Used as the default value when trying to fetch a font size value that doesn't exist in this theme or is in an invalid state. If the default font size is also missing or invalid, the engine fallback value is used.
+			Values below [code]1[/code] are invalid and can be used to unset the property. Use [method has_default_font_size] to check if this value is valid.
 		</member>
 	</members>
 	<constants>
diff --git a/doc/classes/Window.xml b/doc/classes/Window.xml
index 3bb6603646..ab8f51ced5 100644
--- a/doc/classes/Window.xml
+++ b/doc/classes/Window.xml
@@ -398,6 +398,11 @@
 			Fullscreen window mode. Note that this is not [i]exclusive[/i] fullscreen. On Windows and Linux, a borderless window is used to emulate fullscreen. On macOS, a new desktop is used to display the running project.
 			Regardless of the platform, enabling fullscreen will change the window size to match the monitor's size. Therefore, make sure your project supports [url=$DOCS_URL/tutorials/rendering/multiple_resolutions.html]multiple resolutions[/url] when enabling fullscreen mode.
 		</constant>
+		<constant name="MODE_EXCLUSIVE_FULLSCREEN" value="4" enum="Mode">
+			Exclusive fullscreen window mode. This mode is implemented on Windows only. On other platforms, it is equivalent to [constant MODE_FULLSCREEN].
+			Only one window in exclusive fullscreen mode can be visible on a given screen at a time. If multiple windows are in exclusive fullscreen mode for the same screen, the last one being set to this mode takes precedence.
+			Regardless of the platform, enabling fullscreen will change the window size to match the monitor's size. Therefore, make sure your project supports [url=$DOCS_URL/tutorials/rendering/multiple_resolutions.html]multiple resolutions[/url] when enabling fullscreen mode.
+		</constant>
 		<constant name="FLAG_RESIZE_DISABLED" value="0" enum="Flags">
 			The window's ability to be resized.
 		</constant>
diff --git a/doc/classes/bool.xml b/doc/classes/bool.xml
index 49f2d2dd7f..243d19d94f 100644
--- a/doc/classes/bool.xml
+++ b/doc/classes/bool.xml
@@ -52,7 +52,7 @@
 		[codeblocks]
 		[gdscript]
 		var _can_shoot = true
-		onready var _cool_down = $CoolDownTimer
+		@onready var _cool_down = $CoolDownTimer
 
 		func shoot():
 		    if _can_shoot and Input.is_action_pressed("shoot"):
diff --git a/editor/connections_dialog.cpp b/editor/connections_dialog.cpp
index 21aa5153d1..7c54558cd0 100644
--- a/editor/connections_dialog.cpp
+++ b/editor/connections_dialog.cpp
@@ -764,7 +764,7 @@ void ConnectionsDock::_open_connection_dialog(TreeItem &p_item) {
 	String node_name = selected_node->get_name();
 	for (int i = 0; i < node_name.length(); i++) { // TODO: Regex filter may be cleaner.
 		char32_t c = node_name[i];
-		if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')) {
+		if (!is_ascii_identifier_char(c)) {
 			if (c == ' ') {
 				// Replace spaces with underlines.
 				c = '_';
diff --git a/editor/editor_atlas_packer.cpp b/editor/editor_atlas_packer.cpp
index b6ec5d1bad..aad32968de 100644
--- a/editor/editor_atlas_packer.cpp
+++ b/editor/editor_atlas_packer.cpp
@@ -30,6 +30,9 @@
 
 #include "editor_atlas_packer.h"
 
+#include "core/math/vector2.h"
+#include "core/math/vector2i.h"
+
 void EditorAtlasPacker::_plot_triangle(Ref<BitMap> p_bitmap, Vector2i *vertices) {
 	int width = p_bitmap->get_size().width;
 	int height = p_bitmap->get_size().height;
diff --git a/editor/editor_atlas_packer.h b/editor/editor_atlas_packer.h
index 133c516d80..169a6bead8 100644
--- a/editor/editor_atlas_packer.h
+++ b/editor/editor_atlas_packer.h
@@ -31,11 +31,12 @@
 #ifndef EDITOR_ATLAS_PACKER_H
 #define EDITOR_ATLAS_PACKER_H
 
-#include "core/math/vector2.h"
-
 #include "core/templates/vector.h"
 #include "scene/resources/bit_map.h"
 
+struct Vector2;
+struct Vector2i;
+
 class EditorAtlasPacker {
 public:
 	struct Chart {
diff --git a/editor/editor_export.cpp b/editor/editor_export.cpp
index 792897e451..9bdfb66235 100644
--- a/editor/editor_export.cpp
+++ b/editor/editor_export.cpp
@@ -818,7 +818,7 @@ Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &
 				int v = 0;
 				if (i * 2 < script_key.length()) {
 					char32_t ct = script_key[i * 2];
-					if (ct >= '0' && ct <= '9') {
+					if (is_digit(ct)) {
 						ct = ct - '0';
 					} else if (ct >= 'a' && ct <= 'f') {
 						ct = 10 + ct - 'a';
@@ -828,7 +828,7 @@ Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &
 
 				if (i * 2 + 1 < script_key.length()) {
 					char32_t ct = script_key[i * 2 + 1];
-					if (ct >= '0' && ct <= '9') {
+					if (is_digit(ct)) {
 						ct = ct - '0';
 					} else if (ct >= 'a' && ct <= 'f') {
 						ct = 10 + ct - 'a';
@@ -1215,7 +1215,7 @@ Error EditorExportPlatform::save_pack(const Ref<EditorExportPreset> &p_preset, c
 				int v = 0;
 				if (i * 2 < script_key.length()) {
 					char32_t ct = script_key[i * 2];
-					if (ct >= '0' && ct <= '9') {
+					if (is_digit(ct)) {
 						ct = ct - '0';
 					} else if (ct >= 'a' && ct <= 'f') {
 						ct = 10 + ct - 'a';
@@ -1225,7 +1225,7 @@ Error EditorExportPlatform::save_pack(const Ref<EditorExportPreset> &p_preset, c
 
 				if (i * 2 + 1 < script_key.length()) {
 					char32_t ct = script_key[i * 2 + 1];
-					if (ct >= '0' && ct <= '9') {
+					if (is_digit(ct)) {
 						ct = ct - '0';
 					} else if (ct >= 'a' && ct <= 'f') {
 						ct = 10 + ct - 'a';
diff --git a/editor/editor_help.cpp b/editor/editor_help.cpp
index dfc95fb676..96c0f3a209 100644
--- a/editor/editor_help.cpp
+++ b/editor/editor_help.cpp
@@ -2007,7 +2007,7 @@ FindBar::FindBar() {
 	hide_button = memnew(TextureButton);
 	add_child(hide_button);
 	hide_button->set_focus_mode(FOCUS_NONE);
-	hide_button->set_expand(true);
+	hide_button->set_ignore_texture_size(true);
 	hide_button->set_stretch_mode(TextureButton::STRETCH_KEEP_CENTERED);
 	hide_button->connect("pressed", callable_mp(this, &FindBar::_hide_bar));
 }
diff --git a/editor/editor_inspector.cpp b/editor/editor_inspector.cpp
index 34cace0239..425b1fc98d 100644
--- a/editor/editor_inspector.cpp
+++ b/editor/editor_inspector.cpp
@@ -1780,7 +1780,7 @@ Array EditorInspectorArray::_extract_properties_as_array(const List<PropertyInfo
 
 			int to_char_index = 0;
 			while (to_char_index < str.length()) {
-				if (str[to_char_index] < '0' || str[to_char_index] > '9') {
+				if (!is_digit(str[to_char_index])) {
 					break;
 				}
 				to_char_index++;
@@ -2512,7 +2512,7 @@ void EditorInspector::update_tree() {
 			String str = p.name.trim_prefix(array_prefix);
 			int to_char_index = 0;
 			while (to_char_index < str.length()) {
-				if (str[to_char_index] < '0' || str[to_char_index] > '9') {
+				if (!is_digit(str[to_char_index])) {
 					break;
 				}
 				to_char_index++;
diff --git a/editor/editor_locale_dialog.cpp b/editor/editor_locale_dialog.cpp
index 5c4ece7065..48a326d6d4 100644
--- a/editor/editor_locale_dialog.cpp
+++ b/editor/editor_locale_dialog.cpp
@@ -37,14 +37,6 @@
 #include "scene/gui/option_button.h"
 #include "scene/gui/tree.h"
 
-static _FORCE_INLINE_ bool is_upper_case(char32_t c) {
-	return (c >= 'A' && c <= 'Z');
-}
-
-static _FORCE_INLINE_ bool is_lower_case(char32_t c) {
-	return (c >= 'a' && c <= 'z');
-}
-
 void EditorLocaleDialog::_bind_methods() {
 	ADD_SIGNAL(MethodInfo("locale_selected", PropertyInfo(Variant::STRING, "locale")));
 }
@@ -363,16 +355,16 @@ void EditorLocaleDialog::set_locale(const String &p_locale) {
 		Vector<String> locale_elements = p_locale.split("_");
 		lang_code->set_text(locale_elements[0]);
 		if (locale_elements.size() >= 2) {
-			if (locale_elements[1].length() == 4 && is_upper_case(locale_elements[1][0]) && is_lower_case(locale_elements[1][1]) && is_lower_case(locale_elements[1][2]) && is_lower_case(locale_elements[1][3])) {
+			if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
 				script_code->set_text(locale_elements[1]);
 				advanced->set_pressed(true);
 			}
-			if (locale_elements[1].length() == 2 && is_upper_case(locale_elements[1][0]) && is_upper_case(locale_elements[1][1])) {
+			if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
 				country_code->set_text(locale_elements[1]);
 			}
 		}
 		if (locale_elements.size() >= 3) {
-			if (locale_elements[2].length() == 2 && is_upper_case(locale_elements[2][0]) && is_upper_case(locale_elements[2][1])) {
+			if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
 				country_code->set_text(locale_elements[2]);
 			} else {
 				variant_code->set_text(locale_elements[2].to_lower());
diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp
index 076a774efa..c9417ee57c 100644
--- a/editor/editor_node.cpp
+++ b/editor/editor_node.cpp
@@ -162,6 +162,7 @@
 #include "editor/plugins/path_3d_editor_plugin.h"
 #include "editor/plugins/physical_bone_3d_editor_plugin.h"
 #include "editor/plugins/polygon_2d_editor_plugin.h"
+#include "editor/plugins/replication_editor_plugin.h"
 #include "editor/plugins/resource_preloader_editor_plugin.h"
 #include "editor/plugins/root_motion_editor_plugin.h"
 #include "editor/plugins/script_editor_plugin.h"
@@ -312,7 +313,7 @@ void EditorNode::_update_scene_tabs() {
 		DisplayServer::get_singleton()->global_menu_clear("_dock");
 	}
 
-	// Get all scene names, which may be ambiguous
+	// Get all scene names, which may be ambiguous.
 	Vector<String> disambiguated_scene_names;
 	Vector<String> full_path_names;
 	for (int i = 0; i < editor_data.get_edited_scene_count(); i++) {
@@ -349,34 +350,35 @@ void EditorNode::_update_scene_tabs() {
 		DisplayServer::get_singleton()->global_menu_add_item("_dock", TTR("New Window"), callable_mp(this, &EditorNode::_global_menu_new_window));
 	}
 
-	scene_tabs->set_current_tab(editor_data.get_edited_scene());
+	if (scene_tabs->get_tab_count() > 0) {
+		scene_tabs->set_current_tab(editor_data.get_edited_scene());
+	}
 
 	if (scene_tabs->get_offset_buttons_visible()) {
-		// move add button to fixed position on the tabbar
+		// Move the add button to a fixed position.
 		if (scene_tab_add->get_parent() == scene_tabs) {
-			if (scene_tabs->is_layout_rtl()) {
-				scene_tab_add->set_position(Point2(tabbar_container->get_size().x - scene_tab_add->get_size().x, 0));
-			} else {
-				scene_tab_add->set_position(Point2(0, 0));
-			}
 			scene_tabs->remove_child(scene_tab_add);
-			tabbar_container->add_child(scene_tab_add);
-			tabbar_container->move_child(scene_tab_add, 1);
+			scene_tab_add_ph->add_child(scene_tab_add);
+			scene_tab_add->set_position(Point2());
 		}
 	} else {
-		// move add button to after last tab
-		if (scene_tab_add->get_parent() == tabbar_container) {
-			tabbar_container->remove_child(scene_tab_add);
+		// Move the add button to be after the last tab.
+		if (scene_tab_add->get_parent() == scene_tab_add_ph) {
+			scene_tab_add_ph->remove_child(scene_tab_add);
 			scene_tabs->add_child(scene_tab_add);
 		}
-		Rect2 last_tab = Rect2();
-		if (scene_tabs->get_tab_count() != 0) {
-			last_tab = scene_tabs->get_tab_rect(scene_tabs->get_tab_count() - 1);
+
+		if (scene_tabs->get_tab_count() == 0) {
+			scene_tab_add->set_position(Point2());
+			return;
 		}
+
+		Rect2 last_tab = scene_tabs->get_tab_rect(scene_tabs->get_tab_count() - 1);
+		int hsep = scene_tabs->get_theme_constant(SNAME("hseparation"));
 		if (scene_tabs->is_layout_rtl()) {
-			scene_tab_add->set_position(Point2(last_tab.get_position().x - scene_tab_add->get_size().x - 3, last_tab.get_position().y));
+			scene_tab_add->set_position(Point2(last_tab.position.x - scene_tab_add->get_size().x - hsep, last_tab.position.y));
 		} else {
-			scene_tab_add->set_position(Point2(last_tab.get_position().x + last_tab.get_size().x + 3, last_tab.get_position().y));
+			scene_tab_add->set_position(Point2(last_tab.position.x + last_tab.size.width + hsep, last_tab.position.y));
 		}
 	}
 }
@@ -624,6 +626,10 @@ void EditorNode::_notification(int p_what) {
 			editor_data.clear_edited_scenes();
 		} break;
 
+		case Control::NOTIFICATION_THEME_CHANGED: {
+			scene_tab_add_ph->set_custom_minimum_size(scene_tab_add->get_minimum_size());
+		} break;
+
 		case NOTIFICATION_READY: {
 			{
 				_initializing_addons = true;
@@ -6227,6 +6233,9 @@ EditorNode::EditorNode() {
 	tab_preview->set_position(Point2(2, 2) * EDSCALE);
 	tab_preview_panel->add_child(tab_preview);
 
+	tabbar_container = memnew(HBoxContainer);
+	srt->add_child(tabbar_container);
+
 	scene_tabs = memnew(TabBar);
 	scene_tabs->add_theme_style_override("tab_selected", gui_base->get_theme_stylebox(SNAME("SceneTabFG"), SNAME("EditorStyles")));
 	scene_tabs->add_theme_style_override("tab_unselected", gui_base->get_theme_stylebox(SNAME("SceneTabBG"), SNAME("EditorStyles")));
@@ -6244,16 +6253,26 @@ EditorNode::EditorNode() {
 	scene_tabs->connect("gui_input", callable_mp(this, &EditorNode::_scene_tab_input));
 	scene_tabs->connect("active_tab_rearranged", callable_mp(this, &EditorNode::_reposition_active_tab));
 	scene_tabs->connect("resized", callable_mp(this, &EditorNode::_update_scene_tabs));
-
-	tabbar_container = memnew(HBoxContainer);
 	scene_tabs->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	tabbar_container->add_child(scene_tabs);
 
 	scene_tabs_context_menu = memnew(PopupMenu);
 	tabbar_container->add_child(scene_tabs_context_menu);
 	scene_tabs_context_menu->connect("id_pressed", callable_mp(this, &EditorNode::_menu_option));
 
-	srt->add_child(tabbar_container);
-	tabbar_container->add_child(scene_tabs);
+	scene_tab_add = memnew(Button);
+	scene_tab_add->set_flat(true);
+	scene_tab_add->set_tooltip(TTR("Add a new scene."));
+	scene_tab_add->set_icon(gui_base->get_theme_icon(SNAME("Add"), SNAME("EditorIcons")));
+	scene_tab_add->add_theme_color_override("icon_normal_color", Color(0.6f, 0.6f, 0.6f, 0.8f));
+	scene_tabs->add_child(scene_tab_add);
+	scene_tab_add->connect("pressed", callable_mp(this, &EditorNode::_menu_option), make_binds(FILE_NEW_SCENE));
+
+	scene_tab_add_ph = memnew(Control);
+	scene_tab_add_ph->set_mouse_filter(Control::MOUSE_FILTER_IGNORE);
+	scene_tab_add_ph->set_custom_minimum_size(scene_tab_add->get_minimum_size());
+	tabbar_container->add_child(scene_tab_add_ph);
+
 	distraction_free = memnew(Button);
 	distraction_free->set_flat(true);
 	ED_SHORTCUT_AND_COMMAND("editor/distraction_free_mode", TTR("Distraction Free Mode"), KeyModifierMask::CMD | KeyModifierMask::SHIFT | Key::F11);
@@ -6263,15 +6282,7 @@ EditorNode::EditorNode() {
 	distraction_free->connect("pressed", callable_mp(this, &EditorNode::_toggle_distraction_free_mode));
 	distraction_free->set_icon(gui_base->get_theme_icon(SNAME("DistractionFree"), SNAME("EditorIcons")));
 	distraction_free->set_toggle_mode(true);
-
-	scene_tab_add = memnew(Button);
-	scene_tab_add->set_flat(true);
-	tabbar_container->add_child(scene_tab_add);
 	tabbar_container->add_child(distraction_free);
-	scene_tab_add->set_tooltip(TTR("Add a new scene."));
-	scene_tab_add->set_icon(gui_base->get_theme_icon(SNAME("Add"), SNAME("EditorIcons")));
-	scene_tab_add->add_theme_color_override("icon_normal_color", Color(0.6f, 0.6f, 0.6f, 0.8f));
-	scene_tab_add->connect("pressed", callable_mp(this, &EditorNode::_menu_option), make_binds(FILE_NEW_SCENE));
 
 	scene_root_parent = memnew(PanelContainer);
 	scene_root_parent->set_custom_minimum_size(Size2(0, 80) * EDSCALE);
@@ -7011,6 +7022,7 @@ EditorNode::EditorNode() {
 	add_editor_plugin(memnew(InputEventEditorPlugin(this)));
 	add_editor_plugin(memnew(SubViewportPreviewEditorPlugin(this)));
 	add_editor_plugin(memnew(TextControlEditorPlugin(this)));
+	add_editor_plugin(memnew(ReplicationEditorPlugin(this)));
 
 	for (int i = 0; i < EditorPlugins::get_plugin_count(); i++) {
 		add_editor_plugin(EditorPlugins::create(i, this));
diff --git a/editor/editor_node.h b/editor/editor_node.h
index 5c013a18d9..8d322a1bfd 100644
--- a/editor/editor_node.h
+++ b/editor/editor_node.h
@@ -384,6 +384,7 @@ private:
 	HBoxContainer *tabbar_container;
 	Button *distraction_free;
 	Button *scene_tab_add;
+	Control *scene_tab_add_ph;
 
 	bool scene_distraction;
 	bool script_distraction;
diff --git a/editor/editor_settings_dialog.cpp b/editor/editor_settings_dialog.cpp
index 2520d662c5..1cb95226ec 100644
--- a/editor/editor_settings_dialog.cpp
+++ b/editor/editor_settings_dialog.cpp
@@ -513,6 +513,38 @@ void EditorSettingsDialog::_shortcut_button_pressed(Object *p_item, int p_column
 	}
 }
 
+void EditorSettingsDialog::_shortcut_cell_double_clicked() {
+	// Reacts to the shortcuts tree's "item_activated" signal, depending on the selected cell:
+	// - Name column (0) of a shortcut: toggle collapse, provided the item has children.
+	// - Binding column (1) of a shortcut: if its first child is editable, uncollapse; edit that child when it is the only one.
+	// - Binding column (1) of an event: edit it, provided it carries an edit button.
+	const ShortcutButton edit_btn_id = EditorSettingsDialog::SHORTCUT_EDIT;
+	const int edit_btn_col = 1;
+	TreeItem *ti = shortcuts->get_selected();
+	if (!ti) {
+		return; // No selected item to act on; bail out instead of dereferencing a null pointer.
+	}
+	const String type = ti->get_meta("type");
+	const int col = shortcuts->get_selected_column();
+	TreeItem *child_ti = ti->get_first_child();
+	if (type == "shortcut" && col == 0) {
+		if (child_ti) {
+			ti->set_collapsed(!ti->is_collapsed());
+		}
+	} else if (type == "shortcut" && col == 1) {
+		if (child_ti && child_ti->get_button_by_id(edit_btn_col, edit_btn_id) != -1) {
+			ti->set_collapsed(false);
+			if (ti->get_child_count() == 1) {
+				_shortcut_button_pressed(child_ti, edit_btn_col, edit_btn_id);
+			}
+		}
+	} else if (type == "event" && col == 1) {
+		if (ti->get_button_by_id(edit_btn_col, edit_btn_id) != -1) {
+			_shortcut_button_pressed(ti, edit_btn_col, edit_btn_id);
+		}
+	}
+}
+
 Variant EditorSettingsDialog::get_drag_data_fw(const Point2 &p_point, Control *p_from) {
 	TreeItem *selected = shortcuts->get_selected();
 
@@ -692,6 +724,7 @@ EditorSettingsDialog::EditorSettingsDialog() {
 	shortcuts->set_column_title(0, TTR("Name"));
 	shortcuts->set_column_title(1, TTR("Binding"));
 	shortcuts->connect("button_pressed", callable_mp(this, &EditorSettingsDialog::_shortcut_button_pressed));
+	shortcuts->connect("item_activated", callable_mp(this, &EditorSettingsDialog::_shortcut_cell_double_clicked));
 	tab_shortcuts->add_child(shortcuts);
 
 	shortcuts->set_drag_forwarding(this);
diff --git a/editor/editor_settings_dialog.h b/editor/editor_settings_dialog.h
index f1c4ea7770..c8858b4fcb 100644
--- a/editor/editor_settings_dialog.h
+++ b/editor/editor_settings_dialog.h
@@ -104,6 +104,7 @@ class EditorSettingsDialog : public AcceptDialog {
 
 	void _update_shortcuts();
 	void _shortcut_button_pressed(Object *p_item, int p_column, int p_idx);
+	void _shortcut_cell_double_clicked();
 
 	void _builtin_action_popup_index_pressed(int p_index);
 
diff --git a/editor/find_in_files.cpp b/editor/find_in_files.cpp
index 131ecc3b12..dd72def6ad 100644
--- a/editor/find_in_files.cpp
+++ b/editor/find_in_files.cpp
@@ -53,11 +53,6 @@ inline void pop_back(T &container) {
 	container.resize(container.size() - 1);
 }
 
-// TODO: Copied from TextEdit private, would be nice to extract it in a single place.
-static bool is_text_char(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
-}
-
 static bool find_next(const String &line, String pattern, int from, bool match_case, bool whole_words, int &out_begin, int &out_end) {
 	int end = from;
 
@@ -73,10 +68,10 @@ static bool find_next(const String &line, String pattern, int from, bool match_c
 		out_end = end;
 
 		if (whole_words) {
-			if (begin > 0 && is_text_char(line[begin - 1])) {
+			if (begin > 0 && (is_ascii_identifier_char(line[begin - 1]))) {
 				continue;
 			}
-			if (end < line.size() && is_text_char(line[end])) {
+			if (end < line.size() && (is_ascii_identifier_char(line[end]))) {
 				continue;
 			}
 		}
diff --git a/editor/import/resource_importer_layered_texture.cpp b/editor/import/resource_importer_layered_texture.cpp
index d63366638e..69e3311fe6 100644
--- a/editor/import/resource_importer_layered_texture.cpp
+++ b/editor/import/resource_importer_layered_texture.cpp
@@ -32,8 +32,10 @@
 
 #include "resource_importer_texture.h"
 
+#include "core/error/error_macros.h"
 #include "core/io/config_file.h"
 #include "core/io/image_loader.h"
+#include "core/object/ref_counted.h"
 #include "editor/editor_file_system.h"
 #include "editor/editor_node.h"
 #include "resource_importer_texture.h"
@@ -263,12 +265,12 @@ void ResourceImporterLayeredTexture::_save_tex(Vector<Ref<Image>> p_images, cons
 	f->store_8('L');
 
 	f->store_32(StreamTextureLayered::FORMAT_VERSION);
-	f->store_32(p_images.size()); //2d layers or 3d depth
+	f->store_32(p_images.size()); // For 2d layers or 3d depth.
 	f->store_32(mode);
 	f->store_32(0);
 
 	f->store_32(0);
-	f->store_32(mipmap_images.size()); // amount of mipmaps
+	f->store_32(mipmap_images.size()); // Amount of mipmaps.
 	f->store_32(0);
 	f->store_32(0);
 
@@ -289,7 +291,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
 	int hdr_compression = p_options["compress/hdr_compression"];
 	int bptc_ldr = p_options["compress/bptc_ldr"];
 	bool mipmaps = p_options["mipmaps/generate"];
-	//bool mipmap_limit = p_options["mipmaps/limit"];
 
 	int channel_pack = p_options["compress/channel_pack"];
 	int hslices = (p_options.has("slices/horizontal")) ? int(p_options["slices/horizontal"]) : 0;
@@ -377,87 +378,23 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
 			slices.push_back(slice);
 		}
 	}
-
-	String extension = get_save_extension();
 	Array formats_imported;
-
-	if (compress_mode == COMPRESS_VRAM_COMPRESSED) {
-		//must import in all formats, in order of priority (so platform choses the best supported one. IE, etc2 over etc).
-		//Android, GLES 2.x
-
-		bool ok_on_pc = false;
-		bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995);
-		bool is_ldr = (image->get_format() >= Image::FORMAT_L8 && image->get_format() <= Image::FORMAT_RGB565);
-		bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc");
-		bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc");
-
-		if (can_bptc) {
-			formats_imported.push_back("bptc"); // Needs to be added anyway.
-		}
-		bool can_compress_hdr = hdr_compression > 0;
-
-		if (is_hdr && can_compress_hdr) {
-			if (used_channels == Image::USED_CHANNELS_LA || used_channels == Image::USED_CHANNELS_RGBA) {
-				//can compress hdr, but hdr with alpha is not compressible
-
-				if (hdr_compression == 2) {
-					//but user selected to compress hdr anyway, so force an alpha-less format.
-					if (image->get_format() == Image::FORMAT_RGBAF) {
-						for (int i = 0; i < slices.size(); i++) {
-							slices.write[i]->convert(Image::FORMAT_RGBF);
-						}
-
-					} else if (image->get_format() == Image::FORMAT_RGBAH) {
-						for (int i = 0; i < slices.size(); i++) {
-							slices.write[i]->convert(Image::FORMAT_RGBH);
-						}
-					}
-				} else {
-					can_compress_hdr = false;
-				}
-			}
-
-			if (can_compress_hdr) {
-				if (!can_bptc) {
-					//default to rgbe
-					if (image->get_format() != Image::FORMAT_RGBE9995) {
-						for (int i = 0; i < slices.size(); i++) {
-							slices.write[i]->convert(Image::FORMAT_RGBE9995);
-						}
-					}
-				}
-			} else {
-				can_bptc = false;
-			}
-		}
-
-		if (is_ldr && can_bptc) {
-			if (bptc_ldr == 0 || (bptc_ldr == 1 && !(used_channels == Image::USED_CHANNELS_LA || used_channels == Image::USED_CHANNELS_RGBA))) {
-				can_bptc = false;
-			}
-		}
-
-		if (can_bptc || can_s3tc) {
-			_save_tex(slices, p_save_path + ".s3tc." + extension, compress_mode, lossy, can_bptc ? Image::COMPRESS_BPTC : Image::COMPRESS_S3TC, csource, used_channels, mipmaps, false);
-			r_platform_variants->push_back("s3tc");
-			formats_imported.push_back("s3tc");
-			ok_on_pc = true;
-		}
-
-		if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) {
-			_save_tex(slices, p_save_path + ".etc2." + extension, compress_mode, lossy, Image::COMPRESS_ETC2, csource, used_channels, mipmaps, true);
-			r_platform_variants->push_back("etc2");
-			formats_imported.push_back("etc2");
-		}
-
-		if (!ok_on_pc) {
-			EditorNode::add_io_error("Warning, no suitable PC VRAM compression enabled in Project Settings. This texture will not display correctly on PC.");
-		}
-	} else {
-		//import normally
-		_save_tex(slices, p_save_path + "." + extension, compress_mode, lossy, Image::COMPRESS_S3TC /* IGNORED */, csource, used_channels, mipmaps, false);
-	}
-
+	Ref<LayeredTextureImport> texture_import;
+	texture_import.instantiate();
+	texture_import->csource = &csource;
+	texture_import->save_path = p_save_path;
+	texture_import->options = p_options;
+	texture_import->platform_variants = r_platform_variants;
+	texture_import->image = image;
+	texture_import->formats_imported = formats_imported;
+	texture_import->slices = &slices;
+	texture_import->compress_mode = compress_mode;
+	texture_import->lossy = lossy;
+	texture_import->hdr_compression = hdr_compression;
+	texture_import->bptc_ldr = bptc_ldr;
+	texture_import->mipmaps = mipmaps;
+	texture_import->used_channels = used_channels;
+	_check_compress_stex(texture_import);
 	if (r_metadata) {
 		Dictionary metadata;
 		metadata["vram_texture"] = compress_mode == COMPRESS_VRAM_COMPRESSED;
@@ -537,3 +474,76 @@ ResourceImporterLayeredTexture::ResourceImporterLayeredTexture() {
 
 ResourceImporterLayeredTexture::~ResourceImporterLayeredTexture() {
 }
+
+void ResourceImporterLayeredTexture::_check_compress_stex(Ref<LayeredTextureImport> r_texture_import) {
+	// Saves the slices of r_texture_import in the format(s) required by its compress mode.
+	// In VRAM-compressed mode, every saved variant is recorded in platform_variants and formats_imported.
+	ERR_FAIL_NULL(r_texture_import->csource);
+	ERR_FAIL_NULL(r_texture_import->slices);
+	const String extension = get_save_extension();
+	if (r_texture_import->compress_mode != COMPRESS_VRAM_COMPRESSED) {
+		// Import normally.
+		_save_tex(*r_texture_import->slices, r_texture_import->save_path + "." + extension, r_texture_import->compress_mode, r_texture_import->lossy, Image::COMPRESS_S3TC /* IGNORED */, *r_texture_import->csource, r_texture_import->used_channels, r_texture_import->mipmaps, false);
+		return;
+	}
+	// Must import in all formats, in order of priority (so the platform chooses the best supported one. IE, etc2 over etc).
+	// Android, GLES 2.x
+	ERR_FAIL_NULL(r_texture_import->image);
+	ERR_FAIL_NULL(r_texture_import->platform_variants);
+	Ref<Image> image = r_texture_import->image;
+	const bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995);
+	const bool is_ldr = (image->get_format() >= Image::FORMAT_L8 && image->get_format() <= Image::FORMAT_RGB565);
+	const bool has_alpha = (r_texture_import->used_channels == Image::USED_CHANNELS_LA || r_texture_import->used_channels == Image::USED_CHANNELS_RGBA);
+	bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc");
+	const bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc");
+	if (can_bptc) {
+		r_texture_import->formats_imported.push_back("bptc"); // BPTC needs to be added anyway.
+	}
+	bool can_compress_hdr = r_texture_import->hdr_compression > 0;
+	if (is_hdr && can_compress_hdr) {
+		if (has_alpha) {
+			// Can compress hdr, but hdr with alpha is not compressible.
+			if (r_texture_import->hdr_compression == 2) {
+				// The user selected to compress hdr anyway, so force an alpha-less format.
+				if (image->get_format() == Image::FORMAT_RGBAF) {
+					for (int i = 0; i < r_texture_import->slices->size(); i++) {
+						r_texture_import->slices->write[i]->convert(Image::FORMAT_RGBF);
+					}
+				} else if (image->get_format() == Image::FORMAT_RGBAH) {
+					for (int i = 0; i < r_texture_import->slices->size(); i++) {
+						r_texture_import->slices->write[i]->convert(Image::FORMAT_RGBH);
+					}
+				}
+			} else {
+				can_compress_hdr = false;
+			}
+		}
+		if (!can_compress_hdr) {
+			can_bptc = false; // HDR data that must stay uncompressed cannot go through BPTC.
+		} else if (!can_bptc && image->get_format() != Image::FORMAT_RGBE9995) {
+			// Default to RGBE when BPTC is not available.
+			for (int i = 0; i < r_texture_import->slices->size(); i++) {
+				r_texture_import->slices->write[i]->convert(Image::FORMAT_RGBE9995);
+			}
+		}
+	}
+	if (is_ldr && can_bptc) {
+		if (r_texture_import->bptc_ldr == 0 || (r_texture_import->bptc_ldr == 1 && !has_alpha)) {
+			can_bptc = false;
+		}
+	}
+	const bool ok_on_pc = can_bptc || can_s3tc; // Only BPTC/S3TC yield the variant the warning below treats as PC-suitable.
+	if (ok_on_pc) {
+		_save_tex(*r_texture_import->slices, r_texture_import->save_path + ".s3tc." + extension, r_texture_import->compress_mode, r_texture_import->lossy, can_bptc ? Image::COMPRESS_BPTC : Image::COMPRESS_S3TC, *r_texture_import->csource, r_texture_import->used_channels, r_texture_import->mipmaps, false);
+		r_texture_import->platform_variants->push_back("s3tc");
+		r_texture_import->formats_imported.push_back("s3tc");
+	}
+	if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) {
+		_save_tex(*r_texture_import->slices, r_texture_import->save_path + ".etc2." + extension, r_texture_import->compress_mode, r_texture_import->lossy, Image::COMPRESS_ETC2, *r_texture_import->csource, r_texture_import->used_channels, r_texture_import->mipmaps, true);
+		r_texture_import->platform_variants->push_back("etc2");
+		r_texture_import->formats_imported.push_back("etc2");
+	}
+	if (!ok_on_pc) {
+		EditorNode::add_io_error("Warning, no suitable PC VRAM compression enabled in Project Settings. This texture will not display correctly on PC.");
+	}
+}
diff --git a/editor/import/resource_importer_layered_texture.h b/editor/import/resource_importer_layered_texture.h
index ee8e7dc615..edd981c63d 100644
--- a/editor/import/resource_importer_layered_texture.h
+++ b/editor/import/resource_importer_layered_texture.h
@@ -33,9 +33,30 @@
 
 #include "core/io/image.h"
 #include "core/io/resource_importer.h"
+#include "core/object/ref_counted.h"
 
 class StreamTexture2D;
 
+// Bundle of state that ResourceImporterLayeredTexture::import() fills in and passes to _check_compress_stex().
+class LayeredTextureImport : public RefCounted {
+	GDCLASS(LayeredTextureImport, RefCounted);
+public:
+	Image::CompressSource *csource = nullptr; // Raw pointer; nothing in this class frees it.
+	String save_path; // Base path; the ".<variant>.<extension>" suffix is appended when saving.
+	Map<StringName, Variant> options; // Import options as received by import().
+	List<String> *platform_variants = nullptr; // Raw pointer; receives "s3tc" / "etc2" entries.
+	Ref<Image> image; // Starts out null; only its format is inspected.
+	Array formats_imported; // Receives "bptc" / "s3tc" / "etc2" entries.
+	Vector<Ref<Image>> *slices = nullptr; // Raw pointer; the images that get converted and saved.
+	int compress_mode = 0; // Compared against COMPRESS_VRAM_COMPRESSED.
+	float lossy = 1.0f; // Forwarded to _save_tex().
+	int hdr_compression = 0; // > 0 enables HDR compression; 2 forces alpha-less formats.
+	int bptc_ldr = 0; // 0 disables BPTC for LDR; 1 keeps it only when alpha is used.
+	bool mipmaps = true; // Forwarded to _save_tex().
+	Image::UsedChannels used_channels = Image::USED_CHANNELS_RGBA; // LA/RGBA count as "has alpha".
+	virtual ~LayeredTextureImport() = default;
+};
+
 class ResourceImporterLayeredTexture : public ResourceImporter {
 	GDCLASS(ResourceImporterLayeredTexture, ResourceImporter);
 
@@ -66,6 +87,8 @@ protected:
 	static ResourceImporterLayeredTexture *singleton;
 
 public:
+	void _check_compress_stex(Ref<LayeredTextureImport> r_texture_import);
+
 	static ResourceImporterLayeredTexture *get_singleton() { return singleton; }
 	virtual String get_importer_name() const override;
 	virtual String get_visible_name() const override;
diff --git a/editor/import/resource_importer_scene.cpp b/editor/import/resource_importer_scene.cpp
index 0fefa0f3c4..d9448dd4a9 100644
--- a/editor/import/resource_importer_scene.cpp
+++ b/editor/import/resource_importer_scene.cpp
@@ -316,7 +316,7 @@ static bool _teststr(const String &p_what, const String &p_str) {
 	String what = p_what;
 
 	//remove trailing spaces and numbers, some apps like blender add ".number" to duplicates so also compensate for this
-	while (what.length() && ((what[what.length() - 1] >= '0' && what[what.length() - 1] <= '9') || what[what.length() - 1] <= 32 || what[what.length() - 1] == '.')) {
+	while (what.length() && (is_digit(what[what.length() - 1]) || what[what.length() - 1] <= 32 || what[what.length() - 1] == '.')) {
 		what = what.substr(0, what.length() - 1);
 	}
 
@@ -336,7 +336,7 @@ static String _fixstr(const String &p_what, const String &p_str) {
 	String what = p_what;
 
 	//remove trailing spaces and numbers, some apps like blender add ".number" to duplicates so also compensate for this
-	while (what.length() && ((what[what.length() - 1] >= '0' && what[what.length() - 1] <= '9') || what[what.length() - 1] <= 32 || what[what.length() - 1] == '.')) {
+	while (what.length() && (is_digit(what[what.length() - 1]) || what[what.length() - 1] <= 32 || what[what.length() - 1] == '.')) {
 		what = what.substr(0, what.length() - 1);
 	}
 
diff --git a/editor/import/resource_importer_texture.cpp b/editor/import/resource_importer_texture.cpp
index 69c705ed5a..127cd4511e 100644
--- a/editor/import/resource_importer_texture.cpp
+++ b/editor/import/resource_importer_texture.cpp
@@ -496,11 +496,10 @@ Error ResourceImporterTexture::import(const String &p_source_file, const String
 		//must import in all formats, in order of priority (so platform choses the best supported one. IE, etc2 over etc).
 		//Android, GLES 2.x
 
-		bool ok_on_pc = false;
-		bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995);
+		const bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995);
 		bool is_ldr = (image->get_format() >= Image::FORMAT_L8 && image->get_format() <= Image::FORMAT_RGB565);
-		bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc");
-		bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc");
+		const bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc");
+		const bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc");
 
 		if (can_bptc) {
 			//add to the list anyway
@@ -525,29 +524,24 @@ Error ResourceImporterTexture::import(const String &p_source_file, const String
 				}
 			}
 
-			if (can_compress_hdr) {
-				if (!can_bptc) {
-					//fallback to RGBE99995
-					if (image->get_format() != Image::FORMAT_RGBE9995) {
-						image->convert(Image::FORMAT_RGBE9995);
-					}
+			if (!can_compress_hdr) {
+				//fallback to RGBE9995
+				if (image->get_format() != Image::FORMAT_RGBE9995) {
+					image->convert(Image::FORMAT_RGBE9995);
 				}
-			} else {
-				can_bptc = false;
-			}
-		}
-
-		if (is_ldr && can_bptc) {
-			if (bptc_ldr == 0 || (bptc_ldr == 1 && !has_alpha)) {
-				can_bptc = false;
 			}
 		}
 
+		bool ok_on_pc = false;
 		if (can_bptc || can_s3tc) {
-			_save_stex(image, p_save_path + ".s3tc.stex", compress_mode, lossy, can_bptc ? Image::COMPRESS_BPTC : Image::COMPRESS_S3TC, mipmaps, stream, detect_3d, detect_roughness, detect_normal, force_normal, srgb_friendly_pack, false, mipmap_limit, normal_image, roughness_channel);
+			ok_on_pc = true;
+			Image::CompressMode image_compress_mode = Image::COMPRESS_BPTC;
+			if (!bptc_ldr && can_s3tc && is_ldr) {
+				image_compress_mode = Image::COMPRESS_S3TC;
+			}
+			_save_stex(image, p_save_path + ".s3tc.stex", compress_mode, lossy, image_compress_mode, mipmaps, stream, detect_3d, detect_roughness, detect_normal, force_normal, srgb_friendly_pack, false, mipmap_limit, normal_image, roughness_channel);
 			r_platform_variants->push_back("s3tc");
 			formats_imported.push_back("s3tc");
-			ok_on_pc = true;
 		}
 
 		if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) {
diff --git a/editor/plugins/editor_preview_plugins.cpp b/editor/plugins/editor_preview_plugins.cpp
index cef505181a..7e0019faac 100644
--- a/editor/plugins/editor_preview_plugins.cpp
+++ b/editor/plugins/editor_preview_plugins.cpp
@@ -462,10 +462,6 @@ EditorMaterialPreviewPlugin::~EditorMaterialPreviewPlugin() {
 
 ///////////////////////////////////////////////////////////////////////////
 
-static bool _is_text_char(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
-}
-
 bool EditorScriptPreviewPlugin::handles(const String &p_type) const {
 	return ClassDB::is_parent_class(p_type, "Script");
 }
@@ -538,15 +534,15 @@ Ref<Texture2D> EditorScriptPreviewPlugin::generate(const RES &p_from, const Size
 				if (in_comment) {
 					color = comment_color;
 				} else {
-					if (c != '_' && ((c >= '!' && c <= '/') || (c >= ':' && c <= '@') || (c >= '[' && c <= '`') || (c >= '{' && c <= '~') || c == '\t')) {
+					if (is_symbol(c)) {
 						//make symbol a little visible
 						color = symbol_color;
 						in_control_flow_keyword = false;
 						in_keyword = false;
-					} else if (!prev_is_text && _is_text_char(c)) {
+					} else if (!prev_is_text && is_ascii_identifier_char(c)) {
 						int pos = i;
 
-						while (_is_text_char(code[pos])) {
+						while (is_ascii_identifier_char(code[pos])) {
 							pos++;
 						}
 						String word = code.substr(i, pos - i);
@@ -556,7 +552,7 @@ Ref<Texture2D> EditorScriptPreviewPlugin::generate(const RES &p_from, const Size
 							in_keyword = true;
 						}
 
-					} else if (!_is_text_char(c)) {
+					} else if (!is_ascii_identifier_char(c)) {
 						in_keyword = false;
 					}
 
@@ -571,7 +567,7 @@ Ref<Texture2D> EditorScriptPreviewPlugin::generate(const RES &p_from, const Size
 				img->set_pixel(col, y0 + line * 2, bg_color.blend(ul));
 				img->set_pixel(col, y0 + line * 2 + 1, color);
 
-				prev_is_text = _is_text_char(c);
+				prev_is_text = is_ascii_identifier_char(c);
 			}
 			col++;
 		} else {
diff --git a/editor/plugins/replication_editor_plugin.cpp b/editor/plugins/replication_editor_plugin.cpp
new file mode 100644
index 0000000000..93f4a853f3
--- /dev/null
+++ b/editor/plugins/replication_editor_plugin.cpp
@@ -0,0 +1,390 @@
+/*************************************************************************/
+/*  replication_editor_plugin.cpp                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "replication_editor_plugin.h"
+
+#include "editor/editor_scale.h"
+#include "editor/inspector_dock.h"
+#include "scene/gui/dialogs.h"
+#include "scene/gui/tree.h"
+#include "scene/multiplayer/multiplayer_synchronizer.h"
+
+/// ReplicationEditor
+ReplicationEditor::ReplicationEditor(EditorNode *p_editor) {
+	editor = p_editor;
+	set_v_size_flags(SIZE_EXPAND_FILL);
+	set_custom_minimum_size(Size2(0, 200) * EDSCALE);
+	// Delete confirmation: both outcomes reach _dialog_closed(), with the bound bool telling them apart.
+	delete_dialog = memnew(ConfirmationDialog);
+	delete_dialog->connect("cancelled", callable_mp(this, &ReplicationEditor::_dialog_closed), varray(false));
+	delete_dialog->connect("confirmed", callable_mp(this, &ReplicationEditor::_dialog_closed), varray(true));
+	add_child(delete_dialog);
+	// Dialog used by _add_pressed() and property_keyed() to report problems to the user.
+	error_dialog = memnew(AcceptDialog);
+	error_dialog->get_ok_button()->set_text(TTR("Close"));
+	error_dialog->set_title(TTR("Error!"));
+	add_child(error_dialog);
+	// Vertical layout holding the input row and the property tree.
+	VBoxContainer *vb = memnew(VBoxContainer);
+	vb->set_v_size_flags(SIZE_EXPAND_FILL);
+	add_child(vb);
+	// Input row: property path field plus the "Add" button.
+	HBoxContainer *hb = memnew(HBoxContainer);
+	vb->add_child(hb);
+	np_line_edit = memnew(LineEdit);
+	np_line_edit->set_placeholder(":property");
+	np_line_edit->set_h_size_flags(SIZE_EXPAND_FILL);
+	hb->add_child(np_line_edit);
+	add_button = memnew(Button);
+	add_button->connect("pressed", callable_mp(this, &ReplicationEditor::_add_pressed));
+	add_button->set_text(TTR("Add"));
+	hb->add_child(add_button);
+	// Tree columns: 0 = property, 1 = "Spawn" checkbox, 2 = "Sync" checkbox, 3 = per-row button.
+	tree = memnew(Tree);
+	tree->set_hide_root(true);
+	tree->set_columns(4);
+	tree->set_column_titles_visible(true);
+	tree->set_column_title(0, TTR("Properties"));
+	tree->set_column_expand(0, true);
+	tree->set_column_title(1, TTR("Spawn"));
+	tree->set_column_expand(1, false);
+	tree->set_column_custom_minimum_width(1, 100);
+	tree->set_column_title(2, TTR("Sync"));
+	tree->set_column_custom_minimum_width(2, 100);
+	tree->set_column_expand(2, false);
+	tree->set_column_expand(3, false);
+	tree->create_item();
+	tree->connect("button_pressed", callable_mp(this, &ReplicationEditor::_tree_button_pressed));
+	tree->connect("item_edited", callable_mp(this, &ReplicationEditor::_tree_item_edited));
+	tree->set_v_size_flags(SIZE_EXPAND_FILL);
+	vb->add_child(tree);
+}
+
+void ReplicationEditor::_bind_methods() {
+	ClassDB::bind_method(D_METHOD("_update_config"), &ReplicationEditor::_update_config); // Called by name from UndoRedo actions.
+	ClassDB::bind_method(D_METHOD("_update_checked", "property", "column", "checked"), &ReplicationEditor::_update_checked); // Called by name from UndoRedo actions.
+	ADD_SIGNAL(MethodInfo("keying_changed")); // Its only emission is inside the compiled-out body of update_keying().
+}
+
+void ReplicationEditor::_notification(int p_what) {
+	if (p_what == NOTIFICATION_ENTER_TREE || p_what == EditorSettings::NOTIFICATION_EDITOR_SETTINGS_CHANGED) {
+		add_theme_style_override("panel", editor->get_gui_base()->get_theme_stylebox(SNAME("panel"), SNAME("Panel"))); // Take the "panel" stylebox from the editor GUI base.
+	} else if (p_what == NOTIFICATION_VISIBILITY_CHANGED) {
+		update_keying(); // Currently a no-op: its body is compiled out with #if 0.
+	}
+}
+
+void ReplicationEditor::_add_pressed() { // Handler for the "Add" button: adds the typed path to the config as an undoable action.
+	if (!current) {
+		error_dialog->set_text(TTR("Please select a MultiplayerSynchronizer first."));
+		error_dialog->popup_centered();
+		return;
+	}
+	if (current->get_root_path().is_empty()) {
+		error_dialog->set_text(TTR("The MultiplayerSynchronizer needs a root path."));
+		error_dialog->popup_centered();
+		return;
+	}
+	String np_text = np_line_edit->get_text();
+	if (np_text.find(":") == -1) { // No ':' in the input: prepend one, matching the ":property" placeholder format.
+		np_text = ":" + np_text;
+	}
+	NodePath prop = NodePath(np_text);
+	if (prop.is_empty()) {
+		return;
+	}
+	UndoRedo *undo_redo = editor->get_undo_redo();
+	undo_redo->create_action(TTR("Add property"));
+	config = current->get_replication_config();
+	if (config.is_null()) { // The synchronizer has no config yet: create one, assign it right away and record the assignment in the action.
+		config.instantiate();
+		current->set_replication_config(config);
+		undo_redo->add_do_method(current, "set_replication_config", config);
+		undo_redo->add_undo_method(current, "set_replication_config", Ref<SceneReplicationConfig>());
+		_update_config(); // Rebuild the tree for the newly assigned config.
+	}
+	undo_redo->add_do_method(config.ptr(), "add_property", prop);
+	undo_redo->add_undo_method(config.ptr(), "remove_property", prop);
+	undo_redo->add_do_method(this, "_update_config"); // Refresh the tree after both do and undo.
+	undo_redo->add_undo_method(this, "_update_config");
+	undo_redo->commit_action();
+}
+
+void ReplicationEditor::_tree_item_edited() {
+	// Handler for the tree's "item_edited" signal: a spawn or sync checkbox changed.
+	TreeItem *edited_item = tree->get_edited();
+	if (!edited_item || config.is_null()) {
+		return;
+	}
+
+	// Only the checkbox columns are expected here: 1 is "Spawn", 2 is "Sync".
+	const int column = tree->get_edited_column();
+	ERR_FAIL_COND(column < 1 || column > 2);
+	const bool is_spawn_column = column == 1;
+	const NodePath prop = edited_item->get_metadata(0);
+	const bool checked = edited_item->is_checked(column);
+	const String method(is_spawn_column ? "property_set_spawn" : "property_set_sync");
+
+	// Record the change as an undoable action; undo applies the opposite state.
+	UndoRedo *undo_redo = editor->get_undo_redo();
+	undo_redo->create_action(is_spawn_column ? TTR("Set spawn property") : TTR("Set sync property"));
+	undo_redo->add_do_method(config.ptr(), method, prop, checked);
+	undo_redo->add_undo_method(config.ptr(), method, prop, !checked);
+	undo_redo->add_do_method(this, "_update_checked", prop, column, checked);
+	undo_redo->add_undo_method(this, "_update_checked", prop, column, !checked);
+	undo_redo->commit_action();
+}
+
+void ReplicationEditor::_tree_button_pressed(Object *p_item, int p_column, int p_id) {
+	// Handler for the tree's "button_pressed" signal. Nothing is deleted here: the row's path
+	// is stored in `deleting` and _dialog_closed() acts on the user's answer.
+	if (TreeItem *pressed = Object::cast_to<TreeItem>(p_item)) {
+		deleting = pressed->get_metadata(0);
+		delete_dialog->set_text(TTR("Delete Property?") + "\n\"" + pressed->get_text(0) + "\"");
+		delete_dialog->popup_centered();
+	}
+}
+
+void ReplicationEditor::_dialog_closed(bool p_confirmed) { // Bound to the delete dialog: true from "confirmed", false from "cancelled".
+	if (deleting.is_empty() || config.is_null()) { // Nothing is pending, or there is no config to modify.
+		return;
+	}
+	if (p_confirmed) {
+		const NodePath prop = deleting;
+		int idx = config->property_get_index(prop); // Index, spawn and sync are captured so undo can restore the entry.
+		bool spawn = config->property_get_spawn(prop);
+		bool sync = config->property_get_sync(prop);
+		UndoRedo *undo_redo = editor->get_undo_redo();
+		undo_redo->create_action(TTR("Remove Property"));
+		undo_redo->add_do_method(config.ptr(), "remove_property", prop);
+		undo_redo->add_undo_method(config.ptr(), "add_property", prop, idx);
+		undo_redo->add_undo_method(config.ptr(), "property_set_spawn", prop, spawn);
+		undo_redo->add_undo_method(config.ptr(), "property_set_sync", prop, sync);
+		undo_redo->add_do_method(this, "_update_config"); // Refresh the tree after both do and undo.
+		undo_redo->add_undo_method(this, "_update_config");
+		undo_redo->commit_action();
+	}
+	deleting = NodePath(); // The pending deletion is cleared whether it was confirmed or cancelled.
+}
+
+void ReplicationEditor::_update_checked(const NodePath &p_prop, int p_column, bool p_checked) {
+	// Sets the checkbox in p_column on the first row whose metadata equals p_prop; does nothing without a root item.
+	TreeItem *root = tree->get_root();
+	if (!root) {
+		return;
+	}
+	for (TreeItem *row = root->get_first_child(); row; row = row->get_next()) {
+		if (row->get_metadata(0).operator NodePath() == p_prop) {
+			row->set_checked(p_column, p_checked);
+			return;
+		}
+	}
+}
+
+void ReplicationEditor::update_keying() {
+	// TODO: Make keying usable. Until then the whole body is compiled out, so this function is a no-op.
+#if 0
+	bool keying_enabled = false;
+	EditorHistory *editor_history = EditorNode::get_singleton()->get_editor_history();
+	if (is_visible_in_tree() && config.is_valid() && editor_history->get_path_size() > 0) {
+		Object *obj = ObjectDB::get_instance(editor_history->get_path_object(0));
+		keying_enabled = Object::cast_to<Node>(obj) != nullptr;
+	}
+
+	if (keying_enabled == keying) {
+		return;
+	}
+
+	keying = keying_enabled;
+	emit_signal(SNAME("keying_changed"));
+#endif
+}
+
+void ReplicationEditor::_update_config() {
+	// Rebuilds the tree from `config`: drops any pending deletion, recreates the root item,
+	// then adds one row per configured property (none when there is no config).
+	deleting = NodePath();
+	tree->clear();
+	tree->create_item();
+	if (config.is_valid()) {
+		TypedArray<NodePath> properties = config->get_properties();
+		for (int idx = 0; idx < properties.size(); idx++) {
+			const NodePath property_path = properties[idx];
+			_add_property(property_path, config->property_get_spawn(property_path), config->property_get_sync(property_path));
+		}
+	}
+	update_keying();
+}
+
+void ReplicationEditor::edit(MultiplayerSynchronizer *p_sync) {
+	// Re-selecting the synchronizer that is already being edited changes nothing.
+	if (current != p_sync) {
+		current = p_sync;
+		if (!current) {
+			config.unref();
+		} else {
+			config = current->get_replication_config();
+		}
+		_update_config();
+	}
+}
+
+Ref<Texture2D> ReplicationEditor::_get_class_icon(const Node *p_node) {
+	// Falls back to the "ImportFail" editor icon when there is no node or its class has no editor icon.
+	const bool has_icon = p_node && has_theme_icon(p_node->get_class(), "EditorIcons");
+	const String icon_name = has_icon ? p_node->get_class() : String("ImportFail");
+	return get_theme_icon(icon_name, "EditorIcons");
+}
+
+void ReplicationEditor::_add_property(const NodePath &p_property, bool p_spawn, bool p_sync) {
+	// Appends a row for p_property: label and icon (column 0), spawn/sync checkboxes (1, 2), remove button (3).
+	String prop = String(p_property);
+	TreeItem *item = tree->create_item();
+	for (int column = 0; column < 4; column++) {
+		item->set_selectable(column, false);
+	}
+	item->set_text(0, prop);
+	item->set_metadata(0, prop);
+	Node *root_node = current && !current->get_root_path().is_empty() ? current->get_node(current->get_root_path()) : nullptr;
+	Ref<Texture2D> icon = _get_class_icon(root_node);
+	if (root_node) {
+		// Split at the ':' index: substr(path.size()) failed to skip ':' when the node part was empty (":prop" -> "Name::prop").
+		const int colon = prop.find(":");
+		const String subpath = colon < 0 ? String() : prop.substr(colon + 1);
+		Node *node = root_node->get_node_or_null(prop.substr(0, colon));
+		if (!node) {
+			node = root_node;
+		}
+		item->set_text(0, String(node->get_name()) + ":" + subpath);
+		icon = _get_class_icon(node);
+	}
+	item->set_icon(0, icon);
+	item->add_button(3, get_theme_icon("Remove", "EditorIcons"));
+	const bool checked[2] = { p_spawn, p_sync };
+	for (int column = 1; column <= 2; column++) {
+		item->set_text_alignment(column, HORIZONTAL_ALIGNMENT_CENTER);
+		item->set_cell_mode(column, TreeItem::CELL_MODE_CHECK);
+		item->set_checked(column, checked[column - 1]);
+		item->set_editable(column, true);
+	}
+}
+
+void ReplicationEditor::property_keyed(const String &p_property) {
+	ERR_FAIL_COND(!current || config.is_null());
+	Node *root = current->get_node(current->get_root_path());
+	ERR_FAIL_COND(!root);
+	EditorHistory *history = editor->get_editor_history();
+	ERR_FAIL_COND(history->get_path_size() == 0);
+	Node *node = Object::cast_to<Node>(ObjectDB::get_instance(history->get_path_object(0)));
+	ERR_FAIL_COND(!node);
+	if (node->is_class("MultiplayerSynchronizer")) {
+		error_dialog->set_text(TTR("Properties of 'MultiplayerSynchronizer' cannot be configured for replication."));
+		error_dialog->popup_centered();
+		return;
+	}
+	if (history->get_path_size() > 1 || p_property.get_slice_count(":") > 1) {
+		error_dialog->set_text(TTR("Subresources cannot yet be configured for replication."));
+		error_dialog->popup_centered();
+		return;
+	}
+	// Build "<path from root to node>:<property>". NOTE: the loop below cannot iterate, since get_path_size() > 1 returned above.
+	String path = root->get_path_to(node);
+	for (int i = 1; i < history->get_path_size(); i++) {
+		String prop = history->get_path_property(i);
+		ERR_FAIL_COND(prop == "");
+		path += ":" + prop;
+	}
+	path += ":" + p_property;
+	// Register the addition as an undoable action that refreshes the tree on both do and undo.
+	NodePath prop = path;
+	UndoRedo *undo_redo = editor->get_undo_redo();
+	undo_redo->create_action(TTR("Add property"));
+	undo_redo->add_do_method(config.ptr(), "add_property", prop);
+	undo_redo->add_undo_method(config.ptr(), "remove_property", prop);
+	undo_redo->add_do_method(this, "_update_config");
+	undo_redo->add_undo_method(this, "_update_config");
+	undo_redo->commit_action();
+}
+
+/// ReplicationEditorPlugin
+ReplicationEditorPlugin::ReplicationEditorPlugin(EditorNode *p_node) {
+	editor = p_node;
+	repl_editor = memnew(ReplicationEditor(editor));
+	editor->add_bottom_panel_item(TTR("Replication"), repl_editor); // Registers the editor as the "Replication" bottom panel item.
+}
+
+ReplicationEditorPlugin::~ReplicationEditorPlugin() { // No explicit cleanup is performed here.
+}
+
+void ReplicationEditorPlugin::_keying_changed() {
+	// TODO: Make lock usable. Until then this "keying_changed" handler does nothing.
+	//InspectorDock::get_inspector_singleton()->set_keying(repl_editor->has_keying(), this);
+}
+
+void ReplicationEditorPlugin::_property_keyed(const String &p_keyed, const Variant &p_value, bool p_advance) {
+	// Forward keyed properties to the editor only while keying is on; p_value and p_advance are not used.
+	if (repl_editor->has_keying()) {
+		repl_editor->property_keyed(p_keyed);
+	}
+}
+
+void ReplicationEditorPlugin::_notification(int p_what) {
+	if (p_what == NOTIFICATION_ENTER_TREE) {
+		//Node3DEditor::get_singleton()->connect("transform_key_request", callable_mp(this, &AnimationPlayerEditorPlugin::_transform_key_request));
+		InspectorDock::get_inspector_singleton()->connect("property_keyed", callable_mp(this, &ReplicationEditorPlugin::_property_keyed)); // _property_keyed() ignores the signal while keying is off.
+		repl_editor->connect("keying_changed", callable_mp(this, &ReplicationEditorPlugin::_keying_changed)); // _keying_changed() is currently a stub.
+		// TODO: Make lock usable.
+		//InspectorDock::get_inspector_singleton()->connect("object_inspected", callable_mp(repl_editor, &ReplicationEditor::update_keying));
+		get_tree()->connect("node_removed", callable_mp(this, &ReplicationEditorPlugin::_node_removed)); // Lets _node_removed() stop editing a synchronizer that leaves the tree.
+	}
+}
+
+void ReplicationEditorPlugin::_node_removed(Node *p_node) {
+	if (p_node && p_node == repl_editor->get_current()) { // The synchronizer being edited was removed.
+		repl_editor->edit(nullptr); // Clears the editor's current synchronizer and config.
+		if (repl_editor->is_visible_in_tree()) {
+			editor->hide_bottom_panel();
+		}
+	}
+}
+
+void ReplicationEditorPlugin::edit(Object *p_object) {
+	repl_editor->edit(Object::cast_to<MultiplayerSynchronizer>(p_object)); // The cast result is passed on as-is, including nullptr.
+}
+
+bool ReplicationEditorPlugin::handles(Object *p_object) const {
+	return p_object->is_class("MultiplayerSynchronizer"); // NOTE(review): p_object is dereferenced without a null check; confirm callers never pass nullptr.
+}
+
+void ReplicationEditorPlugin::make_visible(bool p_visible) {
+	if (p_visible) { // Nothing is done when p_visible is false.
+		editor->make_bottom_panel_item_visible(repl_editor);
+	}
+}
diff --git a/editor/plugins/replication_editor_plugin.h b/editor/plugins/replication_editor_plugin.h
new file mode 100644
index 0000000000..049eda99cc
--- /dev/null
+++ b/editor/plugins/replication_editor_plugin.h
@@ -0,0 +1,108 @@
+/*************************************************************************/
+/*  replication_editor_plugin.h                                          */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef REPLICATION_EDITOR_PLUGIN_H
+#define REPLICATION_EDITOR_PLUGIN_H
+
+#include "editor/editor_node.h"
+#include "editor/editor_plugin.h"
+#include "scene/resources/scene_replication_config.h"
+
+class ConfirmationDialog;
+class MultiplayerSynchronizer;
+class Tree;
+
+class ReplicationEditor : public VBoxContainer {
+	GDCLASS(ReplicationEditor, VBoxContainer);
+	// Control listing the properties configured in a MultiplayerSynchronizer's SceneReplicationConfig.
+private:
+	EditorNode *editor = nullptr; // Set by the constructor.
+	MultiplayerSynchronizer *current = nullptr; // Synchronizer being edited, or nullptr.
+
+	AcceptDialog *error_dialog = nullptr;
+	ConfirmationDialog *delete_dialog = nullptr;
+	Button *add_button = nullptr;
+	LineEdit *np_line_edit = nullptr;
+
+	Ref<SceneReplicationConfig> config; // Replication config of `current`; may be null.
+	NodePath deleting; // Property awaiting delete confirmation; empty when none.
+	Tree *tree = nullptr; // Created by the constructor.
+	bool keying = false; // Only assigned inside the compiled-out body of update_keying().
+
+	Ref<Texture2D> _get_class_icon(const Node *p_node);
+
+	void _add_pressed();
+	void _tree_item_edited();
+	void _tree_button_pressed(Object *p_item, int p_column, int p_id);
+	void _update_checked(const NodePath &p_prop, int p_column, bool p_checked);
+	void _update_config();
+	void _dialog_closed(bool p_confirmed);
+	void _add_property(const NodePath &p_property, bool p_spawn = true, bool p_sync = true);
+
+protected:
+	static void _bind_methods();
+
+	void _notification(int p_what);
+
+public:
+	void update_keying();
+	void edit(MultiplayerSynchronizer *p_object);
+	bool has_keying() const { return keying; }
+	MultiplayerSynchronizer *get_current() const { return current; }
+	void property_keyed(const String &p_property);
+
+	ReplicationEditor(EditorNode *p_node);
+	~ReplicationEditor() {}
+};
+
+class ReplicationEditorPlugin : public EditorPlugin {
+	GDCLASS(ReplicationEditorPlugin, EditorPlugin);
+	// Editor plugin that shows a ReplicationEditor in the bottom panel for MultiplayerSynchronizer objects.
+private:
+	EditorNode *editor = nullptr; // Set by the constructor.
+	ReplicationEditor *repl_editor = nullptr; // Created by the constructor.
+
+	void _node_removed(Node *p_node);
+	void _keying_changed();
+	void _property_keyed(const String &p_keyed, const Variant &p_value, bool p_advance);
+
+protected:
+	void _notification(int p_what);
+
+public:
+	virtual void edit(Object *p_object) override;
+	virtual bool handles(Object *p_object) const override;
+	virtual void make_visible(bool p_visible) override;
+
+	ReplicationEditorPlugin(EditorNode *p_node);
+	~ReplicationEditorPlugin();
+};
+
+#endif // REPLICATION_EDITOR_PLUGIN_H
diff --git a/editor/plugins/tiles/tile_map_editor.cpp b/editor/plugins/tiles/tile_map_editor.cpp
index 89027b174d..6e3724ead9 100644
--- a/editor/plugins/tiles/tile_map_editor.cpp
+++ b/editor/plugins/tiles/tile_map_editor.cpp
@@ -2337,7 +2337,7 @@ Map<Vector2i, TileMapCell> TileMapEditorTerrainsPlugin::_draw_terrains(const Map
 	bool to_replace_modified = true;
 	while (to_replace_modified) {
 		// Get the constraints from the removed cells.
-		removed_cells_constraints_set = tile_map->get_terrain_constraints_from_removed_cells_list(tile_map_layer, to_replace, p_terrain_set);
+		removed_cells_constraints_set = tile_map->get_terrain_constraints_from_removed_cells_list(tile_map_layer, to_replace, p_terrain_set, false);
 
 		// Filter the sources to make sure they are in the potential_to_replace.
 		Map<TileMap::TerrainConstraint, Set<Vector2i>> per_constraint_tiles;
@@ -3735,7 +3735,7 @@ void TileMapEditor::_move_tile_map_array_element(Object *p_undo_redo, Object *p_
 			String str = pi.name.trim_prefix(p_array_prefix);
 			int to_char_index = 0;
 			while (to_char_index < str.length()) {
-				if (str[to_char_index] < '0' || str[to_char_index] > '9') {
+				if (!is_digit(str[to_char_index])) {
 					break;
 				}
 				to_char_index++;
diff --git a/editor/plugins/tiles/tile_set_editor.cpp b/editor/plugins/tiles/tile_set_editor.cpp
index be261927ee..ab355d4658 100644
--- a/editor/plugins/tiles/tile_set_editor.cpp
+++ b/editor/plugins/tiles/tile_set_editor.cpp
@@ -458,7 +458,7 @@ void TileSetEditor::_move_tile_set_array_element(Object *p_undo_redo, Object *p_
 			String str = pi.name.trim_prefix(p_array_prefix);
 			int to_char_index = 0;
 			while (to_char_index < str.length()) {
-				if (str[to_char_index] < '0' || str[to_char_index] > '9') {
+				if (!is_digit(str[to_char_index])) {
 					break;
 				}
 				to_char_index++;
diff --git a/editor/scene_tree_editor.cpp b/editor/scene_tree_editor.cpp
index c755bca64f..fcb4f5b32e 100644
--- a/editor/scene_tree_editor.cpp
+++ b/editor/scene_tree_editor.cpp
@@ -362,6 +362,17 @@ bool SceneTreeEditor::_add_nodes(Node *p_node, TreeItem *p_parent, bool p_scroll
 			}
 
 			_update_visibility_color(p_node, item);
+		} else if (p_node->is_class("CanvasLayer")) {
+			bool v = p_node->call("is_visible");
+			if (v) {
+				item->add_button(0, get_theme_icon("GuiVisibilityVisible", "EditorIcons"), BUTTON_VISIBILITY, false, TTR("Toggle Visibility"));
+			} else {
+				item->add_button(0, get_theme_icon("GuiVisibilityHidden", "EditorIcons"), BUTTON_VISIBILITY, false, TTR("Toggle Visibility"));
+			}
+
+			if (!p_node->is_connected("visibility_changed", callable_mp(this, &SceneTreeEditor::_node_visibility_changed))) {
+				p_node->connect("visibility_changed", callable_mp(this, &SceneTreeEditor::_node_visibility_changed), varray(p_node));
+			}
 		} else if (p_node->is_class("Node3D")) {
 			bool is_locked = p_node->has_meta("_edit_lock_");
 			if (is_locked) {
@@ -471,6 +482,9 @@ void SceneTreeEditor::_node_visibility_changed(Node *p_node) {
 	if (p_node->is_class("CanvasItem")) {
 		visible = p_node->call("is_visible");
 		CanvasItemEditor::get_singleton()->get_viewport_control()->update();
+	} else if (p_node->is_class("CanvasLayer")) {
+		visible = p_node->call("is_visible");
+		CanvasItemEditor::get_singleton()->get_viewport_control()->update();
 	} else if (p_node->is_class("Node3D")) {
 		visible = p_node->call("is_visible");
 	}
@@ -514,7 +528,7 @@ void SceneTreeEditor::_node_removed(Node *p_node) {
 		p_node->disconnect("script_changed", callable_mp(this, &SceneTreeEditor::_node_script_changed));
 	}
 
-	if (p_node->is_class("Node3D") || p_node->is_class("CanvasItem")) {
+	if (p_node->is_class("Node3D") || p_node->is_class("CanvasItem") || p_node->is_class("CanvasLayer")) {
 		if (p_node->is_connected("visibility_changed", callable_mp(this, &SceneTreeEditor::_node_visibility_changed))) {
 			p_node->disconnect("visibility_changed", callable_mp(this, &SceneTreeEditor::_node_visibility_changed));
 		}
diff --git a/editor/script_create_dialog.cpp b/editor/script_create_dialog.cpp
index eda5801905..cafa12c42e 100644
--- a/editor/script_create_dialog.cpp
+++ b/editor/script_create_dialog.cpp
@@ -153,7 +153,7 @@ bool ScriptCreateDialog::_validate_class(const String &p_string) {
 			}
 		}
 
-		bool valid_char = (p_string[i] >= '0' && p_string[i] <= '9') || (p_string[i] >= 'a' && p_string[i] <= 'z') || (p_string[i] >= 'A' && p_string[i] <= 'Z') || p_string[i] == '_' || p_string[i] == '.';
+		bool valid_char = is_ascii_identifier_char(p_string[i]) || p_string[i] == '.';
 
 		if (!valid_char) {
 			return false;
@@ -763,10 +763,10 @@ void ScriptCreateDialog::_update_dialog() {
 }
 
 ScriptLanguage::ScriptTemplate ScriptCreateDialog::_get_current_template() const {
-	int selected_id = template_menu->get_selected_id();
+	int selected_index = template_menu->get_selected();
 	for (const ScriptLanguage::ScriptTemplate &t : template_list) {
 		if (is_using_templates) {
-			if (t.id == selected_id) {
+			if (t.id == selected_index) {
 				return t;
 			}
 		} else {
diff --git a/modules/cvtt/SCsub b/modules/cvtt/SCsub
index e56177d6e9..1d5a7ff6a3 100644
--- a/modules/cvtt/SCsub
+++ b/modules/cvtt/SCsub
@@ -11,7 +11,16 @@ thirdparty_obj = []
 
 thirdparty_dir = "#thirdparty/cvtt/"
 thirdparty_sources = [
-    "ConvectionKernels.cpp",
+    "ConvectionKernels_API.cpp",
+    "ConvectionKernels_ETC.cpp",
+    "ConvectionKernels_BC67.cpp",
+    "ConvectionKernels_IndexSelector.cpp",
+    "ConvectionKernels_BC6H_IO.cpp",
+    "ConvectionKernels_S3TC.cpp",
+    "ConvectionKernels_BC7_PrioData.cpp",
+    "ConvectionKernels_SingleFile.cpp",
+    "ConvectionKernels_BCCommon.cpp",
+    "ConvectionKernels_Util.cpp",
 ]
 
 thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
diff --git a/modules/cvtt/image_compress_cvtt.cpp b/modules/cvtt/image_compress_cvtt.cpp
index 9e0579740b..d18340a2c8 100644
--- a/modules/cvtt/image_compress_cvtt.cpp
+++ b/modules/cvtt/image_compress_cvtt.cpp
@@ -41,7 +41,7 @@ struct CVTTCompressionJobParams {
 	bool is_hdr = false;
 	bool is_signed = false;
 	int bytes_per_pixel = 0;
-
+	cvtt::BC7EncodingPlan bc7_plan;
 	cvtt::Options options;
 };
 
@@ -116,7 +116,7 @@ static void _digest_row_task(const CVTTCompressionJobParams &p_job_params, const
 				cvtt::Kernels::EncodeBC6HU(output_blocks, input_blocks_hdr, p_job_params.options);
 			}
 		} else {
-			cvtt::Kernels::EncodeBC7(output_blocks, input_blocks_ldr, p_job_params.options);
+			cvtt::Kernels::EncodeBC7(output_blocks, input_blocks_ldr, p_job_params.options, p_job_params.bc7_plan);
 		}
 
 		unsigned int num_real_blocks = ((w - x_start) + 3) / 4;
@@ -141,7 +141,6 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
 	if (p_image->get_format() >= Image::FORMAT_BPTC_RGBA) {
 		return; //do not compress, already compressed
 	}
-
 	int w = p_image->get_width();
 	int h = p_image->get_height();
 
@@ -153,22 +152,8 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
 	}
 
 	cvtt::Options options;
-	uint32_t flags = cvtt::Flags::Fastest;
-
-	if (p_lossy_quality > 0.85) {
-		flags = cvtt::Flags::Ultra;
-	} else if (p_lossy_quality > 0.75) {
-		flags = cvtt::Flags::Better;
-	} else if (p_lossy_quality > 0.55) {
-		flags = cvtt::Flags::Default;
-	} else if (p_lossy_quality > 0.35) {
-		flags = cvtt::Flags::Fast;
-	} else if (p_lossy_quality > 0.15) {
-		flags = cvtt::Flags::Faster;
-	}
-
+	uint32_t flags = cvtt::Flags::Default;
 	flags |= cvtt::Flags::BC7_RespectPunchThrough;
-
 	if (p_channels == Image::USED_CHANNELS_RG) { //guessing this is a normal map
 		flags |= cvtt::Flags::Uniform;
 	}
@@ -215,12 +200,15 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
 	job_queue.job_params.is_signed = is_signed;
 	job_queue.job_params.options = options;
 	job_queue.job_params.bytes_per_pixel = is_hdr ? 6 : 4;
+	cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(job_queue.job_params.bc7_plan, 5);
 
-#ifdef NO_THREADS
 	int num_job_threads = 0;
-#else
-	int num_job_threads = OS::get_singleton()->can_use_threads() ? (OS::get_singleton()->get_processor_count() - 1) : 0;
-#endif
+	// Amdahl's law (Wikipedia)
+	// If a program needs 20 hours to complete using a single thread, but a one-hour portion of the program cannot be parallelized,
+	// therefore only the remaining 19 hours (p = 0.95) of execution time can be parallelized, then regardless of how many threads are devoted
+	// to a parallelized execution of this program, the minimum execution time cannot be less than one hour.
+	//
+	// The number of executions with different inputs can be increased while the latency is the same.
 
 	Vector<CVTTCompressionRowTask> tasks;
 
@@ -278,7 +266,6 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
 			memdelete(threads_wb[i]);
 		}
 	}
-
 	p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
 }
 
@@ -388,6 +375,5 @@ void image_decompress_cvtt(Image *p_image) {
 		w >>= 1;
 		h >>= 1;
 	}
-
 	p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
 }
diff --git a/modules/gdnative/gdnative/packed_arrays.cpp b/modules/gdnative/gdnative/packed_arrays.cpp
index bb6f0324a8..0c49694e0b 100644
--- a/modules/gdnative/gdnative/packed_arrays.cpp
+++ b/modules/gdnative/gdnative/packed_arrays.cpp
@@ -32,7 +32,7 @@
 
 #include "core/variant/variant.h"
 
-#include "core/math/vector2.h"
+#include "core/math/vector2i.h"
 #include "core/math/vector3i.h"
 
 static_assert(sizeof(godot_packed_byte_array) == sizeof(PackedByteArray), "PackedByteArray size mismatch");
diff --git a/modules/gdnative/gdnative/rect2.cpp b/modules/gdnative/gdnative/rect2.cpp
index f4674850e3..7e0ce76c26 100644
--- a/modules/gdnative/gdnative/rect2.cpp
+++ b/modules/gdnative/gdnative/rect2.cpp
@@ -31,6 +31,8 @@
 #include "gdnative/rect2.h"
 
 #include "core/math/rect2.h"
+#include "core/math/rect2i.h"
+#include "core/os/memory.h"
 
 static_assert(sizeof(godot_rect2) == sizeof(Rect2), "Rect2 size mismatch");
 static_assert(sizeof(godot_rect2i) == sizeof(Rect2i), "Rect2i size mismatch");
diff --git a/modules/gdnative/gdnative/transform2d.cpp b/modules/gdnative/gdnative/transform2d.cpp
index 45ba790dc1..7dc07024e5 100644
--- a/modules/gdnative/gdnative/transform2d.cpp
+++ b/modules/gdnative/gdnative/transform2d.cpp
@@ -31,6 +31,7 @@
 #include "gdnative/transform2d.h"
 
 #include "core/math/transform_2d.h"
+#include "core/os/memory.h"
 
 static_assert(sizeof(godot_transform2d) == sizeof(Transform2D), "Transform2D size mismatch");
 
diff --git a/modules/gdnative/gdnative/vector2.cpp b/modules/gdnative/gdnative/vector2.cpp
index eb8ffd74cd..a8d4281d25 100644
--- a/modules/gdnative/gdnative/vector2.cpp
+++ b/modules/gdnative/gdnative/vector2.cpp
@@ -31,6 +31,8 @@
 #include "gdnative/vector2.h"
 
 #include "core/math/vector2.h"
+#include "core/math/vector2i.h"
+#include "core/os/memory.h"
 
 static_assert(sizeof(godot_vector2) == sizeof(Vector2), "Vector2 size mismatch");
 static_assert(sizeof(godot_vector2i) == sizeof(Vector2i), "Vector2i size mismatch");
diff --git a/modules/gdscript/editor/gdscript_highlighter.cpp b/modules/gdscript/editor/gdscript_highlighter.cpp
index 2f571874ae..ac6684a29c 100644
--- a/modules/gdscript/editor/gdscript_highlighter.cpp
+++ b/modules/gdscript/editor/gdscript_highlighter.cpp
@@ -33,18 +33,6 @@
 #include "../gdscript_tokenizer.h"
 #include "editor/editor_settings.h"
 
-static bool _is_char(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
-}
-
-static bool _is_hex_symbol(char32_t c) {
-	return ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
-static bool _is_bin_symbol(char32_t c) {
-	return (c == '0' || c == '1');
-}
-
 Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_line) {
 	Dictionary color_map;
 
@@ -102,7 +90,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 		color = font_color;
 		bool is_char = !is_symbol(str[j]);
 		bool is_a_symbol = is_symbol(str[j]);
-		bool is_number = (str[j] >= '0' && str[j] <= '9');
+		bool is_number = is_digit(str[j]);
 
 		/* color regions */
 		if (is_a_symbol || in_region != -1) {
@@ -241,14 +229,14 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 		}
 
 		// allow ABCDEF in hex notation
-		if (is_hex_notation && (_is_hex_symbol(str[j]) || is_number)) {
+		if (is_hex_notation && (is_hex_digit(str[j]) || is_number)) {
 			is_number = true;
 		} else {
 			is_hex_notation = false;
 		}
 
 		// disallow anything not a 0 or 1
-		if (is_bin_notation && (_is_bin_symbol(str[j]))) {
+		if (is_bin_notation && (is_binary_digit(str[j]))) {
 			is_number = true;
 		} else if (is_bin_notation) {
 			is_bin_notation = false;
@@ -270,7 +258,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 			}
 		}
 
-		if (!in_word && _is_char(str[j]) && !is_number) {
+		if (!in_word && (is_ascii_char(str[j]) || is_underscore(str[j])) && !is_number) {
 			in_word = true;
 		}
 
diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp
index 5b40c94110..a80874d785 100644
--- a/modules/gdscript/gdscript.cpp
+++ b/modules/gdscript/gdscript.cpp
@@ -2022,8 +2022,6 @@ void GDScriptLanguage::get_reserved_words(List<String> *p_words) const {
 		"preload",
 		"signal",
 		"super",
-		"trait",
-		"yield",
 		// var
 		"const",
 		"enum",
@@ -2040,6 +2038,11 @@ void GDScriptLanguage::get_reserved_words(List<String> *p_words) const {
 		"return",
 		"match",
 		"while",
+		// These keywords are not implemented currently, but reserved for (potential) future use.
+		// We highlight them as keywords to make errors easier to understand.
+		"trait",
+		"namespace",
+		"yield",
 		nullptr
 	};
 
diff --git a/modules/gdscript/gdscript_analyzer.cpp b/modules/gdscript/gdscript_analyzer.cpp
index d11174227a..9ff52347e9 100644
--- a/modules/gdscript/gdscript_analyzer.cpp
+++ b/modules/gdscript/gdscript_analyzer.cpp
@@ -108,7 +108,7 @@ static GDScriptParser::DataType make_native_enum_type(const StringName &p_native
 	GDScriptParser::DataType type;
 	type.type_source = GDScriptParser::DataType::ANNOTATED_EXPLICIT;
 	type.kind = GDScriptParser::DataType::ENUM;
-	type.builtin_type = Variant::OBJECT;
+	type.builtin_type = Variant::INT;
 	type.is_constant = true;
 	type.is_meta_type = true;
 
@@ -650,9 +650,9 @@ void GDScriptAnalyzer::resolve_class_interface(GDScriptParser::ClassNode *p_clas
 					datatype = specified_type;
 
 					if (member.variable->initializer != nullptr) {
-						if (!is_type_compatible(datatype, member.variable->initializer->get_datatype(), true)) {
+						if (!is_type_compatible(datatype, member.variable->initializer->get_datatype(), true, member.variable->initializer)) {
 							// Try reverse test since it can be a masked subtype.
-							if (!is_type_compatible(member.variable->initializer->get_datatype(), datatype, true)) {
+							if (!is_type_compatible(member.variable->initializer->get_datatype(), datatype, true, member.variable->initializer)) {
 								push_error(vformat(R"(Value of type "%s" cannot be assigned to a variable of type "%s".)", member.variable->initializer->get_datatype().to_string(), datatype.to_string()), member.variable->initializer);
 							} else {
 								// TODO: Add warning.
@@ -1400,9 +1400,9 @@ void GDScriptAnalyzer::resolve_variable(GDScriptParser::VariableNode *p_variable
 		type.is_meta_type = false;
 
 		if (p_variable->initializer != nullptr) {
-			if (!is_type_compatible(type, p_variable->initializer->get_datatype(), true)) {
+			if (!is_type_compatible(type, p_variable->initializer->get_datatype(), true, p_variable->initializer)) {
 				// Try reverse test since it can be a masked subtype.
-				if (!is_type_compatible(p_variable->initializer->get_datatype(), type, true)) {
+				if (!is_type_compatible(p_variable->initializer->get_datatype(), type, true, p_variable->initializer)) {
 					push_error(vformat(R"(Value of type "%s" cannot be assigned to a variable of type "%s".)", p_variable->initializer->get_datatype().to_string(), type.to_string()), p_variable->initializer);
 				} else {
 					// TODO: Add warning.
@@ -1877,11 +1877,11 @@ void GDScriptAnalyzer::reduce_assignment(GDScriptParser::AssignmentNode *p_assig
 
 	if (!assignee_type.is_variant() && assigned_value_type.is_hard_type()) {
 		if (compatible) {
-			compatible = is_type_compatible(assignee_type, op_type, true);
+			compatible = is_type_compatible(assignee_type, op_type, true, p_assignment->assigned_value);
 			if (!compatible) {
 				if (assignee_type.is_hard_type()) {
 					// Try reverse test since it can be a masked subtype.
-					if (!is_type_compatible(op_type, assignee_type, true)) {
+					if (!is_type_compatible(op_type, assignee_type, true, p_assignment->assigned_value)) {
 						push_error(vformat(R"(Cannot assign a value of type "%s" to a target of type "%s".)", assigned_value_type.to_string(), assignee_type.to_string()), p_assignment->assigned_value);
 					} else {
 						// TODO: Add warning.
@@ -2416,6 +2416,11 @@ void GDScriptAnalyzer::reduce_call(GDScriptParser::CallNode *p_call, bool p_is_a
 		}
 		validate_call_arg(par_types, default_arg_count, is_vararg, p_call);
 
+		if (base_type.kind == GDScriptParser::DataType::ENUM && base_type.is_meta_type) {
+			// Enum type is treated as a dictionary value for function calls.
+			base_type.is_meta_type = false;
+		}
+
 		if (is_self && parser->current_function != nullptr && parser->current_function->is_static && !is_static) {
 			push_error(vformat(R"*(Cannot call non-static function "%s()" from static function "%s()".)*", p_call->function_name, parser->current_function->identifier->name), p_call->callee);
 		} else if (!is_self && base_type.is_meta_type && !is_static) {
@@ -2474,17 +2479,24 @@ void GDScriptAnalyzer::reduce_cast(GDScriptParser::CastNode *p_cast) {
 	GDScriptParser::DataType cast_type = resolve_datatype(p_cast->cast_type);
 
 	if (!cast_type.is_set()) {
+		mark_node_unsafe(p_cast);
 		return;
 	}
 
-	cast_type.is_meta_type = false; // The casted value won't be a type name.
+	cast_type = type_from_metatype(cast_type); // The casted value won't be a type name.
 	p_cast->set_datatype(cast_type);
 
 	if (!cast_type.is_variant()) {
 		GDScriptParser::DataType op_type = p_cast->operand->get_datatype();
 		if (!op_type.is_variant()) {
 			bool valid = false;
-			if (op_type.kind == GDScriptParser::DataType::BUILTIN && cast_type.kind == GDScriptParser::DataType::BUILTIN) {
+			if (op_type.kind == GDScriptParser::DataType::ENUM && cast_type.kind == GDScriptParser::DataType::ENUM) {
+				// Enum types are compatible with each other, so it's a safe cast.
+				valid = true;
+			} else if (op_type.kind == GDScriptParser::DataType::BUILTIN && op_type.builtin_type == Variant::INT && cast_type.kind == GDScriptParser::DataType::ENUM) {
+				// Converting int to enum is always valid.
+				valid = true;
+			} else if (op_type.kind == GDScriptParser::DataType::BUILTIN && cast_type.kind == GDScriptParser::DataType::BUILTIN) {
 				valid = Variant::can_convert(op_type.builtin_type, cast_type.builtin_type);
 			} else if (op_type.kind != GDScriptParser::DataType::BUILTIN && cast_type.kind != GDScriptParser::DataType::BUILTIN) {
 				valid = is_type_compatible(cast_type, op_type) || is_type_compatible(op_type, cast_type);
@@ -2586,6 +2598,34 @@ void GDScriptAnalyzer::reduce_identifier_from_base(GDScriptParser::IdentifierNod
 
 	const StringName &name = p_identifier->name;
 
+	if (base.kind == GDScriptParser::DataType::ENUM) {
+		if (base.is_meta_type) {
+			if (base.enum_values.has(name)) {
+				p_identifier->is_constant = true;
+				p_identifier->reduced_value = base.enum_values[name];
+
+				GDScriptParser::DataType result;
+				result.type_source = GDScriptParser::DataType::ANNOTATED_EXPLICIT;
+				result.kind = GDScriptParser::DataType::ENUM;
+				result.is_constant = true;
+				result.builtin_type = Variant::INT;
+				result.native_type = base.native_type;
+				result.enum_type = base.enum_type;
+				p_identifier->set_datatype(result);
+				return;
+			} else {
+				// Consider as a Dictionary, so it can be anything.
+				// This will be evaluated in the next if block.
+				base.kind = GDScriptParser::DataType::BUILTIN;
+				base.builtin_type = Variant::DICTIONARY;
+				base.is_meta_type = false;
+			}
+		} else {
+			push_error(R"(Cannot get property from enum value.)", p_identifier);
+			return;
+		}
+	}
+
 	if (base.kind == GDScriptParser::DataType::BUILTIN) {
 		if (base.is_meta_type) {
 			bool valid = true;
@@ -2632,32 +2672,6 @@ void GDScriptAnalyzer::reduce_identifier_from_base(GDScriptParser::IdentifierNod
 		return;
 	}
 
-	if (base.kind == GDScriptParser::DataType::ENUM) {
-		if (base.is_meta_type) {
-			if (base.enum_values.has(name)) {
-				p_identifier->is_constant = true;
-				p_identifier->reduced_value = base.enum_values[name];
-
-				GDScriptParser::DataType result;
-				result.type_source = GDScriptParser::DataType::ANNOTATED_EXPLICIT;
-				result.kind = GDScriptParser::DataType::ENUM_VALUE;
-				result.is_constant = true;
-				result.builtin_type = Variant::INT;
-				result.native_type = base.native_type;
-				result.enum_type = name;
-				p_identifier->set_datatype(result);
-			} else {
-				// Consider as a Dictionary
-				GDScriptParser::DataType dummy;
-				dummy.kind = GDScriptParser::DataType::VARIANT;
-				p_identifier->set_datatype(dummy);
-			}
-		} else {
-			push_error(R"(Cannot get property from enum value.)", p_identifier);
-		}
-		return;
-	}
-
 	GDScriptParser::ClassNode *base_class = base.class_type;
 
 	// TODO: Switch current class/function/suite here to avoid misrepresenting identifiers (in recursive reduce calls).
@@ -2793,7 +2807,7 @@ void GDScriptAnalyzer::reduce_identifier(GDScriptParser::IdentifierNode *p_ident
 			if (element.identifier->name == p_identifier->name) {
 				GDScriptParser::DataType type;
 				type.type_source = GDScriptParser::DataType::ANNOTATED_EXPLICIT;
-				type.kind = element.parent_enum->identifier ? GDScriptParser::DataType::ENUM_VALUE : GDScriptParser::DataType::BUILTIN;
+				type.kind = element.parent_enum->identifier ? GDScriptParser::DataType::ENUM : GDScriptParser::DataType::BUILTIN;
 				type.builtin_type = Variant::INT;
 				type.is_constant = true;
 				if (element.parent_enum->identifier) {
@@ -3493,6 +3507,9 @@ GDScriptParser::DataType GDScriptAnalyzer::type_from_metatype(const GDScriptPars
 	GDScriptParser::DataType result = p_meta_type;
 	result.is_meta_type = false;
 	result.is_constant = false;
+	if (p_meta_type.kind == GDScriptParser::DataType::ENUM) {
+		result.builtin_type = Variant::INT;
+	}
 	return result;
 }
 
@@ -3549,6 +3566,18 @@ bool GDScriptAnalyzer::get_function_signature(GDScriptParser::CallNode *p_source
 	r_default_arg_count = 0;
 	StringName function_name = p_function;
 
+	if (p_base_type.kind == GDScriptParser::DataType::ENUM) {
+		if (p_base_type.is_meta_type) {
+			// Enum type can be treated as a dictionary value.
+			p_base_type.kind = GDScriptParser::DataType::BUILTIN;
+			p_base_type.builtin_type = Variant::DICTIONARY;
+			p_base_type.is_meta_type = false;
+		} else {
+			push_error("Cannot call function on enum value.", p_source);
+			return false;
+		}
+	}
+
 	if (p_base_type.kind == GDScriptParser::DataType::BUILTIN) {
 		// Construct a base type to get methods.
 		Callable::CallError err;
@@ -3799,6 +3828,22 @@ GDScriptParser::DataType GDScriptAnalyzer::get_operation_type(Variant::Operator
 
 	Variant::Type a_type = p_a.builtin_type;
 	Variant::Type b_type = p_b.builtin_type;
+
+	if (p_a.kind == GDScriptParser::DataType::ENUM) {
+		if (p_a.is_meta_type) {
+			a_type = Variant::DICTIONARY;
+		} else {
+			a_type = Variant::INT;
+		}
+	}
+	if (p_b.kind == GDScriptParser::DataType::ENUM) {
+		if (p_b.is_meta_type) {
+			b_type = Variant::DICTIONARY;
+		} else {
+			b_type = Variant::INT;
+		}
+	}
+
 	Variant::ValidatedOperatorEvaluator op_eval = Variant::get_validated_operator_evaluator(p_operation, a_type, b_type);
 
 	bool hard_operation = p_a.is_hard_type() && p_b.is_hard_type();
@@ -3828,7 +3873,7 @@ GDScriptParser::DataType GDScriptAnalyzer::get_operation_type(Variant::Operator
 }
 
 // TODO: Add safe/unsafe return variable (for variant cases)
-bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_target, const GDScriptParser::DataType &p_source, bool p_allow_implicit_conversion) const {
+bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_target, const GDScriptParser::DataType &p_source, bool p_allow_implicit_conversion, const GDScriptParser::Node *p_source_node) {
 	// These return "true" so it doesn't affect users negatively.
 	ERR_FAIL_COND_V_MSG(!p_target.is_set(), true, "Parser bug (please report): Trying to check compatibility of unset target type");
 	ERR_FAIL_COND_V_MSG(!p_source.is_set(), true, "Parser bug (please report): Trying to check compatibility of unset value type");
@@ -3848,7 +3893,7 @@ bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_targ
 		if (!valid && p_allow_implicit_conversion) {
 			valid = Variant::can_convert_strict(p_source.builtin_type, p_target.builtin_type);
 		}
-		if (!valid && p_target.builtin_type == Variant::INT && p_source.kind == GDScriptParser::DataType::ENUM_VALUE) {
+		if (!valid && p_target.builtin_type == Variant::INT && p_source.kind == GDScriptParser::DataType::ENUM && !p_source.is_meta_type) {
 			// Enum value is also integer.
 			valid = true;
 		}
@@ -3869,6 +3914,11 @@ bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_targ
 
 	if (p_target.kind == GDScriptParser::DataType::ENUM) {
 		if (p_source.kind == GDScriptParser::DataType::BUILTIN && p_source.builtin_type == Variant::INT) {
+#ifdef DEBUG_ENABLED
+			if (p_source_node) {
+				parser->push_warning(p_source_node, GDScriptWarning::INT_ASSIGNED_TO_ENUM);
+			}
+#endif
 			return true;
 		}
 		if (p_source.kind == GDScriptParser::DataType::ENUM) {
@@ -3876,11 +3926,6 @@ bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_targ
 				return true;
 			}
 		}
-		if (p_source.kind == GDScriptParser::DataType::ENUM_VALUE) {
-			if (p_source.native_type == p_target.native_type && p_target.enum_values.has(p_source.enum_type)) {
-				return true;
-			}
-		}
 		return false;
 	}
 
@@ -3935,7 +3980,6 @@ bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_targ
 		case GDScriptParser::DataType::VARIANT:
 		case GDScriptParser::DataType::BUILTIN:
 		case GDScriptParser::DataType::ENUM:
-		case GDScriptParser::DataType::ENUM_VALUE:
 		case GDScriptParser::DataType::UNRESOLVED:
 			break; // Already solved before.
 	}
@@ -3972,7 +4016,6 @@ bool GDScriptAnalyzer::is_type_compatible(const GDScriptParser::DataType &p_targ
 		case GDScriptParser::DataType::VARIANT:
 		case GDScriptParser::DataType::BUILTIN:
 		case GDScriptParser::DataType::ENUM:
-		case GDScriptParser::DataType::ENUM_VALUE:
 		case GDScriptParser::DataType::UNRESOLVED:
 			break; // Already solved before.
 	}
diff --git a/modules/gdscript/gdscript_analyzer.h b/modules/gdscript/gdscript_analyzer.h
index 4cee5cb44a..2697a6ec2b 100644
--- a/modules/gdscript/gdscript_analyzer.h
+++ b/modules/gdscript/gdscript_analyzer.h
@@ -112,7 +112,7 @@ class GDScriptAnalyzer {
 	GDScriptParser::DataType get_operation_type(Variant::Operator p_operation, const GDScriptParser::DataType &p_a, const GDScriptParser::DataType &p_b, bool &r_valid, const GDScriptParser::Node *p_source);
 	GDScriptParser::DataType get_operation_type(Variant::Operator p_operation, const GDScriptParser::DataType &p_a, bool &r_valid, const GDScriptParser::Node *p_source);
 	void update_array_literal_element_type(const GDScriptParser::DataType &p_base_type, GDScriptParser::ArrayNode *p_array_literal);
-	bool is_type_compatible(const GDScriptParser::DataType &p_target, const GDScriptParser::DataType &p_source, bool p_allow_implicit_conversion = false) const;
+	bool is_type_compatible(const GDScriptParser::DataType &p_target, const GDScriptParser::DataType &p_source, bool p_allow_implicit_conversion = false, const GDScriptParser::Node *p_source_node = nullptr);
 	void push_error(const String &p_message, const GDScriptParser::Node *p_origin);
 	void mark_node_unsafe(const GDScriptParser::Node *p_node);
 	bool class_exists(const StringName &p_class) const;
diff --git a/modules/gdscript/gdscript_compiler.cpp b/modules/gdscript/gdscript_compiler.cpp
index ca125d3a07..108c988add 100644
--- a/modules/gdscript/gdscript_compiler.cpp
+++ b/modules/gdscript/gdscript_compiler.cpp
@@ -141,10 +141,13 @@ GDScriptDataType GDScriptCompiler::_gdtype_from_datatype(const GDScriptParser::D
 			}
 		} break;
 		case GDScriptParser::DataType::ENUM:
-		case GDScriptParser::DataType::ENUM_VALUE:
 			result.has_type = true;
 			result.kind = GDScriptDataType::BUILTIN;
-			result.builtin_type = Variant::INT;
+			if (p_datatype.is_meta_type) {
+				result.builtin_type = Variant::DICTIONARY;
+			} else {
+				result.builtin_type = Variant::INT;
+			}
 			break;
 		case GDScriptParser::DataType::UNRESOLVED: {
 			ERR_PRINT("Parser bug: converting unresolved type.");
@@ -469,7 +472,14 @@ GDScriptCodeGenerator::Address GDScriptCompiler::_parse_expression(CodeGen &code
 		} break;
 		case GDScriptParser::Node::CAST: {
 			const GDScriptParser::CastNode *cn = static_cast<const GDScriptParser::CastNode *>(p_expression);
-			GDScriptDataType cast_type = _gdtype_from_datatype(cn->cast_type->get_datatype());
+			GDScriptParser::DataType og_cast_type = cn->cast_type->get_datatype();
+			GDScriptDataType cast_type = _gdtype_from_datatype(og_cast_type);
+
+			if (og_cast_type.kind == GDScriptParser::DataType::ENUM) {
+				// Enum types are usually treated as dictionaries, but in this case we want to cast to an integer.
+				cast_type.kind = GDScriptDataType::BUILTIN;
+				cast_type.builtin_type = Variant::INT;
+			}
 
 			// Create temporary for result first since it will be deleted last.
 			GDScriptCodeGenerator::Address result = codegen.add_temporary(cast_type);
diff --git a/modules/gdscript/gdscript_editor.cpp b/modules/gdscript/gdscript_editor.cpp
index e04f6063ec..33a88dd2dd 100644
--- a/modules/gdscript/gdscript_editor.cpp
+++ b/modules/gdscript/gdscript_editor.cpp
@@ -610,7 +610,7 @@ static String _make_arguments_hint(const GDScriptParser::FunctionNode *p_functio
 				case GDScriptParser::Node::SUBSCRIPT: {
 					const GDScriptParser::SubscriptNode *sub = static_cast<const GDScriptParser::SubscriptNode *>(par->default_value);
 					if (sub->is_constant) {
-						if (sub->datatype.kind == GDScriptParser::DataType::ENUM_VALUE) {
+						if (sub->datatype.kind == GDScriptParser::DataType::ENUM) {
 							def_val = sub->get_datatype().to_string();
 						} else if (sub->reduced) {
 							const Variant::Type vt = sub->reduced_value.get_type();
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp
index 460bd85a86..cfad832a6c 100644
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -3740,8 +3740,6 @@ String GDScriptParser::DataType::to_string() const {
 		}
 		case ENUM:
 			return enum_type.operator String() + " (enum)";
-		case ENUM_VALUE:
-			return enum_type.operator String() + " (enum value)";
 		case UNRESOLVED:
 			return "<unresolved type>";
 	}
diff --git a/modules/gdscript/gdscript_parser.h b/modules/gdscript/gdscript_parser.h
index e4311d2d5e..c09b07282f 100644
--- a/modules/gdscript/gdscript_parser.h
+++ b/modules/gdscript/gdscript_parser.h
@@ -106,8 +106,7 @@ public:
 			NATIVE,
 			SCRIPT,
 			CLASS, // GDScript.
-			ENUM, // Full enumeration.
-			ENUM_VALUE, // Value from enumeration.
+			ENUM, // Enumeration.
 			VARIANT, // Can be any type.
 			UNRESOLVED,
 		};
@@ -185,8 +184,6 @@ public:
 					return builtin_type == p_other.builtin_type;
 				case NATIVE:
 				case ENUM:
-					return native_type == p_other.native_type;
-				case ENUM_VALUE:
 					return native_type == p_other.native_type && enum_type == p_other.enum_type;
 				case SCRIPT:
 					return script_type == p_other.script_type;
diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp
index 9977b88aa1..d3287ab345 100644
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -312,22 +312,6 @@ GDScriptTokenizer::Token GDScriptTokenizer::pop_error() {
 	return error;
 }
 
-static bool _is_alphanumeric(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
-}
-
-static bool _is_digit(char32_t c) {
-	return (c >= '0' && c <= '9');
-}
-
-static bool _is_hex_digit(char32_t c) {
-	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
-}
-
-static bool _is_binary_digit(char32_t c) {
-	return (c == '0' || c == '1');
-}
-
 GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) {
 	Token token(p_type);
 	token.start_line = start_line;
@@ -448,10 +432,10 @@ GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, To
 }
 
 GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
-	if (!_is_alphanumeric(_peek())) {
+	if (!is_ascii_identifier_char(_peek())) {
 		push_error("Expected annotation identifier after \"@\".");
 	}
-	while (_is_alphanumeric(_peek())) {
+	while (is_ascii_identifier_char(_peek())) {
 		// Consume all identifier characters.
 		_advance();
 	}
@@ -526,7 +510,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
 #define MAX_KEYWORD_LENGTH 10
 
 	// Consume all alphanumeric characters.
-	while (_is_alphanumeric(_peek())) {
+	while (is_ascii_identifier_char(_peek())) {
 		_advance();
 	}
 
@@ -612,7 +596,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 	bool has_decimal = false;
 	bool has_exponent = false;
 	bool has_error = false;
-	bool (*digit_check_func)(char32_t) = _is_digit;
+	bool (*digit_check_func)(char32_t) = is_digit;
 
 	if (_peek(-1) == '.') {
 		has_decimal = true;
@@ -620,20 +604,20 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 		if (_peek() == 'x') {
 			// Hexadecimal.
 			base = 16;
-			digit_check_func = _is_hex_digit;
+			digit_check_func = is_hex_digit;
 			_advance();
 		} else if (_peek() == 'b') {
 			// Binary.
 			base = 2;
-			digit_check_func = _is_binary_digit;
+			digit_check_func = is_binary_digit;
 			_advance();
 		}
 	}
 
 	// Allow '_' to be used in a number, for readability.
 	bool previous_was_underscore = false;
-	while (digit_check_func(_peek()) || _peek() == '_') {
-		if (_peek() == '_') {
+	while (digit_check_func(_peek()) || is_underscore(_peek())) {
+		if (is_underscore(_peek())) {
 			if (previous_was_underscore) {
 				Token error = make_error(R"(Only one underscore can be used as a numeric separator.)");
 				error.start_column = column;
@@ -682,7 +666,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 			_advance();
 
 			// Consume decimal digits.
-			while (_is_digit(_peek()) || _peek() == '_') {
+			while (is_digit(_peek()) || is_underscore(_peek())) {
 				_advance();
 			}
 		}
@@ -696,7 +680,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 				_advance();
 			}
 			// Consume exponent digits.
-			if (!_is_digit(_peek())) {
+			if (!is_digit(_peek())) {
 				Token error = make_error(R"(Expected exponent value after "e".)");
 				error.start_column = column;
 				error.leftmost_column = column;
@@ -705,8 +689,8 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 				push_error(error);
 			}
 			previous_was_underscore = false;
-			while (_is_digit(_peek()) || _peek() == '_') {
-				if (_peek() == '_') {
+			while (is_digit(_peek()) || is_underscore(_peek())) {
+				if (is_underscore(_peek())) {
 					if (previous_was_underscore) {
 						Token error = make_error(R"(Only one underscore can be used as a numeric separator.)");
 						error.start_column = column;
@@ -733,7 +717,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 		error.rightmost_column = column + 1;
 		push_error(error);
 		has_error = true;
-	} else if (_is_alphanumeric(_peek())) {
+	} else if (is_ascii_identifier_char(_peek())) {
 		// Letter at the end of the number.
 		push_error("Invalid numeric notation.");
 	}
@@ -865,7 +849,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
 
 						char32_t digit = _peek();
 						char32_t value = 0;
-						if (digit >= '0' && digit <= '9') {
+						if (is_digit(digit)) {
 							value = digit - '0';
 						} else if (digit >= 'a' && digit <= 'f') {
 							value = digit - 'a';
@@ -1322,9 +1306,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
 
 	line_continuation = false;
 
-	if (_is_digit(c)) {
+	if (is_digit(c)) {
 		return number();
-	} else if (_is_alphanumeric(c)) {
+	} else if (is_ascii_identifier_char(c)) {
 		return potential_identifier();
 	}
 
@@ -1392,7 +1376,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
 			if (_peek() == '.') {
 				_advance();
 				return make_token(Token::PERIOD_PERIOD);
-			} else if (_is_digit(_peek())) {
+			} else if (is_digit(_peek())) {
 				// Number starting with '.'.
 				return number();
 			} else {
diff --git a/modules/gdscript/gdscript_warning.cpp b/modules/gdscript/gdscript_warning.cpp
index 73536f5f8e..ad96e36640 100644
--- a/modules/gdscript/gdscript_warning.cpp
+++ b/modules/gdscript/gdscript_warning.cpp
@@ -152,6 +152,9 @@ String GDScriptWarning::get_message() const {
 			CHECK_SYMBOLS(3);
 			return vformat(R"(The %s '%s' has the same name as a %s.)", symbols[0], symbols[1], symbols[2]);
 		}
+		case INT_ASSIGNED_TO_ENUM: {
+			return "Integer used when an enum value is expected. If this is intended cast the integer to the enum type.";
+		}
 		case WARNING_MAX:
 			break; // Can't happen, but silences warning
 	}
@@ -199,6 +202,7 @@ String GDScriptWarning::get_name_from_code(Code p_code) {
 		"REDUNDANT_AWAIT",
 		"EMPTY_FILE",
 		"SHADOWED_GLOBAL_IDENTIFIER",
+		"INT_ASSIGNED_TO_ENUM",
 	};
 
 	static_assert((sizeof(names) / sizeof(*names)) == WARNING_MAX, "Amount of warning types don't match the amount of warning names.");
diff --git a/modules/gdscript/gdscript_warning.h b/modules/gdscript/gdscript_warning.h
index 112b40781a..82efe3568f 100644
--- a/modules/gdscript/gdscript_warning.h
+++ b/modules/gdscript/gdscript_warning.h
@@ -70,6 +70,7 @@ public:
 		REDUNDANT_AWAIT, // await is used but expression is synchronous (not a signal nor a coroutine).
 		EMPTY_FILE, // A script file is empty.
 		SHADOWED_GLOBAL_IDENTIFIER, // A global class or function has the same name as variable.
+		INT_ASSIGNED_TO_ENUM, // An integer value was assigned to an enum-typed variable without casting.
 		WARNING_MAX,
 	};
 
diff --git a/modules/gdscript/language_server/gdscript_extend_parser.cpp b/modules/gdscript/language_server/gdscript_extend_parser.cpp
index 49f5303ae6..17886181d5 100644
--- a/modules/gdscript/language_server/gdscript_extend_parser.cpp
+++ b/modules/gdscript/language_server/gdscript_extend_parser.cpp
@@ -541,7 +541,7 @@ String ExtendGDScriptParser::get_identifier_under_position(const lsp::Position &
 	for (int c = p_position.character; c >= 0; c--) {
 		start_pos = c;
 		char32_t ch = line[c];
-		bool valid_char = (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
+		bool valid_char = is_ascii_identifier_char(ch);
 		if (!valid_char) {
 			break;
 		}
@@ -550,7 +550,7 @@ String ExtendGDScriptParser::get_identifier_under_position(const lsp::Position &
 	int end_pos = p_position.character;
 	for (int c = p_position.character; c < line.length(); c++) {
 		char32_t ch = line[c];
-		bool valid_char = (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
+		bool valid_char = is_ascii_identifier_char(ch);
 		if (!valid_char) {
 			break;
 		}
diff --git a/modules/gdscript/tests/gdscript_test_runner.cpp b/modules/gdscript/tests/gdscript_test_runner.cpp
index 47772b8039..73a424dae4 100644
--- a/modules/gdscript/tests/gdscript_test_runner.cpp
+++ b/modules/gdscript/tests/gdscript_test_runner.cpp
@@ -134,12 +134,14 @@ GDScriptTestRunner::GDScriptTestRunner(const String &p_source_dir, bool p_init_l
 	if (do_init_languages) {
 		init_language(p_source_dir);
 	}
+#ifdef DEBUG_ENABLED
 	// Enable all warnings for GDScript, so we can test them.
 	ProjectSettings::get_singleton()->set_setting("debug/gdscript/warnings/enable", true);
 	for (int i = 0; i < (int)GDScriptWarning::WARNING_MAX; i++) {
 		String warning = GDScriptWarning::get_name_from_code((GDScriptWarning::Code)i).to_lower();
 		ProjectSettings::get_singleton()->set_setting("debug/gdscript/warnings/" + warning, true);
 	}
+#endif
 
 	// Enable printing to show results
 	_print_line_enabled = true;
@@ -153,6 +155,21 @@ GDScriptTestRunner::~GDScriptTestRunner() {
 	}
 }
 
+#ifndef DEBUG_ENABLED
+static String strip_warnings(const String &p_expected) {
+	// On release builds we don't have warnings. Here we remove them from the output before comparison
+	// so it doesn't fail just because of difference in warnings.
+	String expected_no_warnings;
+	for (String line : p_expected.split("\n")) {
+		if (line.begins_with(">> ")) {
+			continue;
+		}
+		expected_no_warnings += line + "\n";
+	}
+	return expected_no_warnings.strip_edges() + "\n";
+}
+#endif
+
 int GDScriptTestRunner::run_tests() {
 	if (!make_tests()) {
 		FAIL("An error occurred while making the tests.");
@@ -170,6 +187,9 @@ int GDScriptTestRunner::run_tests() {
 		GDScriptTest::TestResult result = test.run_test();
 
 		String expected = FileAccess::get_file_as_string(test.get_output_file());
+#ifndef DEBUG_ENABLED
+		expected = strip_warnings(expected);
+#endif
 		INFO(test.get_source_file());
 		if (!result.passed) {
 			INFO(expected);
@@ -233,6 +253,22 @@ bool GDScriptTestRunner::make_tests_for_dir(const String &p_dir) {
 			}
 		} else {
 			if (next.get_extension().to_lower() == "gd") {
+#ifndef DEBUG_ENABLED
+				// On release builds, skip tests marked as debug only.
+				Error open_err = OK;
+				FileAccessRef script_file(FileAccess::open(current_dir.plus_file(next), FileAccess::READ, &open_err));
+				if (open_err != OK) {
+					ERR_PRINT(vformat(R"(Couldn't open test file "%s".)", next));
+					next = dir->get_next();
+					continue;
+				} else {
+					if (script_file->get_line() == "#debug-only") {
+						next = dir->get_next();
+						continue;
+					}
+				}
+#endif
+
 				String out_file = next.get_basename() + ".out";
 				if (!is_generating && !dir->file_exists(out_file)) {
 					ERR_FAIL_V_MSG(false, "Could not find output file for " + next);
@@ -387,6 +423,10 @@ bool GDScriptTest::check_output(const String &p_output) const {
 	String got = p_output.strip_edges(); // TODO: may be hacky.
 	got += "\n"; // Make sure to insert newline for CI static checks.
 
+#ifndef DEBUG_ENABLED
+	expected = strip_warnings(expected);
+#endif
+
 	return got == expected;
 }
 
@@ -469,6 +509,7 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) {
 		return result;
 	}
 
+#ifdef DEBUG_ENABLED
 	StringBuilder warning_string;
 	for (const GDScriptWarning &E : parser.get_warnings()) {
 		const GDScriptWarning warning = E;
@@ -482,6 +523,7 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) {
 		warning_string.append("\n");
 	}
 	result.output += warning_string.as_string();
+#endif
 
 	// Test compiling.
 	GDScriptCompiler compiler;
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_assign_with_wrong_enum_type.gd b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_assign_with_wrong_enum_type.gd
new file mode 100644
index 0000000000..928c886650
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_assign_with_wrong_enum_type.gd
@@ -0,0 +1,10 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+enum MyOtherEnum { OTHER_ENUM_VALUE_1, OTHER_ENUM_VALUE_2 }
+
+# Different enum types can't be assigned without casting.
+var class_var: MyEnum = MyEnum.ENUM_VALUE_1
+
+func test():
+	print(class_var)
+	class_var = MyOtherEnum.OTHER_ENUM_VALUE_2
+	print(class_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_assign_with_wrong_enum_type.out b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_assign_with_wrong_enum_type.out
new file mode 100644
index 0000000000..fde7e92f8c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_assign_with_wrong_enum_type.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Cannot assign a value of type "MyOtherEnum (enum)" to a target of type "MyEnum (enum)".
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_init_with_wrong_enum_type.gd b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_init_with_wrong_enum_type.gd
new file mode 100644
index 0000000000..03a1711d7b
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_init_with_wrong_enum_type.gd
@@ -0,0 +1,8 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+enum MyOtherEnum { OTHER_ENUM_VALUE_1, OTHER_ENUM_VALUE_2 }
+
+# Different enum types can't be assigned without casting.
+var class_var: MyEnum = MyOtherEnum.OTHER_ENUM_VALUE_1
+
+func test():
+	print(class_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_init_with_wrong_enum_type.out b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_init_with_wrong_enum_type.out
new file mode 100644
index 0000000000..b1710c798d
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_class_var_init_with_wrong_enum_type.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Value of type "MyOtherEnum (enum)" cannot be assigned to a variable of type "MyEnum (enum)".
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_assign_with_wrong_enum_type.gd b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_assign_with_wrong_enum_type.gd
new file mode 100644
index 0000000000..d08d3dd7b2
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_assign_with_wrong_enum_type.gd
@@ -0,0 +1,8 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+enum MyOtherEnum { OTHER_ENUM_VALUE_1, OTHER_ENUM_VALUE_2 }
+
+func test():
+	var local_var: MyEnum = MyEnum.ENUM_VALUE_1
+	print(local_var)
+	local_var = MyOtherEnum.OTHER_ENUM_VALUE_2
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_assign_with_wrong_enum_type.out b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_assign_with_wrong_enum_type.out
new file mode 100644
index 0000000000..fde7e92f8c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_assign_with_wrong_enum_type.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Cannot assign a value of type "MyOtherEnum (enum)" to a target of type "MyEnum (enum)".
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_init_with_wrong_enum_type.gd b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_init_with_wrong_enum_type.gd
new file mode 100644
index 0000000000..ca6d892218
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_init_with_wrong_enum_type.gd
@@ -0,0 +1,6 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+enum MyOtherEnum { OTHER_ENUM_VALUE_1, OTHER_ENUM_VALUE_2 }
+
+func test():
+	var local_var: MyEnum = MyOtherEnum.OTHER_ENUM_VALUE_1
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_init_with_wrong_enum_type.out b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_init_with_wrong_enum_type.out
new file mode 100644
index 0000000000..b1710c798d
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/enum_local_var_init_with_wrong_enum_type.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Value of type "MyOtherEnum (enum)" cannot be assigned to a variable of type "MyEnum (enum)".
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_enum_to_int_typed_var.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_enum_to_int_typed_var.gd
new file mode 100644
index 0000000000..edb785c8b6
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_enum_to_int_typed_var.gd
@@ -0,0 +1,13 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+
+var class_var: int = MyEnum.ENUM_VALUE_1
+
+func test():
+	print(class_var)
+	class_var = MyEnum.ENUM_VALUE_2
+	print(class_var)
+
+	var local_var: int = MyEnum.ENUM_VALUE_1
+	print(local_var)
+	local_var = MyEnum.ENUM_VALUE_2
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_enum_to_int_typed_var.out b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_enum_to_int_typed_var.out
new file mode 100644
index 0000000000..5f53802c33
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_enum_to_int_typed_var.out
@@ -0,0 +1,5 @@
+GDTEST_OK
+0
+1
+0
+1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_int_cast_to_same_enum.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_int_cast_to_same_enum.gd
new file mode 100644
index 0000000000..726e4fd413
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_int_cast_to_same_enum.gd
@@ -0,0 +1,13 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+
+var class_var: MyEnum = 0 as MyEnum
+
+func test():
+	print(class_var)
+	class_var = 1 as MyEnum
+	print(class_var)
+
+	var local_var: MyEnum = 0 as MyEnum
+	print(local_var)
+	local_var = 1 as MyEnum
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_int_cast_to_same_enum.out b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_int_cast_to_same_enum.out
new file mode 100644
index 0000000000..5f53802c33
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_int_cast_to_same_enum.out
@@ -0,0 +1,5 @@
+GDTEST_OK
+0
+1
+0
+1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_other_enum_cast_to_same_enum.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_other_enum_cast_to_same_enum.gd
new file mode 100644
index 0000000000..798912c987
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_other_enum_cast_to_same_enum.gd
@@ -0,0 +1,14 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+enum MyOtherEnum { OTHER_ENUM_VALUE_1, OTHER_ENUM_VALUE_2 }
+
+var class_var: MyEnum = MyOtherEnum.OTHER_ENUM_VALUE_1 as MyEnum
+
+func test():
+	print(class_var)
+	class_var = MyOtherEnum.OTHER_ENUM_VALUE_2 as MyEnum
+	print(class_var)
+
+	var local_var: MyEnum = MyOtherEnum.OTHER_ENUM_VALUE_1 as MyEnum
+	print(local_var)
+	local_var = MyOtherEnum.OTHER_ENUM_VALUE_2 as MyEnum
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_other_enum_cast_to_same_enum.out b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_other_enum_cast_to_same_enum.out
new file mode 100644
index 0000000000..5f53802c33
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_other_enum_cast_to_same_enum.out
@@ -0,0 +1,5 @@
+GDTEST_OK
+0
+1
+0
+1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_same_enum.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_same_enum.gd
new file mode 100644
index 0000000000..2bfb318c3c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_same_enum.gd
@@ -0,0 +1,13 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+
+var class_var: MyEnum = MyEnum.ENUM_VALUE_1
+
+func test():
+	print(class_var)
+	class_var = MyEnum.ENUM_VALUE_2
+	print(class_var)
+
+	var local_var: MyEnum = MyEnum.ENUM_VALUE_1
+	print(local_var)
+	local_var = MyEnum.ENUM_VALUE_2
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_assign_same_enum.out b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_same_enum.out
new file mode 100644
index 0000000000..5f53802c33
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_assign_same_enum.out
@@ -0,0 +1,5 @@
+GDTEST_OK
+0
+1
+0
+1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_is_treated_as_int.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_is_treated_as_int.gd
new file mode 100644
index 0000000000..7022d14566
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_is_treated_as_int.gd
@@ -0,0 +1,21 @@
+# Enum is equivalent to int for comparisons and operations.
+enum MyEnum {
+	ZERO,
+	ONE,
+	TWO,
+}
+
+enum OtherEnum {
+	ZERO,
+	ONE,
+	TWO,
+}
+
+func test():
+	print(MyEnum.ZERO == OtherEnum.ZERO)
+	print(MyEnum.ZERO == 1)
+	print(MyEnum.ZERO != OtherEnum.ONE)
+	print(MyEnum.ZERO != 0)
+
+	print(MyEnum.ONE + OtherEnum.TWO)
+	print(2 - MyEnum.ONE)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_is_treated_as_int.out b/modules/gdscript/tests/scripts/analyzer/features/enum_is_treated_as_int.out
new file mode 100644
index 0000000000..c8f34c11db
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_is_treated_as_int.out
@@ -0,0 +1,7 @@
+GDTEST_OK
+true
+false
+true
+false
+3
+1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_type_is_treated_as_dictionary.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_type_is_treated_as_dictionary.gd
new file mode 100644
index 0000000000..885d70408a
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_type_is_treated_as_dictionary.gd
@@ -0,0 +1,13 @@
+enum MyEnum {
+	ZERO,
+	ONE,
+	TWO,
+}
+
+func test():
+	for key in MyEnum.keys():
+		prints(key, MyEnum[key])
+
+	# https://github.com/godotengine/godot/issues/55491
+	for key in MyEnum:
+		prints(key, MyEnum[key])
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_type_is_treated_as_dictionary.out b/modules/gdscript/tests/scripts/analyzer/features/enum_type_is_treated_as_dictionary.out
new file mode 100644
index 0000000000..d29f53109c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_type_is_treated_as_dictionary.out
@@ -0,0 +1,7 @@
+GDTEST_OK
+ZERO 0
+ONE 1
+TWO 2
+ZERO 0
+ONE 1
+TWO 2
diff --git a/modules/gdscript/tests/scripts/parser/warnings/enum_assign_int_without_casting.gd b/modules/gdscript/tests/scripts/parser/warnings/enum_assign_int_without_casting.gd
new file mode 100644
index 0000000000..2be1024214
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/warnings/enum_assign_int_without_casting.gd
@@ -0,0 +1,15 @@
+enum MyEnum { ENUM_VALUE_1, ENUM_VALUE_2 }
+
+# Assigning int value to enum-typed variable without explicit cast causes a warning.
+# While it is valid it may be a mistake in the assignment.
+var class_var: MyEnum = 0
+
+func test():
+	print(class_var)
+	class_var = 1
+	print(class_var)
+
+	var local_var: MyEnum = 0
+	print(local_var)
+	local_var = 1
+	print(local_var)
diff --git a/modules/gdscript/tests/scripts/parser/warnings/enum_assign_int_without_casting.out b/modules/gdscript/tests/scripts/parser/warnings/enum_assign_int_without_casting.out
new file mode 100644
index 0000000000..eef13bbff8
--- /dev/null
+++ b/modules/gdscript/tests/scripts/parser/warnings/enum_assign_int_without_casting.out
@@ -0,0 +1,21 @@
+GDTEST_OK
+>> WARNING
+>> Line: 5
+>> INT_ASSIGNED_TO_ENUM
+>> Integer used when an enum value is expected. If this is intended cast the integer to the enum type.
+>> WARNING
+>> Line: 9
+>> INT_ASSIGNED_TO_ENUM
+>> Integer used when an enum value is expected. If this is intended cast the integer to the enum type.
+>> WARNING
+>> Line: 12
+>> INT_ASSIGNED_TO_ENUM
+>> Integer used when an enum value is expected. If this is intended cast the integer to the enum type.
+>> WARNING
+>> Line: 14
+>> INT_ASSIGNED_TO_ENUM
+>> Integer used when an enum value is expected. If this is intended cast the integer to the enum type.
+0
+1
+0
+1
diff --git a/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.gd b/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.gd
index 10780b5379..7b3c112fe9 100644
--- a/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.gd
+++ b/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.gd
@@ -1,3 +1,4 @@
+#debug-only
 func test():
 	var node := Node.new()
 	var inside_tree = node.is_inside_tree
diff --git a/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.out b/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.out
index e585c374e2..fe48ade26b 100644
--- a/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.out
+++ b/modules/gdscript/tests/scripts/runtime/errors/callable_call_after_free_object.out
@@ -2,5 +2,5 @@ GDTEST_RUNTIME_ERROR
 >> SCRIPT ERROR
 >> on function: test()
 >> runtime/errors/callable_call_after_free_object.gd
->> 5
+>> 6
 >> Attempt to call function 'null::is_inside_tree (Callable)' on a null instance.
diff --git a/modules/gltf/gltf_document.cpp b/modules/gltf/gltf_document.cpp
index e54e51eb8d..baa39a3b80 100644
--- a/modules/gltf/gltf_document.cpp
+++ b/modules/gltf/gltf_document.cpp
@@ -50,6 +50,7 @@
 #include "core/io/file_access.h"
 #include "core/io/file_access_memory.h"
 #include "core/io/json.h"
+#include "core/io/stream_peer.h"
 #include "core/math/disjoint_set.h"
 #include "core/math/vector2.h"
 #include "core/variant/dictionary.h"
diff --git a/modules/mono/editor/GodotTools/GodotTools.OpenVisualStudio/Program.cs b/modules/mono/editor/GodotTools/GodotTools.OpenVisualStudio/Program.cs
index f29b339c0e..7a4641dbbc 100644
--- a/modules/mono/editor/GodotTools/GodotTools.OpenVisualStudio/Program.cs
+++ b/modules/mono/editor/GodotTools/GodotTools.OpenVisualStudio/Program.cs
@@ -183,7 +183,7 @@ namespace GodotTools.OpenVisualStudio
                         continue;
 
                     // The digits after the colon are the process ID
-                    if (!Regex.IsMatch(ppszDisplayName, "!VisualStudio.DTE.16.0:[0-9]"))
+                    if (!Regex.IsMatch(ppszDisplayName, "!VisualStudio.DTE.1[6-7].0:[0-9]"))
                         continue;
 
                     if (pprot.GetObject(moniker[0], out object ppunkObject) == 0)
diff --git a/modules/mono/editor/bindings_generator.cpp b/modules/mono/editor/bindings_generator.cpp
index 1b4ab0ef4b..1de41821f9 100644
--- a/modules/mono/editor/bindings_generator.cpp
+++ b/modules/mono/editor/bindings_generator.cpp
@@ -690,11 +690,11 @@ void BindingsGenerator::_apply_prefix_to_enum_constants(BindingsGenerator::EnumI
 				continue;
 			}
 
-			if (parts[curr_prefix_length][0] >= '0' && parts[curr_prefix_length][0] <= '9') {
+			if (is_digit(parts[curr_prefix_length][0])) {
 				// The name of enum constants may begin with a numeric digit when strip from the enum prefix,
 				// so we make the prefix for this constant one word shorter in those cases.
 				for (curr_prefix_length = curr_prefix_length - 1; curr_prefix_length > 0; curr_prefix_length--) {
-					if (parts[curr_prefix_length][0] < '0' || parts[curr_prefix_length][0] > '9') {
+					if (!is_digit(parts[curr_prefix_length][0])) {
 						break;
 					}
 				}
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index e50a5337cb..c7511f587e 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -303,22 +303,6 @@ _FORCE_INLINE_ bool is_connected_to_prev(char32_t p_chr, char32_t p_pchr) {
 	return (prop != U_JT_RIGHT_JOINING) && (prop != U_JT_NON_JOINING) ? !is_ligature(p_pchr, p_chr) : false;
 }
 
-_FORCE_INLINE_ bool is_control(char32_t p_char) {
-	return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009F);
-}
-
-_FORCE_INLINE_ bool is_whitespace(char32_t p_char) {
-	return (p_char == 0x0020) || (p_char == 0x00A0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085);
-}
-
-_FORCE_INLINE_ bool is_linebreak(char32_t p_char) {
-	return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029);
-}
-
-_FORCE_INLINE_ bool is_underscore(char32_t p_char) {
-	return (p_char == 0x005F);
-}
-
 /*************************************************************************/
 
 String TextServerAdvanced::interface_name = "ICU / HarfBuzz / Graphite";
diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp
index dd520a2e40..182d2a02ad 100644
--- a/modules/text_server_fb/text_server_fb.cpp
+++ b/modules/text_server_fb/text_server_fb.cpp
@@ -44,30 +44,6 @@
 #endif
 
 /*************************************************************************/
-/*  Character properties.                                                */
-/*************************************************************************/
-
-_FORCE_INLINE_ bool is_control(char32_t p_char) {
-	return (p_char <= 0x001f) || (p_char >= 0x007f && p_char <= 0x009F);
-}
-
-_FORCE_INLINE_ bool is_whitespace(char32_t p_char) {
-	return (p_char == 0x0020) || (p_char == 0x00A0) || (p_char == 0x1680) || (p_char >= 0x2000 && p_char <= 0x200a) || (p_char == 0x202f) || (p_char == 0x205f) || (p_char == 0x3000) || (p_char == 0x2028) || (p_char == 0x2029) || (p_char >= 0x0009 && p_char <= 0x000d) || (p_char == 0x0085);
-}
-
-_FORCE_INLINE_ bool is_linebreak(char32_t p_char) {
-	return (p_char >= 0x000a && p_char <= 0x000d) || (p_char == 0x0085) || (p_char == 0x2028) || (p_char == 0x2029);
-}
-
-_FORCE_INLINE_ bool is_punct(char32_t p_char) {
-	return (p_char >= 0x0020 && p_char <= 0x002F) || (p_char >= 0x003A && p_char <= 0x0040) || (p_char >= 0x005B && p_char <= 0x005E) || (p_char == 0x0060) || (p_char >= 0x007B && p_char <= 0x007E) || (p_char >= 0x2000 && p_char <= 0x206F) || (p_char >= 0x3000 && p_char <= 0x303F);
-}
-
-_FORCE_INLINE_ bool is_underscore(char32_t p_char) {
-	return (p_char == 0x005F);
-}
-
-/*************************************************************************/
 
 String TextServerFallback::interface_name = "Fallback";
 uint32_t TextServerFallback::interface_features = 0; // Nothing is supported.
diff --git a/modules/visual_script/visual_script_expression.cpp b/modules/visual_script/visual_script_expression.cpp
index 17a3566ed2..e8942b9788 100644
--- a/modules/visual_script/visual_script_expression.cpp
+++ b/modules/visual_script/visual_script_expression.cpp
@@ -377,13 +377,13 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
 										r_token.type = TK_ERROR;
 										return ERR_PARSE_ERROR;
 									}
-									if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+									if (!is_hex_digit(c)) {
 										_set_error("Malformed hex constant in string");
 										r_token.type = TK_ERROR;
 										return ERR_PARSE_ERROR;
 									}
 									char32_t v;
-									if (c >= '0' && c <= '9') {
+									if (is_digit(c)) {
 										v = c - '0';
 									} else if (c >= 'a' && c <= 'f') {
 										v = c - 'a';
@@ -457,7 +457,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
 					break;
 				}
 
-				if (cchar >= '0' && cchar <= '9') {
+				if (is_digit(cchar)) {
 					//a number
 
 					String num;
@@ -476,7 +476,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
 					while (true) {
 						switch (reading) {
 							case READING_INT: {
-								if (c >= '0' && c <= '9') {
+								if (is_digit(c)) {
 									//pass
 								} else if (c == '.') {
 									reading = READING_DEC;
@@ -489,7 +489,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
 
 							} break;
 							case READING_DEC: {
-								if (c >= '0' && c <= '9') {
+								if (is_digit(c)) {
 								} else if (c == 'e') {
 									reading = READING_EXP;
 
@@ -499,7 +499,7 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
 
 							} break;
 							case READING_EXP: {
-								if (c >= '0' && c <= '9') {
+								if (is_digit(c)) {
 									exp_beg = true;
 
 								} else if ((c == '-' || c == '+') && !exp_sign && !exp_beg) {
@@ -532,11 +532,11 @@ Error VisualScriptExpression::_get_token(Token &r_token) {
 					}
 					return OK;
 
-				} else if ((cchar >= 'A' && cchar <= 'Z') || (cchar >= 'a' && cchar <= 'z') || cchar == '_') {
+				} else if (is_ascii_char(cchar) || cchar == '_') {
 					String id;
 					bool first = true;
 
-					while ((cchar >= 'A' && cchar <= 'Z') || (cchar >= 'a' && cchar <= 'z') || cchar == '_' || (!first && cchar >= '0' && cchar <= '9')) {
+					while (is_ascii_char(cchar) || cchar == '_' || (!first && is_digit(cchar))) {
 						id += String::chr(cchar);
 						cchar = GET_CHAR();
 						first = false;
diff --git a/platform/android/display_server_android.cpp b/platform/android/display_server_android.cpp
index 3d0dabc56e..b0f16337ed 100644
--- a/platform/android/display_server_android.cpp
+++ b/platform/android/display_server_android.cpp
@@ -161,6 +161,16 @@ int DisplayServerAndroid::screen_get_dpi(int p_screen) const {
 	return godot_io_java->get_screen_dpi();
 }
 
+float DisplayServerAndroid::screen_get_refresh_rate(int p_screen) const {
+	GodotIOJavaWrapper *godot_io_java = OS_Android::get_singleton()->get_godot_io_java();
+	if (!godot_io_java) {
+		ERR_PRINT("An error occurred while trying to get the screen refresh rate.");
+		return SCREEN_REFRESH_RATE_FALLBACK;
+	}
+
+	return godot_io_java->get_screen_refresh_rate(SCREEN_REFRESH_RATE_FALLBACK);
+}
+
 bool DisplayServerAndroid::screen_is_touchscreen(int p_screen) const {
 	return true;
 }
diff --git a/platform/android/display_server_android.h b/platform/android/display_server_android.h
index e52e07bf1a..23077a6529 100644
--- a/platform/android/display_server_android.h
+++ b/platform/android/display_server_android.h
@@ -106,6 +106,7 @@ public:
 	virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual Rect2i screen_get_usable_rect(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual int screen_get_dpi(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual bool screen_is_touchscreen(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 
 	virtual void virtual_keyboard_show(const String &p_existing_text, const Rect2 &p_screen_rect = Rect2(), bool p_multiline = false, int p_max_length = -1, int p_cursor_start = -1, int p_cursor_end = -1) override;
diff --git a/platform/android/export/export_plugin.cpp b/platform/android/export/export_plugin.cpp
index 61d2f897ef..df2d32e152 100644
--- a/platform/android/export/export_plugin.cpp
+++ b/platform/android/export/export_plugin.cpp
@@ -416,10 +416,10 @@ String EditorExportPlatformAndroid::get_package_name(const String &p_package) co
 	bool first = true;
 	for (int i = 0; i < basename.length(); i++) {
 		char32_t c = basename[i];
-		if (c >= '0' && c <= '9' && first) {
+		if (is_digit(c) && first) {
 			continue;
 		}
-		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
+		if (is_ascii_alphanumeric_char(c)) {
 			name += String::chr(c);
 			first = false;
 		}
@@ -462,19 +462,19 @@ bool EditorExportPlatformAndroid::is_package_name_valid(const String &p_package,
 			first = true;
 			continue;
 		}
-		if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')) {
+		if (!is_ascii_identifier_char(c)) {
 			if (r_error) {
 				*r_error = vformat(TTR("The character '%s' is not allowed in Android application package names."), String::chr(c));
 			}
 			return false;
 		}
-		if (first && (c >= '0' && c <= '9')) {
+		if (first && is_digit(c)) {
 			if (r_error) {
 				*r_error = TTR("A digit cannot be the first character in a package segment.");
 			}
 			return false;
 		}
-		if (first && c == '_') {
+		if (first && is_underscore(c)) {
 			if (r_error) {
 				*r_error = vformat(TTR("The character '%s' cannot be the first character in a package segment."), String::chr(c));
 			}
diff --git a/platform/android/java/lib/src/org/godotengine/godot/GodotIO.java b/platform/android/java/lib/src/org/godotengine/godot/GodotIO.java
index d679fd92c0..b151e7eec1 100644
--- a/platform/android/java/lib/src/org/godotengine/godot/GodotIO.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/GodotIO.java
@@ -226,6 +226,14 @@ public class GodotIO {
 		return (int)(metrics.density * 160f);
 	}
 
+	public double getScreenRefreshRate(double fallback) {
+		Display display = activity.getWindowManager().getDefaultDisplay();
+		if (display != null) {
+			return display.getRefreshRate();
+		}
+		return fallback;
+	}
+
 	public int[] screenGetUsableRect() {
 		DisplayMetrics metrics = activity.getResources().getDisplayMetrics();
 		Display display = activity.getWindowManager().getDefaultDisplay();
diff --git a/platform/android/java_godot_io_wrapper.cpp b/platform/android/java_godot_io_wrapper.cpp
index e0a535f16e..8a2788e848 100644
--- a/platform/android/java_godot_io_wrapper.cpp
+++ b/platform/android/java_godot_io_wrapper.cpp
@@ -53,6 +53,7 @@ GodotIOJavaWrapper::GodotIOJavaWrapper(JNIEnv *p_env, jobject p_godot_io_instanc
 		_get_locale = p_env->GetMethodID(cls, "getLocale", "()Ljava/lang/String;");
 		_get_model = p_env->GetMethodID(cls, "getModel", "()Ljava/lang/String;");
 		_get_screen_DPI = p_env->GetMethodID(cls, "getScreenDPI", "()I");
+		_get_screen_refresh_rate = p_env->GetMethodID(cls, "getScreenRefreshRate", "(D)D");
 		_screen_get_usable_rect = p_env->GetMethodID(cls, "screenGetUsableRect", "()[I"),
 		_get_unique_id = p_env->GetMethodID(cls, "getUniqueID", "()Ljava/lang/String;");
 		_show_keyboard = p_env->GetMethodID(cls, "showKeyboard", "(Ljava/lang/String;ZIII)V");
@@ -136,6 +137,19 @@ int GodotIOJavaWrapper::get_screen_dpi() {
 	}
 }
 
+float GodotIOJavaWrapper::get_screen_refresh_rate(float fallback) {
+	if (_get_screen_refresh_rate) {
+		JNIEnv *env = get_jni_env();
+		if (env == nullptr) {
+			ERR_PRINT("An error occurred while trying to get screen refresh rate.");
+			return fallback;
+		}
+		return (float)env->CallDoubleMethod(godot_io_instance, _get_screen_refresh_rate, (double)fallback);
+	}
+	ERR_PRINT("An error occurred while trying to get the screen refresh rate.");
+	return fallback;
+}
+
 void GodotIOJavaWrapper::screen_get_usable_rect(int (&p_rect_xywh)[4]) {
 	if (_screen_get_usable_rect) {
 		JNIEnv *env = get_jni_env();
diff --git a/platform/android/java_godot_io_wrapper.h b/platform/android/java_godot_io_wrapper.h
index c96abf1101..38a2b710a9 100644
--- a/platform/android/java_godot_io_wrapper.h
+++ b/platform/android/java_godot_io_wrapper.h
@@ -51,6 +51,7 @@ private:
 	jmethodID _get_locale = 0;
 	jmethodID _get_model = 0;
 	jmethodID _get_screen_DPI = 0;
+	jmethodID _get_screen_refresh_rate = 0;
 	jmethodID _screen_get_usable_rect = 0;
 	jmethodID _get_unique_id = 0;
 	jmethodID _show_keyboard = 0;
@@ -71,6 +72,7 @@ public:
 	String get_locale();
 	String get_model();
 	int get_screen_dpi();
+	float get_screen_refresh_rate(float fallback);
 	void screen_get_usable_rect(int (&p_rect_xywh)[4]);
 	String get_unique_id();
 	bool has_vk();
diff --git a/platform/iphone/display_server_iphone.h b/platform/iphone/display_server_iphone.h
index 6434483641..7441550f67 100644
--- a/platform/iphone/display_server_iphone.h
+++ b/platform/iphone/display_server_iphone.h
@@ -129,6 +129,7 @@ public:
 	virtual Rect2i screen_get_usable_rect(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual int screen_get_dpi(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual float screen_get_scale(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 
 	virtual Vector<DisplayServer::WindowID> get_window_list() const override;
 
diff --git a/platform/iphone/display_server_iphone.mm b/platform/iphone/display_server_iphone.mm
index 48bda89fc3..9491c9cf90 100644
--- a/platform/iphone/display_server_iphone.mm
+++ b/platform/iphone/display_server_iphone.mm
@@ -393,6 +393,10 @@ int DisplayServerIPhone::screen_get_dpi(int p_screen) const {
 	}
 }
 
+float DisplayServerIPhone::screen_get_refresh_rate(int p_screen) const {
+	return [UIScreen mainScreen].maximumFramesPerSecond;
+}
+
 float DisplayServerIPhone::screen_get_scale(int p_screen) const {
 	return [UIScreen mainScreen].nativeScale;
 }
diff --git a/platform/iphone/export/export_plugin.h b/platform/iphone/export/export_plugin.h
index 756bca14dd..93b23f7ee2 100644
--- a/platform/iphone/export/export_plugin.h
+++ b/platform/iphone/export/export_plugin.h
@@ -130,7 +130,7 @@ class EditorExportPlatformIOS : public EditorExportPlatform {
 
 		for (int i = 0; i < pname.length(); i++) {
 			char32_t c = pname[i];
-			if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-' || c == '.')) {
+			if (!(is_ascii_alphanumeric_char(c) || c == '-' || c == '.')) {
 				if (r_error) {
 					*r_error = vformat(TTR("The character '%s' is not allowed in Identifier."), String::chr(c));
 				}
diff --git a/platform/javascript/display_server_javascript.cpp b/platform/javascript/display_server_javascript.cpp
index 2842fc2f5e..a0e1246c55 100644
--- a/platform/javascript/display_server_javascript.cpp
+++ b/platform/javascript/display_server_javascript.cpp
@@ -663,7 +663,7 @@ DisplayServerJavaScript::DisplayServerJavaScript(const String &p_rendering_drive
 	godot_js_config_canvas_id_get(canvas_id, 256);
 
 	// Handle contextmenu, webglcontextlost
-	godot_js_display_setup_canvas(p_resolution.x, p_resolution.y, p_window_mode == WINDOW_MODE_FULLSCREEN, OS::get_singleton()->is_hidpi_allowed() ? 1 : 0);
+	godot_js_display_setup_canvas(p_resolution.x, p_resolution.y, (p_window_mode == WINDOW_MODE_FULLSCREEN || p_window_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN), OS::get_singleton()->is_hidpi_allowed() ? 1 : 0);
 
 	// Check if it's windows.
 	swap_cancel_ok = godot_js_display_is_swap_ok_cancel() == 1;
@@ -794,6 +794,10 @@ float DisplayServerJavaScript::screen_get_scale(int p_screen) const {
 	return godot_js_display_pixel_ratio_get();
 }
 
+float DisplayServerJavaScript::screen_get_refresh_rate(int p_screen) const {
+	return SCREEN_REFRESH_RATE_FALLBACK; // p_screen is ignored: JavaScript has no native way to query the screen refresh rate (and little need for it), so the fallback is always reported.
+}
+
 Vector<DisplayServer::WindowID> DisplayServerJavaScript::get_window_list() const {
 	Vector<WindowID> ret;
 	ret.push_back(MAIN_WINDOW_ID);
@@ -897,6 +901,7 @@ void DisplayServerJavaScript::window_set_mode(WindowMode p_mode, WindowID p_wind
 			}
 			window_mode = WINDOW_MODE_WINDOWED;
 		} break;
+		case WINDOW_MODE_EXCLUSIVE_FULLSCREEN:
 		case WINDOW_MODE_FULLSCREEN: {
 			int result = godot_js_display_fullscreen_request();
 			ERR_FAIL_COND_MSG(result, "The request was denied. Remember that enabling fullscreen is only possible from an input callback for the HTML5 platform.");
diff --git a/platform/javascript/display_server_javascript.h b/platform/javascript/display_server_javascript.h
index 1ae5d68787..b50956d91c 100644
--- a/platform/javascript/display_server_javascript.h
+++ b/platform/javascript/display_server_javascript.h
@@ -139,6 +139,7 @@ public:
 	virtual Rect2i screen_get_usable_rect(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual int screen_get_dpi(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual float screen_get_scale(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 
 	virtual void virtual_keyboard_show(const String &p_existing_text, const Rect2 &p_screen_rect = Rect2(), bool p_multiline = false, int p_max_input_length = -1, int p_cursor_start = -1, int p_cursor_end = -1) override;
 	virtual void virtual_keyboard_hide() override;
diff --git a/platform/linuxbsd/display_server_x11.cpp b/platform/linuxbsd/display_server_x11.cpp
index 198eacd1f3..844b5616c4 100644
--- a/platform/linuxbsd/display_server_x11.cpp
+++ b/platform/linuxbsd/display_server_x11.cpp
@@ -1054,6 +1054,66 @@ int DisplayServerX11::screen_get_dpi(int p_screen) const {
 	return 96;
 }
 
+// Returns the refresh rate (Hz) of p_screen from its active XRandR mode, or SCREEN_REFRESH_RATE_FALLBACK on failure.
+float DisplayServerX11::screen_get_refresh_rate(int p_screen) const {
+	_THREAD_SAFE_METHOD_
+
+	if (p_screen == SCREEN_OF_MAIN_WINDOW) {
+		p_screen = window_get_current_screen();
+	}
+
+	// Invalid screen?
+	ERR_FAIL_INDEX_V(p_screen, get_screen_count(), SCREEN_REFRESH_RATE_FALLBACK);
+
+	float refresh_rate = SCREEN_REFRESH_RATE_FALLBACK;
+	bool rate_found = false;
+	// Use XRandR to get the screen refresh rate; both the extension and the monitor query must be available.
+	if (xrandr_ext_ok && xrr_get_monitors) {
+		XRRScreenResources *screen_info = XRRGetScreenResources(x11_display, windows[MAIN_WINDOW_ID].x11_window);
+		int count = 0;
+		xrr_monitor_info *monitors = xrr_get_monitors(x11_display, windows[MAIN_WINDOW_ID].x11_window, true, &count);
+		if (screen_info && monitors && p_screen < count) {
+			RRMode current_mode = None;
+			// Find the CRTC whose origin matches the requested monitor. A CRTC whose mode is None
+			// is not outputting anything, so the search continues if such a CRTC happens to match.
+			for (int crtc = 0; crtc < screen_info->ncrtc && current_mode == None; crtc++) {
+				XRRCrtcInfo *crtc_info = XRRGetCrtcInfo(x11_display, screen_info, screen_info->crtcs[crtc]);
+				if (!crtc_info) {
+					continue; // The query can fail; never dereference a null result.
+				}
+				if (crtc_info->x == monitors[p_screen].x && crtc_info->y == monitors[p_screen].y) {
+					current_mode = crtc_info->mode;
+				}
+				XRRFreeCrtcInfo(crtc_info); // Every XRRGetCrtcInfo() result has to be freed by the caller.
+			}
+			// Translate the mode ID into a rate: pixel clock / (pixels per line * lines per frame).
+			for (int mode = 0; current_mode != None && mode < screen_info->nmode; mode++) {
+				const XRRModeInfo &m_info = screen_info->modes[mode];
+				if (m_info.id != current_mode) {
+					continue;
+				}
+				if (m_info.hTotal != 0 && m_info.vTotal != 0) { // Do not divide by zero on a degenerate mode.
+					refresh_rate = (float)m_info.dotClock / ((float)m_info.hTotal * (float)m_info.vTotal);
+					rate_found = true;
+				}
+				break;
+			}
+		}
+		// Release the Xlib allocations on every path, including the failure ones.
+		if (monitors && xrr_free_monitors) {
+			xrr_free_monitors(monitors);
+		}
+		if (screen_info) {
+			XRRFreeScreenResources(screen_info);
+		}
+	}
+
+	if (!rate_found) {
+		ERR_PRINT("An error occured while trying to get the screen refresh rate.");
+	}
+	return refresh_rate;
+}
+
 bool DisplayServerX11::screen_is_touchscreen(int p_screen) const {
 	_THREAD_SAFE_METHOD_
 
@@ -1825,6 +1885,7 @@ void DisplayServerX11::window_set_mode(WindowMode p_mode, WindowID p_window) {
 
 			XSendEvent(x11_display, DefaultRootWindow(x11_display), False, SubstructureRedirectMask | SubstructureNotifyMask, &xev);
 		} break;
+		case WINDOW_MODE_EXCLUSIVE_FULLSCREEN:
 		case WINDOW_MODE_FULLSCREEN: {
 			//Remove full-screen
 			wd.fullscreen = false;
@@ -1877,6 +1938,7 @@ void DisplayServerX11::window_set_mode(WindowMode p_mode, WindowID p_window) {
 
 			XSendEvent(x11_display, DefaultRootWindow(x11_display), False, SubstructureRedirectMask | SubstructureNotifyMask, &xev);
 		} break;
+		case WINDOW_MODE_EXCLUSIVE_FULLSCREEN:
 		case WINDOW_MODE_FULLSCREEN: {
 			wd.last_position_before_fs = wd.position;
 
@@ -2417,7 +2479,7 @@ Key DisplayServerX11::keyboard_get_keycode_from_physical(Key p_keycode) const {
 	Key keycode_no_mod = p_keycode & KeyModifierMask::CODE_MASK;
 	unsigned int xkeycode = KeyMappingX11::get_xlibcode(keycode_no_mod);
 	KeySym xkeysym = XkbKeycodeToKeysym(x11_display, xkeycode, 0, 0);
-	if (xkeysym >= 'a' && xkeysym <= 'z') {
+	if (is_ascii_lower_case(xkeysym)) {
 		xkeysym -= ('a' - 'A');
 	}
 
diff --git a/platform/linuxbsd/display_server_x11.h b/platform/linuxbsd/display_server_x11.h
index c17e120db5..2d07361deb 100644
--- a/platform/linuxbsd/display_server_x11.h
+++ b/platform/linuxbsd/display_server_x11.h
@@ -303,6 +303,7 @@ public:
 	virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual Rect2i screen_get_usable_rect(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual int screen_get_dpi(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual bool screen_is_touchscreen(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 
 #if defined(DBUS_ENABLED)
diff --git a/platform/osx/display_server_osx.h b/platform/osx/display_server_osx.h
index 719a45c2a8..4fc733fe2c 100644
--- a/platform/osx/display_server_osx.h
+++ b/platform/osx/display_server_osx.h
@@ -230,6 +230,7 @@ public:
 	virtual float screen_get_scale(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual float screen_get_max_scale() const override;
 	virtual Rect2i screen_get_usable_rect(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 
 	virtual Vector<int> get_window_list() const override;
 
diff --git a/platform/osx/display_server_osx.mm b/platform/osx/display_server_osx.mm
index f7add5b688..000be79852 100644
--- a/platform/osx/display_server_osx.mm
+++ b/platform/osx/display_server_osx.mm
@@ -2203,6 +2203,24 @@ Rect2i DisplayServerOSX::screen_get_usable_rect(int p_screen) const {
 	return Rect2i();
 }
 
+// Returns the refresh rate (Hz) of p_screen's current display mode, or SCREEN_REFRESH_RATE_FALLBACK on failure.
+float DisplayServerOSX::screen_get_refresh_rate(int p_screen) const {
+	_THREAD_SAFE_METHOD_
+	const int screen = (p_screen == SCREEN_OF_MAIN_WINDOW) ? window_get_current_screen() : p_screen;
+	NSArray *screenArray = [NSScreen screens];
+	if ((NSUInteger)screen < [screenArray count]) {
+		NSDictionary *description = [[screenArray objectAtIndex:screen] deviceDescription];
+		const CGDisplayModeRef displayMode = CGDisplayCopyDisplayMode([[description objectForKey:@"NSScreenNumber"] unsignedIntValue]);
+		if (displayMode) { // The copy can come back NULL; fall through to the error path instead of querying it.
+			const double displayRefreshRate = CGDisplayModeGetRefreshRate(displayMode);
+			CGDisplayModeRelease(displayMode); // "Copy" hands us an owned reference; without this release every call leaks one.
+			return (float)displayRefreshRate;
+		}
+	}
+	ERR_PRINT("An error occured while trying to get the screen refresh rate.");
+	return SCREEN_REFRESH_RATE_FALLBACK;
+}
+
 Vector<DisplayServer::WindowID> DisplayServerOSX::get_window_list() const {
 	_THREAD_SAFE_METHOD_
 
@@ -2652,6 +2670,7 @@ void DisplayServerOSX::window_set_mode(WindowMode p_mode, WindowID p_window) {
 		case WINDOW_MODE_MINIMIZED: {
 			[wd.window_object deminiaturize:nil];
 		} break;
+		case WINDOW_MODE_EXCLUSIVE_FULLSCREEN:
 		case WINDOW_MODE_FULLSCREEN: {
 			[wd.window_object setLevel:NSNormalWindowLevel];
 			if (wd.layered_window) {
@@ -2685,6 +2704,7 @@ void DisplayServerOSX::window_set_mode(WindowMode p_mode, WindowID p_window) {
 		case WINDOW_MODE_MINIMIZED: {
 			[wd.window_object performMiniaturize:nil];
 		} break;
+		case WINDOW_MODE_EXCLUSIVE_FULLSCREEN:
 		case WINDOW_MODE_FULLSCREEN: {
 			if (wd.layered_window)
 				_set_window_per_pixel_transparency_enabled(false, p_window);
diff --git a/platform/osx/export/export_plugin.h b/platform/osx/export/export_plugin.h
index 0c2ac90206..931ce7e41a 100644
--- a/platform/osx/export/export_plugin.h
+++ b/platform/osx/export/export_plugin.h
@@ -87,7 +87,7 @@ class EditorExportPlatformOSX : public EditorExportPlatform {
 
 		for (int i = 0; i < pname.length(); i++) {
 			char32_t c = pname[i];
-			if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-' || c == '.')) {
+			if (!(is_ascii_alphanumeric_char(c) || c == '-' || c == '.')) {
 				if (r_error) {
 					*r_error = vformat(TTR("The character '%s' is not allowed in Identifier."), String::chr(c));
 				}
diff --git a/platform/windows/display_server_windows.cpp b/platform/windows/display_server_windows.cpp
index d288c27016..b340129a16 100644
--- a/platform/windows/display_server_windows.cpp
+++ b/platform/windows/display_server_windows.cpp
@@ -326,6 +326,12 @@ typedef struct {
 	Rect2i rect;
 } EnumRectData;
 
+typedef struct {
+	int count; // Number of monitors visited so far; incremented by the enumeration callback on every call.
+	int screen; // Index of the monitor whose refresh rate is wanted.
+	float rate; // Result slot: preset by the caller, filled in by the callback when it reaches the wanted monitor.
+} EnumRefreshRateData;
+
 static BOOL CALLBACK _MonitorEnumProcSize(HMONITOR hMonitor, HDC hdcMonitor, LPRECT lprcMonitor, LPARAM dwData) {
 	EnumSizeData *data = (EnumSizeData *)dwData;
 	if (data->count == data->screen) {
@@ -363,6 +369,26 @@ static BOOL CALLBACK _MonitorEnumProcUsableSize(HMONITOR hMonitor, HDC hdcMonito
 	return TRUE;
 }
 
+// EnumDisplayMonitors callback: writes the refresh rate (Hz) of monitor number data->screen into data->rate.
+static BOOL CALLBACK _MonitorEnumProcRefreshRate(HMONITOR hMonitor, HDC hdcMonitor, LPRECT lprcMonitor, LPARAM dwData) {
+	EnumRefreshRateData *data = (EnumRefreshRateData *)dwData;
+	if (data->count == data->screen) {
+		MONITORINFOEXW minfo;
+		memset(&minfo, 0, sizeof(minfo));
+		minfo.cbSize = sizeof(minfo);
+		DEVMODEW dm;
+		memset(&dm, 0, sizeof(dm));
+		dm.dmSize = sizeof(dm);
+		// Keep the caller's preset rate if either query fails, or if the driver reports 0/1 ("hardware default").
+		if (GetMonitorInfoW(hMonitor, &minfo) && EnumDisplaySettingsW(minfo.szDevice, ENUM_CURRENT_SETTINGS, &dm) && dm.dmDisplayFrequency > 1) {
+			data->rate = dm.dmDisplayFrequency;
+		}
+	}
+
+	data->count++;
+	return TRUE;
+}
+
 Rect2i DisplayServerWindows::screen_get_usable_rect(int p_screen) const {
 	_THREAD_SAFE_METHOD_
 
@@ -446,6 +472,13 @@ int DisplayServerWindows::screen_get_dpi(int p_screen) const {
 	EnumDisplayMonitors(nullptr, nullptr, _MonitorEnumProcDpi, (LPARAM)&data);
 	return data.dpi;
 }
+float DisplayServerWindows::screen_get_refresh_rate(int p_screen) const {
+	_THREAD_SAFE_METHOD_
+	const int target_screen = (p_screen == SCREEN_OF_MAIN_WINDOW) ? window_get_current_screen() : p_screen;
+	EnumRefreshRateData query = { 0, target_screen, SCREEN_REFRESH_RATE_FALLBACK }; // { count, screen, rate }
+	EnumDisplayMonitors(nullptr, nullptr, _MonitorEnumProcRefreshRate, (LPARAM)&query);
+	return query.rate; // Still the fallback if the callback never wrote a rate.
+}
 
 bool DisplayServerWindows::screen_is_touchscreen(int p_screen) const {
 #ifndef _MSC_VER
@@ -506,7 +539,7 @@ DisplayServer::WindowID DisplayServerWindows::create_sub_window(WindowMode p_mod
 	if (p_flags & WINDOW_FLAG_BORDERLESS_BIT) {
 		wd.borderless = true;
 	}
-	if (p_flags & WINDOW_FLAG_ALWAYS_ON_TOP_BIT && p_mode != WINDOW_MODE_FULLSCREEN) {
+	if (p_flags & WINDOW_FLAG_ALWAYS_ON_TOP_BIT && p_mode != WINDOW_MODE_FULLSCREEN && p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN) {
 		wd.always_on_top = true;
 	}
 	if (p_flags & WINDOW_FLAG_NO_FOCUS_BIT) {
@@ -946,7 +979,7 @@ Size2i DisplayServerWindows::window_get_real_size(WindowID p_window) const {
 	return Size2();
 }
 
-void DisplayServerWindows::_get_window_style(bool p_main_window, bool p_fullscreen, bool p_borderless, bool p_resizable, bool p_maximized, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex) {
+void DisplayServerWindows::_get_window_style(bool p_main_window, bool p_fullscreen, bool p_multiwindow_fs, bool p_borderless, bool p_resizable, bool p_maximized, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex) {
 	// Windows docs for window styles:
 	// https://docs.microsoft.com/en-us/windows/win32/winmsg/window-styles
 	// https://docs.microsoft.com/en-us/windows/win32/winmsg/extended-window-styles
@@ -959,6 +992,9 @@ void DisplayServerWindows::_get_window_style(bool p_main_window, bool p_fullscre
 
 	if (p_fullscreen || p_borderless) {
 		r_style |= WS_POPUP; // p_borderless was WS_EX_TOOLWINDOW in the past.
+		if (p_fullscreen && p_multiwindow_fs) {
+			r_style |= WS_BORDER; // Allows child windows to be displayed on top of full screen.
+		}
 	} else {
 		if (p_resizable) {
 			if (p_maximized) {
@@ -989,7 +1025,7 @@ void DisplayServerWindows::_update_window_style(WindowID p_window, bool p_repain
 	DWORD style = 0;
 	DWORD style_ex = 0;
 
-	_get_window_style(p_window == MAIN_WINDOW_ID, wd.fullscreen, wd.borderless, wd.resizable, wd.maximized, wd.no_focus, style, style_ex);
+	_get_window_style(p_window == MAIN_WINDOW_ID, wd.fullscreen, wd.multiwindow_fs, wd.borderless, wd.resizable, wd.maximized, wd.no_focus, style, style_ex);
 
 	SetWindowLongPtr(wd.hWnd, GWL_STYLE, style);
 	SetWindowLongPtr(wd.hWnd, GWL_EXSTYLE, style_ex);
@@ -1009,10 +1045,11 @@ void DisplayServerWindows::window_set_mode(WindowMode p_mode, WindowID p_window)
 	ERR_FAIL_COND(!windows.has(p_window));
 	WindowData &wd = windows[p_window];
 
-	if (wd.fullscreen && p_mode != WINDOW_MODE_FULLSCREEN) {
+	if (wd.fullscreen && p_mode != WINDOW_MODE_FULLSCREEN && p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN) {
 		RECT rect;
 
 		wd.fullscreen = false;
+		wd.multiwindow_fs = false;
 		wd.maximized = wd.was_maximized;
 
 		if (wd.pre_fs_valid) {
@@ -1051,7 +1088,15 @@ void DisplayServerWindows::window_set_mode(WindowMode p_mode, WindowID p_window)
 		wd.minimized = true;
 	}
 
-	if (p_mode == WINDOW_MODE_FULLSCREEN && !wd.fullscreen) {
+	// Track which fullscreen flavor is requested. _get_window_style() only adds WS_BORDER when
+	// multiwindow_fs is set, which is what lets child windows be displayed on top of a fullscreen window.
+	wd.multiwindow_fs = (p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN);
+
+	// _update_window_style() takes the window ID first and p_repaint second. Passing a lone `false`
+	// converted it to window ID 0 and left p_repaint at its default, so p_window was never restyled.
+	_update_window_style(p_window, false);
+
+	if ((p_mode == WINDOW_MODE_FULLSCREEN || p_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN) && !wd.fullscreen) {
 		if (wd.minimized) {
 			ShowWindow(wd.hWnd, SW_RESTORE);
 		}
@@ -1098,7 +1143,11 @@ DisplayServer::WindowMode DisplayServerWindows::window_get_mode(WindowID p_windo
 	const WindowData &wd = windows[p_window];
 
 	if (wd.fullscreen) {
-		return WINDOW_MODE_FULLSCREEN;
+		if (wd.multiwindow_fs) {
+			return WINDOW_MODE_FULLSCREEN;
+		} else {
+			return WINDOW_MODE_EXCLUSIVE_FULLSCREEN;
+		}
 	} else if (wd.minimized) {
 		return WINDOW_MODE_MINIMIZED;
 	} else if (wd.maximized) {
@@ -2649,98 +2698,72 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 			}
 
 		} break;
-		case WM_MOVE: {
-			if (!IsIconic(windows[window_id].hWnd)) {
-				int x = int16_t(LOWORD(lParam));
-				int y = int16_t(HIWORD(lParam));
-				windows[window_id].last_pos = Point2(x, y);
-
-				if (!windows[window_id].rect_changed_callback.is_null()) {
-					Variant size = Rect2i(windows[window_id].last_pos.x, windows[window_id].last_pos.y, windows[window_id].width, windows[window_id].height);
-					Variant *sizep = &size;
-					Variant ret;
-					Callable::CallError ce;
-					windows[window_id].rect_changed_callback.call((const Variant **)&sizep, 1, ret, ce);
+
+		case WM_WINDOWPOSCHANGED: {
+			Rect2i window_client_rect;
+			{
+				RECT rect;
+				GetClientRect(hWnd, &rect);
+				ClientToScreen(hWnd, (POINT *)&rect.left);
+				ClientToScreen(hWnd, (POINT *)&rect.right);
+				window_client_rect = Rect2i(rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top);
+			}
+
+			WINDOWPOS *window_pos_params = (WINDOWPOS *)lParam;
+			WindowData &window = windows[window_id];
+
+			bool rect_changed = false;
+			if (!(window_pos_params->flags & SWP_NOSIZE) || window_pos_params->flags & SWP_FRAMECHANGED) {
+				int screen_id = window_get_current_screen(window_id);
+				Size2i screen_size = screen_get_size(screen_id);
+				Point2i screen_position = screen_get_position(screen_id);
+
+				window.maximized = false;
+				window.minimized = false;
+				window.fullscreen = false;
+
+				if (IsIconic(hWnd)) {
+					window.minimized = true;
+				} else if (IsZoomed(hWnd)) {
+					window.maximized = true;
+				} else if (window_client_rect.position == screen_position && window_client_rect.size == screen_size) {
+					window.fullscreen = true;
 				}
-			}
-		} break;
-		case WM_SIZE: {
-			// Ignore window size change when a SIZE_MINIMIZED event is triggered.
-			if (wParam != SIZE_MINIMIZED) {
-				// The new width and height of the client area.
-				int window_w = LOWORD(lParam);
-				int window_h = HIWORD(lParam);
-
-				// Set new value to the size if it isn't preserved.
-				if (window_w > 0 && window_h > 0 && !windows[window_id].preserve_window_size) {
-					windows[window_id].width = window_w;
-					windows[window_id].height = window_h;
+
+				if (!window.minimized) {
+					window.width = window_client_rect.size.width;
+					window.height = window_client_rect.size.height;
 
 #if defined(VULKAN_ENABLED)
 					if (context_vulkan && window_created) {
-						context_vulkan->window_resize(window_id, windows[window_id].width, windows[window_id].height);
+						context_vulkan->window_resize(window_id, window.width, window.height);
 					}
 #endif
+					rect_changed = true;
+				}
+			}
 
-				} else { // If the size is preserved.
-					windows[window_id].preserve_window_size = false;
+			if (!window.minimized && (!(window_pos_params->flags & SWP_NOMOVE) || window_pos_params->flags & SWP_FRAMECHANGED)) {
+				window.last_pos = window_client_rect.position;
+				rect_changed = true;
+			}
 
-					// Restore the old size.
-					window_set_size(Size2(windows[window_id].width, windows[window_id].height), window_id);
+			if (rect_changed) {
+				if (!window.rect_changed_callback.is_null()) {
+					Variant size = Rect2i(window.last_pos.x, window.last_pos.y, window.width, window.height);
+					const Variant *args[] = { &size };
+					Variant ret;
+					Callable::CallError ce;
+					window.rect_changed_callback.call(args, 1, ret, ce);
 				}
-			} else { // When the window has been minimized, preserve its size.
-				windows[window_id].preserve_window_size = true;
 			}
 
-			// Call windows rect change callback.
-			if (!windows[window_id].rect_changed_callback.is_null()) {
-				Variant size = Rect2i(windows[window_id].last_pos.x, windows[window_id].last_pos.y, windows[window_id].width, windows[window_id].height);
-				Variant *size_ptr = &size;
-				Variant ret;
-				Callable::CallError ce;
-				windows[window_id].rect_changed_callback.call((const Variant **)&size_ptr, 1, ret, ce);
-			}
-
-			// The window has been maximized.
-			if (wParam == SIZE_MAXIMIZED) {
-				windows[window_id].maximized = true;
-				windows[window_id].minimized = false;
-			}
-			// The window has been minimized.
-			else if (wParam == SIZE_MINIMIZED) {
-				windows[window_id].maximized = false;
-				windows[window_id].minimized = true;
-				windows[window_id].preserve_window_size = false;
-			}
-			// The window has been resized, but neither the SIZE_MINIMIZED nor SIZE_MAXIMIZED value applies.
-			else if (wParam == SIZE_RESTORED) {
-				windows[window_id].maximized = false;
-				windows[window_id].minimized = false;
-			}
-#if 0
-			if (is_layered_allowed() && layered_window) {
-				DeleteObject(hBitmap);
-
-				RECT r;
-				GetWindowRect(hWnd, &r);
-				dib_size = Size2i(r.right - r.left, r.bottom - r.top);
-
-				BITMAPINFO bmi;
-				ZeroMemory(&bmi, sizeof(BITMAPINFO));
-				bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
-				bmi.bmiHeader.biWidth = dib_size.x;
-				bmi.bmiHeader.biHeight = dib_size.y;
-				bmi.bmiHeader.biPlanes = 1;
-				bmi.bmiHeader.biBitCount = 32;
-				bmi.bmiHeader.biCompression = BI_RGB;
-				bmi.bmiHeader.biSizeImage = dib_size.x * dib_size.y * 4;
-				hBitmap = CreateDIBSection(hDC_dib, &bmi, DIB_RGB_COLORS, (void **)&dib_data, nullptr, 0x0);
-				SelectObject(hDC_dib, hBitmap);
-
-				ZeroMemory(dib_data, dib_size.x * dib_size.y * 4);
-			}
-#endif
+			// Return here to prevent WM_MOVE and WM_SIZE from being sent
+			// See: https://docs.microsoft.com/en-us/windows/win32/winmsg/wm-windowposchanged#remarks
+			return 0;
+
 		} break;
+
 		case WM_ENTERSIZEMOVE: {
 			Input::get_singleton()->release_pressed_events();
 			windows[window_id].move_timer_id = SetTimer(windows[window_id].hWnd, 1, USER_TIMER_MINIMUM, (TIMERPROC) nullptr);
@@ -3088,7 +3111,7 @@ DisplayServer::WindowID DisplayServerWindows::_create_window(WindowMode p_mode,
 	DWORD dwExStyle;
 	DWORD dwStyle;
 
-	_get_window_style(window_id_counter == MAIN_WINDOW_ID, p_mode == WINDOW_MODE_FULLSCREEN, p_flags & WINDOW_FLAG_BORDERLESS_BIT, !(p_flags & WINDOW_FLAG_RESIZE_DISABLED_BIT), p_mode == WINDOW_MODE_MAXIMIZED, (p_flags & WINDOW_FLAG_NO_FOCUS_BIT), dwStyle, dwExStyle);
+	_get_window_style(window_id_counter == MAIN_WINDOW_ID, (p_mode == WINDOW_MODE_FULLSCREEN || p_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN), p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN, p_flags & WINDOW_FLAG_BORDERLESS_BIT, !(p_flags & WINDOW_FLAG_RESIZE_DISABLED_BIT), p_mode == WINDOW_MODE_MAXIMIZED, (p_flags & WINDOW_FLAG_NO_FOCUS_BIT), dwStyle, dwExStyle); // 3rd argument is p_multiwindow_fs: false only for exclusive fullscreen, matching window_set_mode().
 
 	RECT WindowRect;
 
@@ -3097,7 +3120,7 @@ DisplayServer::WindowID DisplayServerWindows::_create_window(WindowMode p_mode,
 	WindowRect.top = p_rect.position.y;
 	WindowRect.bottom = p_rect.position.y + p_rect.size.y;
 
-	if (p_mode == WINDOW_MODE_FULLSCREEN) {
+	if (p_mode == WINDOW_MODE_FULLSCREEN || p_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN) {
 		int nearest_area = 0;
 		Rect2i screen_rect;
 		for (int i = 0; i < get_screen_count(); i++) {
@@ -3140,7 +3163,7 @@ DisplayServer::WindowID DisplayServerWindows::_create_window(WindowMode p_mode,
 			windows.erase(id);
 			return INVALID_WINDOW_ID;
 		}
-		if (p_mode != WINDOW_MODE_FULLSCREEN) {
+		if (p_mode != WINDOW_MODE_FULLSCREEN && p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN) {
 			wd.pre_fs_valid = true;
 		}
 
diff --git a/platform/windows/display_server_windows.h b/platform/windows/display_server_windows.h
index 803c2d4836..d36ca97ebe 100644
--- a/platform/windows/display_server_windows.h
+++ b/platform/windows/display_server_windows.h
@@ -326,12 +326,12 @@ class DisplayServerWindows : public DisplayServer {
 
 		Vector<Vector2> mpath;
 
-		bool preserve_window_size = false;
 		bool pre_fs_valid = false;
 		RECT pre_fs_rect;
 		bool maximized = false;
 		bool minimized = false;
 		bool fullscreen = false;
+		bool multiwindow_fs = false;
 		bool borderless = false;
 		bool resizable = true;
 		bool window_focused = false;
@@ -401,7 +401,7 @@ class DisplayServerWindows : public DisplayServer {
 	WNDPROC user_proc = nullptr;
 
 	void _send_window_event(const WindowData &wd, WindowEvent p_event);
-	void _get_window_style(bool p_main_window, bool p_fullscreen, bool p_borderless, bool p_resizable, bool p_maximized, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex);
+	void _get_window_style(bool p_main_window, bool p_fullscreen, bool p_multiwindow_fs, bool p_borderless, bool p_resizable, bool p_maximized, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex);
 
 	MouseMode mouse_mode;
 	int restore_mouse_trails = 0;
@@ -458,6 +458,7 @@ public:
 	virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual Rect2i screen_get_usable_rect(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual int screen_get_dpi(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual bool screen_is_touchscreen(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 
 	virtual void screen_set_orientation(ScreenOrientation p_orientation, int p_screen = SCREEN_OF_MAIN_WINDOW) override;
diff --git a/scene/SCsub b/scene/SCsub
index 92288211bb..a7b23af598 100644
--- a/scene/SCsub
+++ b/scene/SCsub
@@ -9,6 +9,7 @@ env.add_source_files(env.scene_sources, "*.cpp")
 
 # Chain load SCsubs
 SConscript("main/SCsub")
+SConscript("multiplayer/SCsub")
 SConscript("gui/SCsub")
 if not env["disable_3d"]:
     SConscript("3d/SCsub")
diff --git a/scene/gui/code_edit.cpp b/scene/gui/code_edit.cpp
index 7e21a43ab6..8cb8a78e8d 100644
--- a/scene/gui/code_edit.cpp
+++ b/scene/gui/code_edit.cpp
@@ -34,14 +34,6 @@
 #include "core/string/string_builder.h"
 #include "core/string/ustring.h"
 
-static bool _is_whitespace(char32_t c) {
-	return c == '\t' || c == ' ';
-}
-
-static bool _is_char(char32_t c) {
-	return !is_symbol(c);
-}
-
 void CodeEdit::_notification(int p_what) {
 	switch (p_what) {
 		case NOTIFICATION_THEME_CHANGED:
@@ -607,9 +599,9 @@ void CodeEdit::_handle_unicode_input_internal(const uint32_t p_unicode) {
 
 			int post_brace_pair = cc < get_line(cl).length() ? _get_auto_brace_pair_close_at_pos(cl, cc) : -1;
 
-			if (has_string_delimiter(chr) && cc > 0 && _is_char(get_line(cl)[cc - 1]) && post_brace_pair == -1) {
+			if (has_string_delimiter(chr) && cc > 0 && !is_symbol(get_line(cl)[cc - 1]) && post_brace_pair == -1) {
 				insert_text_at_caret(chr);
-			} else if (cc < get_line(cl).length() && _is_char(get_line(cl)[cc])) {
+			} else if (cc < get_line(cl).length() && !is_symbol(get_line(cl)[cc])) {
 				insert_text_at_caret(chr);
 			} else if (post_brace_pair != -1 && auto_brace_completion_pairs[post_brace_pair].close_key[0] == chr[0]) {
 				caret_move_offset = auto_brace_completion_pairs[post_brace_pair].close_key.length();
@@ -1001,7 +993,7 @@ void CodeEdit::_new_line(bool p_split_current_line, bool p_above) {
 			}
 
 			/* Make sure this is the last char, trailing whitespace or comments are okay. */
-			if (should_indent && (!_is_whitespace(c) && is_in_comment(cl, cc) == -1)) {
+			if (should_indent && (!is_whitespace(c) && is_in_comment(cl, cc) == -1)) {
 				should_indent = false;
 			}
 		}
@@ -1817,7 +1809,7 @@ void CodeEdit::request_code_completion(bool p_force) {
 	String line = get_line(get_caret_line());
 	int ofs = CLAMP(get_caret_column(), 0, line.length());
 
-	if (ofs > 0 && (is_in_string(get_caret_line(), ofs) != -1 || _is_char(line[ofs - 1]) || code_completion_prefixes.has(line[ofs - 1]))) {
+	if (ofs > 0 && (is_in_string(get_caret_line(), ofs) != -1 || !is_symbol(line[ofs - 1]) || code_completion_prefixes.has(line[ofs - 1]))) {
 		emit_signal(SNAME("code_completion_requested"));
 	} else if (ofs > 1 && line[ofs - 1] == ' ' && code_completion_prefixes.has(line[ofs - 2])) {
 		emit_signal(SNAME("code_completion_requested"));
@@ -1926,7 +1918,7 @@ void CodeEdit::confirm_code_completion(bool p_replace) {
 
 		if (merge_text) {
 			for (; caret_col < line.length(); caret_col++) {
-				if (!_is_char(line[caret_col])) {
+				if (is_symbol(line[caret_col])) {
 					break;
 				}
 			}
@@ -2562,7 +2554,7 @@ int CodeEdit::_is_in_delimiter(int p_line, int p_column, DelimiterType p_type) c
 			region = E->value();
 			in_region = true;
 			for (int i = E->key() - 2; i >= 0; i--) {
-				if (!_is_whitespace(line[i])) {
+				if (!is_whitespace(line[i])) {
 					return -1;
 				}
 			}
@@ -2581,7 +2573,7 @@ int CodeEdit::_is_in_delimiter(int p_line, int p_column, DelimiterType p_type) c
 		}
 
 		for (int i = end_col; i < line.length(); i++) {
-			if (!_is_whitespace(line[i])) {
+			if (!is_whitespace(line[i])) {
 				return -1;
 			}
 		}
@@ -2797,11 +2789,11 @@ void CodeEdit::_filter_code_completion_candidates_impl() {
 		while (ofs > 0 && line[ofs] == ' ') {
 			ofs--;
 		}
-		prev_is_word = _is_char(line[ofs]);
+		prev_is_word = !is_symbol(line[ofs]);
 		/* Otherwise get current word and set cofs to the start. */
 	} else {
 		int start_cofs = cofs;
-		while (cofs > 0 && line[cofs - 1] > 32 && (line[cofs - 1] == '/' || _is_char(line[cofs - 1]))) {
+		while (cofs > 0 && line[cofs - 1] > 32 && (line[cofs - 1] == '/' || !is_symbol(line[cofs - 1]))) {
 			cofs--;
 		}
 		string_to_complete = line.substr(cofs, start_cofs - cofs);
diff --git a/scene/gui/text_edit.cpp b/scene/gui/text_edit.cpp
index 0ee4a6af4e..bb259843b8 100644
--- a/scene/gui/text_edit.cpp
+++ b/scene/gui/text_edit.cpp
@@ -43,18 +43,6 @@
 
 #include "scene/main/window.h"
 
-static bool _is_text_char(char32_t c) {
-	return !is_symbol(c);
-}
-
-static bool _is_whitespace(char32_t c) {
-	return c == '\t' || c == ' ';
-}
-
-static bool _is_char(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 ///                            TEXT                                         ///
 ///////////////////////////////////////////////////////////////////////////////
@@ -820,8 +808,8 @@ void TextEdit::_notification(int p_what) {
 							int xpos = indent_px + ((xmargin_end + minimap_char_size.x) + (minimap_char_size.x * j)) + tabs;
 							bool out_of_bounds = (xpos >= xmargin_end + minimap_width);
 
-							bool is_whitespace = _is_whitespace(str[j]);
-							if (!is_whitespace) {
+							bool whitespace = is_whitespace(str[j]);
+							if (!whitespace) {
 								characters++;
 
 								if (j < str.length() - 1 && color == previous_color && !out_of_bounds) {
@@ -843,7 +831,7 @@ void TextEdit::_notification(int p_what) {
 							if (characters > 0) {
 								previous_color.a *= 0.6;
 								// take one for zero indexing, and if we hit whitespace / the end of a word.
-								int chars = MAX(0, (j - (characters - 1)) - (is_whitespace ? 1 : 0)) + 1;
+								int chars = MAX(0, (j - (characters - 1)) - (whitespace ? 1 : 0)) + 1;
 								int char_x_ofs = indent_px + ((xmargin_end + minimap_char_size.x) + (minimap_char_size.x * chars)) + tabs;
 								if (rtl) {
 									RenderingServer::get_singleton()->canvas_item_add_rect(ci, Rect2(Point2(size.width - char_x_ofs - minimap_char_size.x * characters, minimap_line_height * i), Point2(minimap_char_size.x * characters, minimap_char_size.y)), previous_color);
@@ -1144,7 +1132,7 @@ void TextEdit::_notification(int p_what) {
 					}
 
 					if (!clipped && lookup_symbol_word.length() != 0) { // Highlight word
-						if (_is_char(lookup_symbol_word[0]) || lookup_symbol_word[0] == '.') {
+						if (is_ascii_char(lookup_symbol_word[0]) || lookup_symbol_word[0] == '_' || lookup_symbol_word[0] == '.') {
 							int highlighted_word_col = _get_column_pos_of_word(lookup_symbol_word, str, SEARCH_MATCH_CASE | SEARCH_WHOLE_WORDS, 0);
 							while (highlighted_word_col != -1) {
 								Vector<Vector2> sel = TS->shaped_text_get_selection(rid, highlighted_word_col + start, highlighted_word_col + lookup_symbol_word.length() + start);
@@ -3037,7 +3025,7 @@ int TextEdit::get_first_non_whitespace_column(int p_line) const {
 	ERR_FAIL_INDEX_V(p_line, text.size(), 0);
 
 	int col = 0;
-	while (col < text[p_line].length() && _is_whitespace(text[p_line][col])) {
+	while (col < text[p_line].length() && is_whitespace(text[p_line][col])) {
 		col++;
 	}
 	return col;
@@ -3622,9 +3610,9 @@ Point2i TextEdit::search(const String &p_key, uint32_t p_search_flags, int p_fro
 
 			if (pos != -1 && (p_search_flags & SEARCH_WHOLE_WORDS)) {
 				// Validate for whole words.
-				if (pos > 0 && _is_text_char(text_line[pos - 1])) {
+				if (pos > 0 && !is_symbol(text_line[pos - 1])) {
 					is_match = false;
-				} else if (pos + p_key.length() < text_line.length() && _is_text_char(text_line[pos + p_key.length()])) {
+				} else if (pos + p_key.length() < text_line.length() && !is_symbol(text_line[pos + p_key.length()])) {
 					is_match = false;
 				}
 			}
@@ -5779,9 +5767,9 @@ int TextEdit::_get_column_pos_of_word(const String &p_key, const String &p_searc
 			if (col != -1 && p_search_flags & SEARCH_WHOLE_WORDS) {
 				p_from_column = col;
 
-				if (col > 0 && _is_text_char(p_search[col - 1])) {
+				if (col > 0 && !is_symbol(p_search[col - 1])) {
 					col = -1;
-				} else if ((col + p_key.length()) < p_search.length() && _is_text_char(p_search[col + p_key.length()])) {
+				} else if ((col + p_key.length()) < p_search.length() && !is_symbol(p_search[col + p_key.length()])) {
 					col = -1;
 				}
 			}
diff --git a/scene/gui/texture_button.cpp b/scene/gui/texture_button.cpp
index 89a17ae854..26acfaaa70 100644
--- a/scene/gui/texture_button.cpp
+++ b/scene/gui/texture_button.cpp
@@ -37,7 +37,7 @@
 Size2 TextureButton::get_minimum_size() const {
 	Size2 rscale = Control::get_minimum_size();
 
-	if (!expand) {
+	if (!ignore_texture_size) {
 		if (normal.is_null()) {
 			if (pressed.is_null()) {
 				if (hover.is_null()) {
@@ -182,50 +182,48 @@ void TextureButton::_notification(int p_what) {
 				size = texdraw->get_size();
 				_texture_region = Rect2(Point2(), texdraw->get_size());
 				_tile = false;
-				if (expand) {
-					switch (stretch_mode) {
-						case STRETCH_KEEP:
-							size = texdraw->get_size();
-							break;
-						case STRETCH_SCALE:
-							size = get_size();
-							break;
-						case STRETCH_TILE:
-							size = get_size();
-							_tile = true;
-							break;
-						case STRETCH_KEEP_CENTERED:
-							ofs = (get_size() - texdraw->get_size()) / 2;
-							size = texdraw->get_size();
-							break;
-						case STRETCH_KEEP_ASPECT_CENTERED:
-						case STRETCH_KEEP_ASPECT: {
-							Size2 _size = get_size();
-							float tex_width = texdraw->get_width() * _size.height / texdraw->get_height();
-							float tex_height = _size.height;
-
-							if (tex_width > _size.width) {
-								tex_width = _size.width;
-								tex_height = texdraw->get_height() * tex_width / texdraw->get_width();
-							}
+				switch (stretch_mode) {
+					case STRETCH_KEEP:
+						size = texdraw->get_size();
+						break;
+					case STRETCH_SCALE:
+						size = get_size();
+						break;
+					case STRETCH_TILE:
+						size = get_size();
+						_tile = true;
+						break;
+					case STRETCH_KEEP_CENTERED:
+						ofs = (get_size() - texdraw->get_size()) / 2;
+						size = texdraw->get_size();
+						break;
+					case STRETCH_KEEP_ASPECT_CENTERED:
+					case STRETCH_KEEP_ASPECT: {
+						Size2 _size = get_size();
+						float tex_width = texdraw->get_width() * _size.height / texdraw->get_height();
+						float tex_height = _size.height;
+
+						if (tex_width > _size.width) {
+							tex_width = _size.width;
+							tex_height = texdraw->get_height() * tex_width / texdraw->get_width();
+						}
 
-							if (stretch_mode == STRETCH_KEEP_ASPECT_CENTERED) {
-								ofs.x = (_size.width - tex_width) / 2;
-								ofs.y = (_size.height - tex_height) / 2;
-							}
-							size.width = tex_width;
-							size.height = tex_height;
-						} break;
-						case STRETCH_KEEP_ASPECT_COVERED: {
-							size = get_size();
-							Size2 tex_size = texdraw->get_size();
-							Size2 scale_size(size.width / tex_size.width, size.height / tex_size.height);
-							float scale = scale_size.width > scale_size.height ? scale_size.width : scale_size.height;
-							Size2 scaled_tex_size = tex_size * scale;
-							Point2 ofs2 = ((scaled_tex_size - size) / scale).abs() / 2.0f;
-							_texture_region = Rect2(ofs2, size / scale);
-						} break;
-					}
+						if (stretch_mode == STRETCH_KEEP_ASPECT_CENTERED) {
+							ofs.x = (_size.width - tex_width) / 2;
+							ofs.y = (_size.height - tex_height) / 2;
+						}
+						size.width = tex_width;
+						size.height = tex_height;
+					} break;
+					case STRETCH_KEEP_ASPECT_COVERED: {
+						size = get_size();
+						Size2 tex_size = texdraw->get_size();
+						Size2 scale_size(size.width / tex_size.width, size.height / tex_size.height);
+						float scale = scale_size.width > scale_size.height ? scale_size.width : scale_size.height;
+						Size2 scaled_tex_size = tex_size * scale;
+						Point2 ofs2 = ((scaled_tex_size - size) / scale).abs() / 2.0f;
+						_texture_region = Rect2(ofs2, size / scale);
+					} break;
 				}
 
 				_position_rect = Rect2(ofs, size);
@@ -258,7 +256,7 @@ void TextureButton::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_disabled_texture", "texture"), &TextureButton::set_disabled_texture);
 	ClassDB::bind_method(D_METHOD("set_focused_texture", "texture"), &TextureButton::set_focused_texture);
 	ClassDB::bind_method(D_METHOD("set_click_mask", "mask"), &TextureButton::set_click_mask);
-	ClassDB::bind_method(D_METHOD("set_expand", "expand"), &TextureButton::set_expand);
+	ClassDB::bind_method(D_METHOD("set_ignore_texture_size", "ignore"), &TextureButton::set_ignore_texture_size);
 	ClassDB::bind_method(D_METHOD("set_stretch_mode", "mode"), &TextureButton::set_stretch_mode);
 	ClassDB::bind_method(D_METHOD("set_flip_h", "enable"), &TextureButton::set_flip_h);
 	ClassDB::bind_method(D_METHOD("is_flipped_h"), &TextureButton::is_flipped_h);
@@ -271,7 +269,7 @@ void TextureButton::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_disabled_texture"), &TextureButton::get_disabled_texture);
 	ClassDB::bind_method(D_METHOD("get_focused_texture"), &TextureButton::get_focused_texture);
 	ClassDB::bind_method(D_METHOD("get_click_mask"), &TextureButton::get_click_mask);
-	ClassDB::bind_method(D_METHOD("get_expand"), &TextureButton::get_expand);
+	ClassDB::bind_method(D_METHOD("get_ignore_texture_size"), &TextureButton::get_ignore_texture_size);
 	ClassDB::bind_method(D_METHOD("get_stretch_mode"), &TextureButton::get_stretch_mode);
 
 	ADD_GROUP("Textures", "texture_");
@@ -281,7 +279,7 @@ void TextureButton::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "texture_disabled", PROPERTY_HINT_RESOURCE_TYPE, "Texture2D"), "set_disabled_texture", "get_disabled_texture");
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "texture_focused", PROPERTY_HINT_RESOURCE_TYPE, "Texture2D"), "set_focused_texture", "get_focused_texture");
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "texture_click_mask", PROPERTY_HINT_RESOURCE_TYPE, "BitMap"), "set_click_mask", "get_click_mask");
-	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "expand", PROPERTY_HINT_RESOURCE_TYPE, "bool"), "set_expand", "get_expand");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "ignore_texture_size", PROPERTY_HINT_RESOURCE_TYPE, "bool"), "set_ignore_texture_size", "get_ignore_texture_size");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "stretch_mode", PROPERTY_HINT_ENUM, "Scale,Tile,Keep,Keep Centered,Keep Aspect,Keep Aspect Centered,Keep Aspect Covered"), "set_stretch_mode", "get_stretch_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "flip_h", PROPERTY_HINT_RESOURCE_TYPE, "bool"), "set_flip_h", "is_flipped_h");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "flip_v", PROPERTY_HINT_RESOURCE_TYPE, "bool"), "set_flip_v", "is_flipped_v");
@@ -352,12 +350,12 @@ void TextureButton::set_focused_texture(const Ref<Texture2D> &p_focused) {
 	focused = p_focused;
 };
 
-bool TextureButton::get_expand() const {
-	return expand;
+bool TextureButton::get_ignore_texture_size() const {
+	return ignore_texture_size;
 }
 
-void TextureButton::set_expand(bool p_expand) {
-	expand = p_expand;
+void TextureButton::set_ignore_texture_size(bool p_ignore) {
+	ignore_texture_size = p_ignore;
 	update_minimum_size();
 	update();
 }
diff --git a/scene/gui/texture_button.h b/scene/gui/texture_button.h
index 1428a79a1d..5762949acd 100644
--- a/scene/gui/texture_button.h
+++ b/scene/gui/texture_button.h
@@ -54,8 +54,8 @@ private:
 	Ref<Texture2D> disabled;
 	Ref<Texture2D> focused;
 	Ref<BitMap> click_mask;
-	bool expand = false;
-	StretchMode stretch_mode = STRETCH_SCALE;
+	bool ignore_texture_size = false;
+	StretchMode stretch_mode = STRETCH_KEEP;
 
 	Rect2 _texture_region;
 	Rect2 _position_rect;
@@ -85,8 +85,8 @@ public:
 	Ref<Texture2D> get_focused_texture() const;
 	Ref<BitMap> get_click_mask() const;
 
-	bool get_expand() const;
-	void set_expand(bool p_expand);
+	bool get_ignore_texture_size() const;
+	void set_ignore_texture_size(bool p_ignore);
 
 	void set_stretch_mode(StretchMode p_stretch_mode);
 	StretchMode get_stretch_mode() const;
diff --git a/scene/gui/tree.cpp b/scene/gui/tree.cpp
index 1b32884880..a190e08088 100644
--- a/scene/gui/tree.cpp
+++ b/scene/gui/tree.cpp
@@ -2490,7 +2490,7 @@ int Tree::propagate_mouse_event(const Point2i &p_pos, int x_ofs, int y_ofs, int
 			/* process selection */
 
 			if (p_double_click && (!c.editable || c.mode == TreeItem::CELL_MODE_CUSTOM || c.mode == TreeItem::CELL_MODE_ICON /*|| c.mode==TreeItem::CELL_MODE_CHECK*/)) { //it's confusing for check
-
+				// Emits the "item_activated" signal.
 				propagate_mouse_activated = true;
 
 				incr_search.clear();
diff --git a/scene/main/canvas_item.cpp b/scene/main/canvas_item.cpp
index a0916c6291..a62bbb146c 100644
--- a/scene/main/canvas_item.cpp
+++ b/scene/main/canvas_item.cpp
@@ -72,6 +72,15 @@ bool CanvasItem::is_visible_in_tree() const {
 		p = p->get_parent_item();
 	}
 
+	const Node *n = get_parent();
+	while (n) {
+		const CanvasLayer *c = Object::cast_to<CanvasLayer>(n);
+		if (c && !c->is_visible()) {
+			return false;
+		}
+		n = n->get_parent();
+	}
+
 	return true;
 }
 
diff --git a/scene/main/canvas_item.h b/scene/main/canvas_item.h
index 3d49d89746..08fea52c3a 100644
--- a/scene/main/canvas_item.h
+++ b/scene/main/canvas_item.h
@@ -46,6 +46,8 @@ class World2D;
 class CanvasItem : public Node {
 	GDCLASS(CanvasItem, Node);
 
+	friend class CanvasLayer;
+
 public:
 	enum TextureFilter {
 		TEXTURE_FILTER_PARENT_NODE,
diff --git a/scene/main/canvas_layer.cpp b/scene/main/canvas_layer.cpp
index 282ab6b497..3f3e72357b 100644
--- a/scene/main/canvas_layer.cpp
+++ b/scene/main/canvas_layer.cpp
@@ -29,6 +29,7 @@
 /*************************************************************************/
 
 #include "canvas_layer.h"
+#include "canvas_item.h"
 #include "viewport.h"
 
 void CanvasLayer::set_layer(int p_xform) {
@@ -42,6 +43,32 @@ int CanvasLayer::get_layer() const {
 	return layer;
 }
 
+void CanvasLayer::set_visible(bool p_visible) {
+	if (visible == p_visible) {
+		return;
+	}
+	visible = p_visible;
+	emit_signal(SNAME("visibility_changed"));
+	// Push the new state to every direct child that is a CanvasItem; other children are skipped.
+	for (int idx = 0; idx < get_child_count(); idx++) {
+		CanvasItem *item = Object::cast_to<CanvasItem>(get_child(idx));
+		if (!item) {
+			continue;
+		}
+		const bool item_visible = item->is_visible();
+		RenderingServer::get_singleton()->canvas_item_set_visible(item->get_canvas_item(), p_visible && item_visible);
+		if (item_visible) {
+			item->_propagate_visibility_changed(p_visible);
+		} else {
+			item->notification(CanvasItem::NOTIFICATION_VISIBILITY_CHANGED);
+		}
+	}
+}
+
+bool CanvasLayer::is_visible() const {
+	return visible; // This layer's own flag only; no parent or ancestor is consulted here.
+}
+
 void CanvasLayer::set_transform(const Transform2D &p_xform) {
 	transform = p_xform;
 	locrotscale_dirty = true;
@@ -264,6 +291,9 @@ void CanvasLayer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_layer", "layer"), &CanvasLayer::set_layer);
 	ClassDB::bind_method(D_METHOD("get_layer"), &CanvasLayer::get_layer);
 
+	ClassDB::bind_method(D_METHOD("set_visible", "visible"), &CanvasLayer::set_visible);
+	ClassDB::bind_method(D_METHOD("is_visible"), &CanvasLayer::is_visible);
+
 	ClassDB::bind_method(D_METHOD("set_transform", "transform"), &CanvasLayer::set_transform);
 	ClassDB::bind_method(D_METHOD("get_transform"), &CanvasLayer::get_transform);
 
@@ -289,6 +319,7 @@ void CanvasLayer::_bind_methods() {
 
 	ADD_GROUP("Layer", "");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "layer", PROPERTY_HINT_RANGE, "-128,128,1"), "set_layer", "get_layer");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "visible"), "set_visible", "is_visible");
 	ADD_GROUP("Transform", "");
 	ADD_PROPERTY(PropertyInfo(Variant::VECTOR2, "offset"), "set_offset", "get_offset");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "rotation", PROPERTY_HINT_RANGE, "-1080,1080,0.1,or_lesser,or_greater,radians"), "set_rotation", "get_rotation");
@@ -299,6 +330,8 @@ void CanvasLayer::_bind_methods() {
 	ADD_GROUP("Follow Viewport", "follow_viewport");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "follow_viewport_enable"), "set_follow_viewport", "is_following_viewport");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "follow_viewport_scale", PROPERTY_HINT_RANGE, "0.001,1000,0.001,or_greater,or_lesser"), "set_follow_viewport_scale", "get_follow_viewport_scale");
+
+	ADD_SIGNAL(MethodInfo("visibility_changed"));
 }
 
 CanvasLayer::CanvasLayer() {
diff --git a/scene/main/canvas_layer.h b/scene/main/canvas_layer.h
index 93a0152787..b7bd793440 100644
--- a/scene/main/canvas_layer.h
+++ b/scene/main/canvas_layer.h
@@ -52,6 +52,7 @@ class CanvasLayer : public Node {
 	Viewport *vp = nullptr;
 
 	int sort_index = 0;
+	bool visible = true;
 
 	bool follow_viewport = false;
 	float follow_viewport_scale = 1.0;
@@ -69,6 +70,9 @@ public:
 	void set_layer(int p_xform);
 	int get_layer() const;
 
+	void set_visible(bool p_visible);
+	bool is_visible() const;
+
 	void set_transform(const Transform2D &p_xform);
 	Transform2D get_transform() const;
 
diff --git a/scene/main/node.cpp b/scene/main/node.cpp
index 2665a5695b..6b9d8ab211 100644
--- a/scene/main/node.cpp
+++ b/scene/main/node.cpp
@@ -32,6 +32,7 @@
 
 #include "core/core_string_names.h"
 #include "core/io/resource_loader.h"
+#include "core/multiplayer/multiplayer_api.h"
 #include "core/object/message_queue.h"
 #include "core/string/print_string.h"
 #include "instance_placeholder.h"
@@ -110,9 +111,6 @@ void Node::_notification(int p_notification) {
 				memdelete(data.path_cache);
 				data.path_cache = nullptr;
 			}
-			if (data.scene_file_path.length()) {
-				get_multiplayer()->scene_enter_exit_notify(data.scene_file_path, this, false);
-			}
 		} break;
 		case NOTIFICATION_PATH_RENAMED: {
 			if (data.path_cache) {
@@ -141,12 +139,6 @@ void Node::_notification(int p_notification) {
 			}
 
 			GDVIRTUAL_CALL(_ready);
-
-			if (data.scene_file_path.length()) {
-				ERR_FAIL_COND(!is_inside_tree());
-				get_multiplayer()->scene_enter_exit_notify(data.scene_file_path, this, true);
-			}
-
 		} break;
 		case NOTIFICATION_POSTINITIALIZE: {
 			data.in_constructor = false;
@@ -1061,7 +1053,7 @@ void Node::_generate_serial_child_name(const Node *p_child, StringName &name) co
 	String nums;
 	for (int i = name_string.length() - 1; i >= 0; i--) {
 		char32_t n = name_string[i];
-		if (n >= '0' && n <= '9') {
+		if (is_digit(n)) {
 			nums = String::chr(name_string[i]) + nums;
 		} else {
 			break;
diff --git a/scene/main/node.h b/scene/main/node.h
index a1fc672a15..0ac10f4381 100644
--- a/scene/main/node.h
+++ b/scene/main/node.h
@@ -212,7 +212,6 @@ protected:
 	static String _get_name_num_separator();
 
 	friend class SceneState;
-	friend class MultiplayerReplicator;
 
 	void _add_child_nocheck(Node *p_child, const StringName &p_name);
 	void _set_owner_nocheck(Node *p_owner);
@@ -467,7 +466,7 @@ public:
 	bool is_displayed_folded() const;
 	/* NETWORK */
 
-	void set_multiplayer_authority(int p_peer_id, bool p_recursive = true);
+	virtual void set_multiplayer_authority(int p_peer_id, bool p_recursive = true);
 	int get_multiplayer_authority() const;
 	bool is_multiplayer_authority() const;
 
diff --git a/scene/main/scene_tree.cpp b/scene/main/scene_tree.cpp
index 45f04b28b9..0e4a6a4b5c 100644
--- a/scene/main/scene_tree.cpp
+++ b/scene/main/scene_tree.cpp
@@ -36,6 +36,7 @@
 #include "core/io/dir_access.h"
 #include "core/io/marshalls.h"
 #include "core/io/resource_loader.h"
+#include "core/multiplayer/multiplayer_api.h"
 #include "core/object/message_queue.h"
 #include "core/os/keyboard.h"
 #include "core/os/os.h"
diff --git a/scene/main/scene_tree.h b/scene/main/scene_tree.h
index 1dff1dab4f..a5cd52b4ca 100644
--- a/scene/main/scene_tree.h
+++ b/scene/main/scene_tree.h
@@ -31,7 +31,6 @@
 #ifndef SCENE_TREE_H
 #define SCENE_TREE_H
 
-#include "core/multiplayer/multiplayer_api.h"
 #include "core/os/main_loop.h"
 #include "core/os/thread_safe.h"
 #include "core/templates/self_list.h"
@@ -46,6 +45,7 @@ class Node;
 class Window;
 class Material;
 class Mesh;
+class MultiplayerAPI;
 class SceneDebugger;
 class Tween;
 
diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp
index 09880ad6cf..522997cdf5 100644
--- a/scene/main/viewport.cpp
+++ b/scene/main/viewport.cpp
@@ -1231,7 +1231,7 @@ void Viewport::_gui_show_tooltip() {
 
 	base_tooltip->set_anchors_and_offsets_preset(Control::PRESET_WIDE);
 
-	panel->set_transient(false);
+	panel->set_transient(true);
 	panel->set_flag(Window::FLAG_NO_FOCUS, true);
 	panel->set_wrap_controls(true);
 	panel->add_child(base_tooltip);
diff --git a/scene/main/window.cpp b/scene/main/window.cpp
index fbc0bc5301..f2ebe50fa3 100644
--- a/scene/main/window.cpp
+++ b/scene/main/window.cpp
@@ -1614,6 +1614,7 @@ void Window::_bind_methods() {
 	BIND_ENUM_CONSTANT(MODE_MINIMIZED);
 	BIND_ENUM_CONSTANT(MODE_MAXIMIZED);
 	BIND_ENUM_CONSTANT(MODE_FULLSCREEN);
+	BIND_ENUM_CONSTANT(MODE_EXCLUSIVE_FULLSCREEN);
 
 	BIND_ENUM_CONSTANT(FLAG_RESIZE_DISABLED);
 	BIND_ENUM_CONSTANT(FLAG_BORDERLESS);
diff --git a/scene/main/window.h b/scene/main/window.h
index 2dd1dd6601..f37689f905 100644
--- a/scene/main/window.h
+++ b/scene/main/window.h
@@ -46,6 +46,7 @@ public:
 		MODE_MINIMIZED = DisplayServer::WINDOW_MODE_MINIMIZED,
 		MODE_MAXIMIZED = DisplayServer::WINDOW_MODE_MAXIMIZED,
 		MODE_FULLSCREEN = DisplayServer::WINDOW_MODE_FULLSCREEN,
+		MODE_EXCLUSIVE_FULLSCREEN = DisplayServer::WINDOW_MODE_EXCLUSIVE_FULLSCREEN,
 	};
 
 	enum Flags {
diff --git a/scene/multiplayer/SCsub b/scene/multiplayer/SCsub
new file mode 100644
index 0000000000..fc61250247
--- /dev/null
+++ b/scene/multiplayer/SCsub
@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
+Import("env")
+
+env.add_source_files(env.scene_sources, "*.cpp")  # Adds the files matching *.cpp to env.scene_sources.
diff --git a/scene/multiplayer/multiplayer_spawner.cpp b/scene/multiplayer/multiplayer_spawner.cpp
new file mode 100644
index 0000000000..4f2a9d9e83
--- /dev/null
+++ b/scene/multiplayer/multiplayer_spawner.cpp
@@ -0,0 +1,227 @@
+/*************************************************************************/
+/*  multiplayer_spawner.cpp                                              */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "multiplayer_spawner.h"
+
+#include "core/io/marshalls.h"
+#include "core/multiplayer/multiplayer_api.h"
+#include "scene/main/window.h"
+#include "scene/scene_string_names.h"
+
+void MultiplayerSpawner::_bind_methods() {
+	ClassDB::bind_method(D_METHOD("spawn", "data"), &MultiplayerSpawner::spawn, DEFVAL(Variant())); // "data" defaults to a default-constructed Variant.
+	// NOTE(review): the scene list is exposed as the "replication" property although its accessors are named *_spawnable_scenes; confirm this is intended.
+	ClassDB::bind_method(D_METHOD("get_spawnable_scenes"), &MultiplayerSpawner::get_spawnable_scenes);
+	ClassDB::bind_method(D_METHOD("set_spawnable_scenes", "scenes"), &MultiplayerSpawner::set_spawnable_scenes);
+	ADD_PROPERTY(PropertyInfo(Variant::ARRAY, "replication", PROPERTY_HINT_ARRAY_TYPE, vformat("%s/%s:%s", Variant::OBJECT, PROPERTY_HINT_RESOURCE_TYPE, "PackedScene"), (PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_SCRIPT_VARIABLE)), "set_spawnable_scenes", "get_spawnable_scenes");
+
+	ClassDB::bind_method(D_METHOD("get_spawn_path"), &MultiplayerSpawner::get_spawn_path);
+	ClassDB::bind_method(D_METHOD("set_spawn_path", "path"), &MultiplayerSpawner::set_spawn_path);
+	ADD_PROPERTY(PropertyInfo(Variant::NODE_PATH, "spawn_path", PROPERTY_HINT_NONE, ""), "set_spawn_path", "get_spawn_path");
+
+	ClassDB::bind_method(D_METHOD("get_spawn_limit"), &MultiplayerSpawner::get_spawn_limit);
+	ClassDB::bind_method(D_METHOD("set_spawn_limit", "limit"), &MultiplayerSpawner::set_spawn_limit);
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "spawn_limit", PROPERTY_HINT_RANGE, "0,1024,1,or_greater"), "set_spawn_limit", "get_spawn_limit"); // A limit of 0 disables the "Spawn limit reached!" checks in this file.
+
+	ClassDB::bind_method(D_METHOD("set_auto_spawning", "enabled"), &MultiplayerSpawner::set_auto_spawning);
+	ClassDB::bind_method(D_METHOD("is_auto_spawning"), &MultiplayerSpawner::is_auto_spawning);
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "auto_spawn"), "set_auto_spawning", "is_auto_spawning");
+
+	GDVIRTUAL_BIND(_spawn_custom, "data"); // Script-overridable hook that spawn() reaches through instantiate_custom().
+
+	ADD_SIGNAL(MethodInfo("despawned", PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node"))); // Declared here; not emitted from multiplayer_spawner.cpp itself.
+	ADD_SIGNAL(MethodInfo("spawned", PropertyInfo(Variant::INT, "scene_id"), PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node"))); // Declared here; not emitted from multiplayer_spawner.cpp itself.
+}
+
+void MultiplayerSpawner::_update_spawn_node() {
+#ifdef TOOLS_ENABLED
+	if (Engine::get_singleton()->is_editor_hint()) {
+		return;
+	}
+#endif
+	const Callable on_child_entered = callable_mp(this, &MultiplayerSpawner::_node_added);
+	// Unhook the previously resolved spawn node, provided it still exists and is connected.
+	Node *previous = spawn_node.is_valid() ? Object::cast_to<Node>(ObjectDB::get_instance(spawn_node)) : nullptr;
+	if (previous && previous->is_connected("child_entered_tree", on_child_entered)) {
+		previous->disconnect("child_entered_tree", on_child_entered);
+	}
+	// Resolve the path again; an empty path while inside the tree means there is no spawn node.
+	Node *current = nullptr;
+	if (!spawn_path.is_empty() || !is_inside_tree()) {
+		current = get_node_or_null(spawn_path);
+	}
+	spawn_node = current ? current->get_instance_id() : ObjectID();
+	if (current && auto_spawn) {
+		current->connect("child_entered_tree", on_child_entered);
+	}
+}
+
+void MultiplayerSpawner::_notification(int p_what) {
+	if (p_what == NOTIFICATION_POST_ENTER_TREE) {
+		_update_spawn_node(); // Resolve spawn_path and hook the spawn node if auto_spawn is set.
+	} else if (p_what == NOTIFICATION_EXIT_TREE) {
+		_update_spawn_node();
+		const ObjectID *oid = nullptr;
+		while ((oid = tracked_nodes.next(oid))) { // Visit every tracked node id, then forget them all below.
+			Node *node = Object::cast_to<Node>(ObjectDB::get_instance(*oid));
+			ERR_CONTINUE(!node);
+			node->disconnect(SceneStringNames::get_singleton()->tree_exiting, callable_mp(this, &MultiplayerSpawner::_node_exit));
+			// Unlikely, but the one-shot "ready" hook from _track() may still be connected; leaving it might crash the engine.
+			if (node->is_connected(SceneStringNames::get_singleton()->ready, callable_mp(this, &MultiplayerSpawner::_node_ready))) {
+				node->disconnect(SceneStringNames::get_singleton()->ready, callable_mp(this, &MultiplayerSpawner::_node_ready));
+			}
+			get_multiplayer()->despawn(node, this);
+		}
+		tracked_nodes.clear();
+	}
+}
+
+void MultiplayerSpawner::_node_added(Node *p_node) {
+	// Auto-spawning only happens on the multiplayer authority while a peer is set.
+	if (!(get_multiplayer()->has_multiplayer_peer() && is_multiplayer_authority())) {
+		return;
+	}
+	// Already-tracked nodes are skipped before the spawn node is even looked up.
+	const Node *spawn_parent = tracked_nodes.has(p_node->get_instance_id()) ? nullptr : get_spawn_node();
+	if (!spawn_parent || p_node->get_parent() != spawn_parent) {
+		return;
+	}
+	const int scene_id = get_scene_id(p_node->get_scene_file_path());
+	if (scene_id == INVALID_ID) {
+		return;
+	}
+	// A name that validate_node_name() would alter is rejected with an error.
+	const String name = p_node->get_name();
+	ERR_FAIL_COND_MSG(name.validate_node_name() != name, vformat("Unable to auto-spawn node with reserved name: %s. Make sure to add your replicated scenes via 'add_child(node, true)' to produce valid names.", name));
+	_track(p_node, Variant(), scene_id);
+}
+
+void MultiplayerSpawner::set_auto_spawning(bool p_enabled) {
+	auto_spawn = p_enabled;
+	_update_spawn_node(); // Re-evaluates the "child_entered_tree" hook, which is only connected while auto_spawn is set.
+}
+
+bool MultiplayerSpawner::is_auto_spawning() const {
+	return auto_spawn; // Flag checked by _update_spawn_node() before hooking "child_entered_tree".
+}
+
+TypedArray<PackedScene> MultiplayerSpawner::get_spawnable_scenes() {
+	return spawnable_scenes; // An entry's index in this array is the scene id used by get_scene_id() and instantiate_scene().
+}
+
+void MultiplayerSpawner::set_spawnable_scenes(TypedArray<PackedScene> p_scenes) {
+	spawnable_scenes = p_scenes; // Replaces the whole list; tracked_nodes (and the scene ids stored in it) is not touched.
+}
+
+NodePath MultiplayerSpawner::get_spawn_path() const {
+	return spawn_path; // The stored path; _update_spawn_node() is what resolves it to a node.
+}
+
+void MultiplayerSpawner::set_spawn_path(const NodePath &p_path) {
+	spawn_path = p_path;
+	_update_spawn_node(); // Re-resolve the spawn node for the new path.
+}
+
+void MultiplayerSpawner::_track(Node *p_node, const Variant &p_argument, int p_scene_id) {
+	ObjectID oid = p_node->get_instance_id();
+	if (!tracked_nodes.has(oid)) { // Tracking an already-tracked node is a no-op.
+		tracked_nodes[oid] = SpawnInfo(p_argument.duplicate(true), p_scene_id); // Stores a duplicate(true) copy of the argument rather than the caller's Variant.
+		p_node->connect(SceneStringNames::get_singleton()->tree_exiting, callable_mp(this, &MultiplayerSpawner::_node_exit), varray(p_node->get_instance_id()), CONNECT_ONESHOT); // One-shot: _node_exit() untracks and despawns.
+		p_node->connect(SceneStringNames::get_singleton()->ready, callable_mp(this, &MultiplayerSpawner::_node_ready), varray(p_node->get_instance_id()), CONNECT_ONESHOT); // One-shot: _node_ready() forwards the spawn.
+	}
+}
+
+void MultiplayerSpawner::_node_ready(ObjectID p_id) {
+	get_multiplayer()->spawn(ObjectDB::get_instance(p_id), this); // NOTE(review): the lookup result is passed on without a null check, unlike _node_exit(); confirm spawn() tolerates null.
+}
+
+void MultiplayerSpawner::_node_exit(ObjectID p_id) {
+	Node *node = Object::cast_to<Node>(ObjectDB::get_instance(p_id));
+	ERR_FAIL_COND(!node);
+	if (tracked_nodes.has(p_id)) { // Nothing to do for a node that is no longer tracked.
+		tracked_nodes.erase(p_id);
+		get_multiplayer()->despawn(node, this);
+	}
+}
+
+int MultiplayerSpawner::get_scene_id(const String &p_scene) const {
+	for (int i = 0; i < spawnable_scenes.size(); i++) {
+		Ref<PackedScene> ps = spawnable_scenes[i];
+		ERR_CONTINUE(ps.is_null()); // Null entries are reported and skipped.
+		if (ps->get_path() == p_scene) {
+			return i; // The scene id is the entry's index in spawnable_scenes.
+		}
+	}
+	return INVALID_ID; // No listed scene has this resource path.
+}
+
+int MultiplayerSpawner::get_spawn_id(const ObjectID &p_id) const {
+	const SpawnInfo *info = tracked_nodes.getptr(p_id);
+	return info ? info->id : INVALID_ID; // INVALID_ID when the object is not tracked.
+}
+
+const Variant MultiplayerSpawner::get_spawn_argument(const ObjectID &p_id) const {
+	const SpawnInfo *info = tracked_nodes.getptr(p_id);
+	return info ? info->args : Variant(); // Default-constructed Variant when the object is not tracked.
+}
+
+Node *MultiplayerSpawner::instantiate_scene(int p_id) {
+	ERR_FAIL_COND_V_MSG(spawn_limit && spawn_limit <= tracked_nodes.size(), nullptr, "Spawn limit reached!"); // A spawn_limit of 0 never triggers this.
+	ERR_FAIL_INDEX_V(p_id, spawnable_scenes.size(), nullptr); // p_id is an index into spawnable_scenes.
+	Ref<PackedScene> scene = spawnable_scenes[p_id];
+	ERR_FAIL_COND_V(scene.is_null(), nullptr);
+	return scene->instantiate(); // The new node is returned as-is; this function neither tracks it nor adds it to the tree.
+}
+
+Node *MultiplayerSpawner::instantiate_custom(const Variant &p_data) {
+	ERR_FAIL_COND_V_MSG(spawn_limit && spawn_limit <= tracked_nodes.size(), nullptr, "Spawn limit reached!");
+	// Delegate to the _spawn_custom virtual; when the call reports failure no node is returned.
+	Object *created = nullptr;
+	if (!GDVIRTUAL_CALL(_spawn_custom, p_data, created)) {
+		return nullptr;
+	}
+	return Object::cast_to<Node>(created);
+}
+
+Node *MultiplayerSpawner::spawn(const Variant &p_data) {
+	ERR_FAIL_COND_V(!is_inside_tree() || !get_multiplayer()->has_multiplayer_peer() || !is_multiplayer_authority(), nullptr); // Requires: inside the tree, a peer set, and being the authority.
+	ERR_FAIL_COND_V_MSG(spawn_limit && spawn_limit <= tracked_nodes.size(), nullptr, "Spawn limit reached!");
+	ERR_FAIL_COND_V_MSG(!GDVIRTUAL_IS_OVERRIDDEN(_spawn_custom), nullptr, "Custom spawn requires the '_spawn_custom' virtual method to be implemented via script.");
+
+	Node *parent = get_spawn_node();
+	ERR_FAIL_COND_V_MSG(!parent, nullptr, "Cannot find spawn node.");
+
+	Node *node = instantiate_custom(p_data);
+	ERR_FAIL_COND_V_MSG(!node, nullptr, "The '_spawn_custom' implementation must return a valid Node.");
+
+	_track(node, p_data); // Hooks are connected before add_child() so they are in place when the node enters the tree.
+	parent->add_child(node, true); // Same add_child(node, true) form that the auto-spawn error message recommends for valid names.
+	return node;
+}
diff --git a/scene/multiplayer/multiplayer_spawner.h b/scene/multiplayer/multiplayer_spawner.h
new file mode 100644
index 0000000000..63948e39a5
--- /dev/null
+++ b/scene/multiplayer/multiplayer_spawner.h
@@ -0,0 +1,101 @@
+/*************************************************************************/
+/*  multiplayer_spawner.h                                                */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef MULTIPLAYER_SPAWNER_H
+#define MULTIPLAYER_SPAWNER_H
+
+#include "scene/main/node.h"
+
+#include "core/variant/typed_array.h"
+#include "scene/resources/packed_scene.h"
+#include "scene/resources/scene_replication_config.h"
+
+class MultiplayerSpawner : public Node { // Node that instantiates scenes or script-built nodes and tracks them by ObjectID.
+	GDCLASS(MultiplayerSpawner, Node);
+
+public:
+	enum {
+		INVALID_ID = 0xFF, // Default spawn id: used by SpawnInfo::id and as _track()'s default scene id.
+	};
+
+private:
+	TypedArray<PackedScene> spawnable_scenes; // Indexed by the id passed to instantiate_scene().
+	Set<ResourceUID::ID> spawnable_ids;
+	NodePath spawn_path;
+
+	struct SpawnInfo { // Per-node record stored in tracked_nodes.
+		Variant args; // Returned by get_spawn_argument().
+		int id = INVALID_ID; // Returned by get_spawn_id().
+		SpawnInfo(Variant p_args, int p_id) {
+			id = p_id;
+			args = p_args;
+		}
+		SpawnInfo() {}
+	};
+
+	ObjectID spawn_node; // Resolved to a Node on demand by get_spawn_node().
+	HashMap<ObjectID, SpawnInfo> tracked_nodes; // Its size is what spawn_limit is compared against.
+	bool auto_spawn = false;
+	uint32_t spawn_limit = 0; // 0 means the limit checks never fail.
+
+	void _update_spawn_node();
+	void _track(Node *p_node, const Variant &p_argument, int p_scene_id = INVALID_ID);
+	void _node_added(Node *p_node);
+	void _node_exit(ObjectID p_id);
+	void _node_ready(ObjectID p_id);
+
+protected:
+	static void _bind_methods();
+	void _notification(int p_what);
+
+public:
+	Node *get_spawn_node() const { return spawn_node.is_valid() ? Object::cast_to<Node>(ObjectDB::get_instance(spawn_node)) : nullptr; } // nullptr when unset or no longer a live Node.
+	TypedArray<PackedScene> get_spawnable_scenes();
+	void set_spawnable_scenes(TypedArray<PackedScene> p_scenes);
+	NodePath get_spawn_path() const;
+	void set_spawn_path(const NodePath &p_path);
+	uint32_t get_spawn_limit() const { return spawn_limit; }
+	void set_spawn_limit(uint32_t p_limit) { spawn_limit = p_limit; }
+	bool is_auto_spawning() const;
+	void set_auto_spawning(bool p_enabled);
+
+	const Variant get_spawn_argument(const ObjectID &p_id) const; // Default Variant when `p_id` is not tracked.
+	int get_spawn_id(const ObjectID &p_id) const; // INVALID_ID when `p_id` is not tracked.
+	int get_scene_id(const String &p_path) const;
+	Node *spawn(const Variant &p_data = Variant()); // Authority-side custom spawn; nullptr on failure.
+	Node *instantiate_custom(const Variant &p_data); // Builds a node through _spawn_custom.
+	Node *instantiate_scene(int p_idx); // `p_idx` indexes spawnable_scenes.
+
+	GDVIRTUAL1R(Object *, _spawn_custom, const Variant &); // Script hook whose result must be a Node to be accepted.
+
+	MultiplayerSpawner() {}
+};
+
+#endif // MULTIPLAYER_SPAWNER_H
diff --git a/scene/multiplayer/multiplayer_synchronizer.cpp b/scene/multiplayer/multiplayer_synchronizer.cpp
new file mode 100644
index 0000000000..fbe1b99cc9
--- /dev/null
+++ b/scene/multiplayer/multiplayer_synchronizer.cpp
@@ -0,0 +1,158 @@
+/*************************************************************************/
+/*  multiplayer_synchronizer.cpp                                         */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "multiplayer_synchronizer.h"
+
+#include "core/config/engine.h"
+#include "core/multiplayer/multiplayer_api.h"
+
+Object *MultiplayerSynchronizer::_get_prop_target(Object *p_obj, const NodePath &p_path) { // Resolves the object that owns the property addressed by `p_path`.
+	if (p_path.get_name_count() == 0) {
+		return p_obj; // No node names in the path: the property lives on p_obj itself.
+	}
+	Node *node = Object::cast_to<Node>(p_obj);
+	ERR_FAIL_COND_V_MSG(!node || !node->has_node(p_path), nullptr, vformat("Node '%s' not found.", p_path)); // Also fails when p_obj is not a Node.
+	return node->get_node(p_path);
+}
+
+void MultiplayerSynchronizer::_stop() { // Stops replication for the node at root_path; does nothing when it cannot be resolved.
+	if (!is_inside_tree() || !has_node(root_path)) {
+		return;
+	}
+	get_multiplayer()->replication_stop(get_node(root_path), this);
+}
+
+void MultiplayerSynchronizer::_start() { // Starts replication for the node at root_path; does nothing when it cannot be resolved.
+	if (!is_inside_tree() || !has_node(root_path)) {
+		return;
+	}
+	get_multiplayer()->replication_start(get_node(root_path), this);
+}
+
+Error MultiplayerSynchronizer::get_state(const List<NodePath> &p_properties, Object *p_obj, Vector<Variant> &r_variant, Vector<const Variant *> &r_variant_ptrs) { // Reads each listed property into r_variant and stores a pointer to every value in r_variant_ptrs.
+	ERR_FAIL_COND_V(!p_obj, ERR_INVALID_PARAMETER);
+	r_variant.resize(p_properties.size()); // Both outputs are sized up front, one slot per property.
+	r_variant_ptrs.resize(r_variant.size());
+	int i = 0;
+	for (const NodePath &prop : p_properties) {
+		bool valid = false;
+		const Object *obj = _get_prop_target(p_obj, prop);
+		ERR_FAIL_COND_V(!obj, FAILED); // On any failure the outputs stay resized but only partially filled.
+		r_variant.write[i] = obj->get(prop.get_concatenated_subnames(), &valid); // Path names select the object, subnames select the property.
+		r_variant_ptrs.write[i] = &r_variant[i];
+		ERR_FAIL_COND_V_MSG(!valid, ERR_INVALID_DATA, vformat("Property '%s' not found.", prop));
+		i++;
+	}
+	return OK;
+}
+
+Error MultiplayerSynchronizer::set_state(const List<NodePath> &p_properties, Object *p_obj, const Vector<Variant> &p_state) {
+	ERR_FAIL_COND_V(!p_obj, ERR_INVALID_PARAMETER);
+	int i = 0;
+	for (const NodePath &prop : p_properties) {
+		Object *obj = _get_prop_target(p_obj, prop);
+		ERR_FAIL_COND_V(!obj, FAILED);
+		obj->set(prop.get_concatenated_subnames(), p_state[i]);
+		i += 1;
+	}
+	return OK;
+}
+
+void MultiplayerSynchronizer::_bind_methods() { // Registers accessors and the root_path / replication_interval / resource properties with ClassDB.
+	ClassDB::bind_method(D_METHOD("set_root_path", "path"), &MultiplayerSynchronizer::set_root_path);
+	ClassDB::bind_method(D_METHOD("get_root_path"), &MultiplayerSynchronizer::get_root_path);
+	ADD_PROPERTY(PropertyInfo(Variant::NODE_PATH, "root_path"), "set_root_path", "get_root_path");
+
+	ClassDB::bind_method(D_METHOD("set_replication_interval", "milliseconds"), &MultiplayerSynchronizer::set_replication_interval); // NOTE(review): the setter multiplies its argument by 1000 to get msec, so the value is in seconds despite this argument name.
+	ClassDB::bind_method(D_METHOD("get_replication_interval"), &MultiplayerSynchronizer::get_replication_interval);
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "replication_interval", PROPERTY_HINT_RANGE, "0,5,0.001"), "set_replication_interval", "get_replication_interval");
+
+	ClassDB::bind_method(D_METHOD("set_replication_config", "config"), &MultiplayerSynchronizer::set_replication_config);
+	ClassDB::bind_method(D_METHOD("get_replication_config"), &MultiplayerSynchronizer::get_replication_config);
+	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "resource", PROPERTY_HINT_RESOURCE_TYPE, "SceneReplicationConfig"), "set_replication_config", "get_replication_config"); // NOTE(review): exposed as "resource" although the accessors say replication_config — confirm the intended name.
+}
+
+void MultiplayerSynchronizer::_notification(int p_what) {
+	// Starts replication when entering the tree and stops it when leaving; inert in the editor or without a root_path.
+#ifdef TOOLS_ENABLED
+	if (Engine::get_singleton()->is_editor_hint()) {
+		return;
+	}
+#endif
+	if (!root_path.is_empty()) {
+		if (p_what == NOTIFICATION_ENTER_TREE) {
+			_start();
+		} else if (p_what == NOTIFICATION_EXIT_TREE) {
+			_stop();
+		}
+	}
+}
+
+void MultiplayerSynchronizer::set_replication_interval(double p_interval) { // Stores p_interval * 1000 as whole milliseconds (fraction truncated); negative values are rejected.
+	ERR_FAIL_COND_MSG(p_interval < 0, "Interval must be greater or equal to 0 (where 0 means default)");
+	interval_msec = static_cast<uint64_t>(p_interval * 1000);
+}
+
+double MultiplayerSynchronizer::get_replication_interval() const { // Inverse of the setter: stored milliseconds divided by 1000.
+	return static_cast<double>(interval_msec) / 1000.0;
+}
+
+uint64_t MultiplayerSynchronizer::get_replication_interval_msec() const { // Raw stored interval, in milliseconds.
+	return interval_msec;
+}
+
+void MultiplayerSynchronizer::set_replication_config(Ref<SceneReplicationConfig> p_config) { // Stores the config; replication is not restarted here.
+	replication_config = p_config;
+}
+
+Ref<SceneReplicationConfig> MultiplayerSynchronizer::get_replication_config() { // May be a null Ref when no config was set.
+	return replication_config;
+}
+
+void MultiplayerSynchronizer::set_root_path(const NodePath &p_path) { // Moves replication from the current root node to the one at `p_path`.
+	_stop(); // Must run while root_path still names the old root.
+	root_path = p_path;
+	_start();
+}
+
+NodePath MultiplayerSynchronizer::get_root_path() const { // Path that _start()/_stop() resolve relative to this node.
+	return root_path;
+}
+
+void MultiplayerSynchronizer::set_multiplayer_authority(int p_peer_id, bool p_recursive) { // Changes authority, stopping and restarting replication around the change when the root resolves.
+	Node *root = is_inside_tree() ? get_node_or_null(root_path) : nullptr;
+	if (root) {
+		get_multiplayer()->replication_stop(root, this);
+	}
+	Node::set_multiplayer_authority(p_peer_id, p_recursive);
+	if (root) {
+		get_multiplayer()->replication_start(root, this);
+	}
+}
diff --git a/scene/multiplayer/multiplayer_synchronizer.h b/scene/multiplayer/multiplayer_synchronizer.h
new file mode 100644
index 0000000000..e856745379
--- /dev/null
+++ b/scene/multiplayer/multiplayer_synchronizer.h
@@ -0,0 +1,72 @@
+/*************************************************************************/
+/*  multiplayer_synchronizer.h                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef MULTIPLAYER_SYNCHRONIZER_H
+#define MULTIPLAYER_SYNCHRONIZER_H
+
+#include "scene/main/node.h"
+
+#include "scene/resources/scene_replication_config.h"
+
+class MultiplayerSynchronizer : public Node { // Node that registers the node at root_path for replication through the MultiplayerAPI.
+	GDCLASS(MultiplayerSynchronizer, Node);
+
+private:
+	Ref<SceneReplicationConfig> replication_config; // Source of the spawn/sync property lists; may be null.
+	NodePath root_path; // Resolved relative to this node by _start()/_stop().
+	uint64_t interval_msec = 0; // Replication interval in milliseconds.
+
+	static Object *_get_prop_target(Object *p_obj, const NodePath &p_prop);
+	void _start();
+	void _stop();
+
+protected:
+	static void _bind_methods();
+	void _notification(int p_what);
+
+public:
+	static Error get_state(const List<NodePath> &p_properties, Object *p_obj, Vector<Variant> &r_variant, Vector<const Variant *> &r_variant_ptrs); // Reads the listed properties of p_obj.
+	static Error set_state(const List<NodePath> &p_properties, Object *p_obj, const Vector<Variant> &p_state); // Writes p_state onto the listed properties, in list order.
+
+	void set_replication_interval(double p_interval); // Stored as p_interval * 1000 milliseconds.
+	double get_replication_interval() const;
+	uint64_t get_replication_interval_msec() const;
+
+	void set_replication_config(Ref<SceneReplicationConfig> p_config);
+	Ref<SceneReplicationConfig> get_replication_config();
+
+	void set_root_path(const NodePath &p_path);
+	NodePath get_root_path() const;
+	virtual void set_multiplayer_authority(int p_peer_id, bool p_recursive = true) override; // Also restarts replication of the root node.
+
+	MultiplayerSynchronizer() {}
+};
+
+#endif // MULTIPLAYER_SYNCHRONIZER_H
diff --git a/scene/multiplayer/scene_replication_interface.cpp b/scene/multiplayer/scene_replication_interface.cpp
new file mode 100644
index 0000000000..7155935084
--- /dev/null
+++ b/scene/multiplayer/scene_replication_interface.cpp
@@ -0,0 +1,415 @@
+/*************************************************************************/
+/*  scene_replication_interface.cpp                                      */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "scene_replication_interface.h"
+
+#include "core/io/marshalls.h"
+#include "scene/main/node.h"
+#include "scene/multiplayer/multiplayer_spawner.h"
+#include "scene/multiplayer/multiplayer_synchronizer.h"
+
+#define MAKE_ROOM(m_amount) /* Grows packet_cache to at least m_amount bytes, never shrinks it; m_amount is expanded twice. */ \
+	if (packet_cache.size() < m_amount) \
+		packet_cache.resize(m_amount);
+
+MultiplayerReplicationInterface *SceneReplicationInterface::_create(MultiplayerAPI *p_multiplayer) { // Factory function installed by make_default().
+	return memnew(SceneReplicationInterface(p_multiplayer));
+}
+
+void SceneReplicationInterface::make_default() { // Points MultiplayerAPI's default replication-interface factory at _create.
+	MultiplayerAPI::create_default_replication_interface = _create;
+}
+
+void SceneReplicationInterface::_free_remotes(int p_id) { // Queues deletion of every remote node recorded for peer `p_id`.
+	// Each map value is an ObjectID that rep_state resolves back to a node; unresolved ones are reported and skipped.
+	const HashMap<uint32_t, ObjectID> remotes = rep_state->peer_get_remotes(p_id);
+	for (const uint32_t *key = remotes.next(nullptr); key != nullptr; key = remotes.next(key)) {
+		Node *node = rep_state->get_node(remotes.get(*key));
+		ERR_CONTINUE(!node);
+		node->queue_delete();
+	}
+}
+
+void SceneReplicationInterface::on_peer_change(int p_id, bool p_connected) { // Brings a new peer up to date, or frees what a departed peer spawned.
+	if (!p_connected) {
+		_free_remotes(p_id); // Needs the peer's remotes, so it runs before the state forgets the peer.
+		rep_state->on_peer_change(p_id, p_connected);
+		return;
+	}
+	rep_state->on_peer_change(p_id, p_connected); // Register the peer first, then replay spawns to it.
+	for (const ObjectID &spawned_id : rep_state->get_spawned_nodes()) {
+		_send_spawn(rep_state->get_node(spawned_id), rep_state->get_spawner(spawned_id), p_id);
+	}
+	for (const ObjectID &oid : rep_state->get_path_only_nodes()) {
+		Node *node = rep_state->get_node(oid);
+		MultiplayerSynchronizer *sync = rep_state->get_synchronizer(oid);
+		ERR_CONTINUE(!node || !sync);
+		if (sync->is_multiplayer_authority()) {
+			rep_state->peer_add_node(p_id, oid);
+		}
+	}
+}
+
+void SceneReplicationInterface::on_reset() { // Frees every peer's remote nodes, then resets the replication state.
+	for (int peer_id : rep_state->get_peers()) {
+		_free_remotes(peer_id);
+	}
+	rep_state->reset();
+}
+
+void SceneReplicationInterface::on_network_process() { // Runs _send_sync for every peer, all with the same tick timestamp.
+	const uint64_t now_msec = OS::get_singleton()->get_ticks_msec();
+	for (int peer_id : rep_state->get_peers()) {
+		_send_sync(peer_id, now_msec);
+	}
+}
+
+Error SceneReplicationInterface::on_spawn(Object *p_obj, Variant p_config) { // Records `p_obj` as spawned by the MultiplayerSpawner in `p_config`, then sends the spawn.
+	Node *node = Object::cast_to<Node>(p_obj);
+	ERR_FAIL_COND_V(!node || p_config.get_type() != Variant::OBJECT, ERR_INVALID_PARAMETER);
+	MultiplayerSpawner *spawner = Object::cast_to<MultiplayerSpawner>(p_config.get_validated_object());
+	ERR_FAIL_COND_V(!spawner, ERR_INVALID_PARAMETER);
+	Error err = rep_state->config_add_spawn(node, spawner);
+	ERR_FAIL_COND_V(err != OK, err);
+	return _send_spawn(node, spawner, 0); // NOTE(review): target peer 0 presumably means "all peers" — confirm.
+}
+
+Error SceneReplicationInterface::on_despawn(Object *p_obj, Variant p_config) { // Removes the spawn record of `p_obj` for the spawner in `p_config`, then sends the despawn.
+	Node *node = Object::cast_to<Node>(p_obj);
+	ERR_FAIL_COND_V(!node || p_config.get_type() != Variant::OBJECT, ERR_INVALID_PARAMETER);
+	MultiplayerSpawner *spawner = Object::cast_to<MultiplayerSpawner>(p_config.get_validated_object());
+	ERR_FAIL_COND_V(!p_obj || !spawner, ERR_INVALID_PARAMETER); // `!p_obj` is already implied by the `!node` check above.
+	Error err = rep_state->config_del_spawn(node, spawner);
+	ERR_FAIL_COND_V(err != OK, err);
+	return _send_despawn(node, 0); // NOTE(review): target peer 0 presumably means "all peers" — confirm.
+}
+
+Error SceneReplicationInterface::on_replication_start(Object *p_obj, Variant p_config) {
+	Node *node = Object::cast_to<Node>(p_obj);
+	ERR_FAIL_COND_V(!node || p_config.get_type() != Variant::OBJECT, ERR_INVALID_PARAMETER);
+	MultiplayerSynchronizer *sync = Object::cast_to<MultiplayerSynchronizer>(p_config.get_validated_object());
+	ERR_FAIL_COND_V(!sync, ERR_INVALID_PARAMETER);
+	rep_state->config_add_sync(node, sync);
+	// Try to apply initial state if spawning (hack to apply if before ready).
+	if (pending_spawn == p_obj->get_instance_id()) {
+		pending_spawn = ObjectID(); // Make sure this only happens once.
+		const List<NodePath> props = sync->get_replication_config()->get_spawn_properties();
+		Vector<Variant> vars;
+		vars.resize(props.size());
+		int consumed;
+		Error err = MultiplayerAPI::decode_and_decompress_variants(vars, pending_buffer, pending_buffer_size, consumed);
+		ERR_FAIL_COND_V(err, err);
+		err = MultiplayerSynchronizer::set_state(props, node, vars);
+		ERR_FAIL_COND_V(err, err);
+	} else if (multiplayer->has_multiplayer_peer() && sync->is_multiplayer_authority()) {
+		// Either it's a spawn or a static sync, in any case add it to the list of known nodes.
+		rep_state->peer_add_node(0, p_obj->get_instance_id());
+	}
+	return OK;
+}
+
+Error SceneReplicationInterface::on_replication_stop(Object *p_obj, Variant p_config) { // Unregisters the MultiplayerSynchronizer in `p_config` for `p_obj`.
+	Node *node = Object::cast_to<Node>(p_obj);
+	ERR_FAIL_COND_V(!node || p_config.get_type() != Variant::OBJECT, ERR_INVALID_PARAMETER);
+	MultiplayerSynchronizer *sync = Object::cast_to<MultiplayerSynchronizer>(p_config.get_validated_object());
+	ERR_FAIL_COND_V(!p_obj || !sync, ERR_INVALID_PARAMETER); // `!p_obj` is already implied by the `!node` check above.
+	return rep_state->config_del_sync(node, sync);
+}
+
+Error SceneReplicationInterface::_send_raw(const uint8_t *p_buffer, int p_size, int p_peer, bool p_reliable) { // Sends `p_size` bytes of `p_buffer` to `p_peer` on channel 0.
+	ERR_FAIL_COND_V(!p_buffer || p_size < 1, ERR_INVALID_PARAMETER);
+	ERR_FAIL_COND_V(!multiplayer, ERR_UNCONFIGURED);
+	ERR_FAIL_COND_V(!multiplayer->has_multiplayer_peer(), ERR_UNCONFIGURED);
+	Ref<MultiplayerPeer> peer = multiplayer->get_multiplayer_peer();
+	peer->set_target_peer(p_peer); // Target, channel and mode are set on the shared peer object before the send.
+	peer->set_transfer_channel(0);
+	peer->set_transfer_mode(p_reliable ? Multiplayer::TRANSFER_MODE_RELIABLE : Multiplayer::TRANSFER_MODE_UNRELIABLE);
+	return peer->put_packet(p_buffer, p_size);
+}
+
+Error SceneReplicationInterface::_send_spawn(Node *p_node, MultiplayerSpawner *p_spawner, int p_peer) { // Encodes and reliably sends the spawn packet for `p_node`, then marks the node as known to `p_peer`.
+	ERR_FAIL_COND_V(p_peer < 0, ERR_BUG);
+	ERR_FAIL_COND_V(!multiplayer, ERR_BUG);
+	ERR_FAIL_COND_V(!p_spawner || !p_node, ERR_BUG);
+
+	const ObjectID oid = p_node->get_instance_id();
+	uint32_t nid = rep_state->ensure_net_id(oid);
+
+	// Prepare custom arg and scene_id
+	uint8_t scene_id = p_spawner->get_spawn_id(oid); // The int spawn id is narrowed to one byte on the wire.
+	bool is_custom = scene_id == MultiplayerSpawner::INVALID_ID;
+	Variant spawn_arg = p_spawner->get_spawn_argument(oid);
+	int spawn_arg_size = 0;
+	if (is_custom) {
+		Error err = MultiplayerAPI::encode_and_compress_variant(spawn_arg, nullptr, spawn_arg_size, false); // Null destination: size query only, the write happens below.
+		ERR_FAIL_COND_V(err, err);
+	}
+
+	// Prepare spawn state.
+	int state_size = 0;
+	Vector<Variant> state_vars;
+	Vector<const Variant *> state_varp;
+	MultiplayerSynchronizer *synchronizer = rep_state->get_synchronizer(oid);
+	if (synchronizer && synchronizer->get_replication_config().is_valid()) { // Without a synchronizer or config the packet simply carries no state.
+		const List<NodePath> props = synchronizer->get_replication_config()->get_spawn_properties();
+		Error err = MultiplayerSynchronizer::get_state(props, p_node, state_vars, state_varp);
+		ERR_FAIL_COND_V_MSG(err != OK, err, "Unable to retrieve spawn state.");
+		err = MultiplayerAPI::encode_and_compress_variants(state_varp.ptrw(), state_varp.size(), nullptr, state_size);
+		ERR_FAIL_COND_V_MSG(err != OK, err, "Unable to encode spawn state.");
+	}
+
+	// Prepare simplified path.
+	const Node *root_node = multiplayer->get_root_node();
+	ERR_FAIL_COND_V(!root_node, ERR_UNCONFIGURED);
+	NodePath rel_path = (root_node->get_path()).rel_path_to(p_spawner->get_path());
+
+	int path_id = 0;
+	multiplayer->send_confirm_path(p_spawner, rel_path, p_peer, path_id); // Fills path_id, which identifies the spawner in the packet.
+
+	// Encode name and parent ID.
+	CharString cname = p_node->get_name().operator String().utf8();
+	int nlen = encode_cstring(cname.get_data(), nullptr);
+	MAKE_ROOM(1 + 1 + 4 + 4 + 4 + nlen + (is_custom ? 4 + spawn_arg_size : 0) + state_size); // command + scene id + path id + net id + name length + name [+ arg size + arg] + state.
+	uint8_t *ptr = packet_cache.ptrw();
+	ptr[0] = (uint8_t)MultiplayerAPI::NETWORK_COMMAND_SPAWN;
+	ptr[1] = scene_id;
+	int ofs = 2;
+	ofs += encode_uint32(path_id, &ptr[ofs]);
+	ofs += encode_uint32(nid, &ptr[ofs]);
+	ofs += encode_uint32(nlen, &ptr[ofs]);
+	ofs += encode_cstring(cname.get_data(), &ptr[ofs]);
+	// Write args
+	if (is_custom) {
+		ofs += encode_uint32(spawn_arg_size, &ptr[ofs]);
+		Error err = MultiplayerAPI::encode_and_compress_variant(spawn_arg, &ptr[ofs], spawn_arg_size, false);
+		ERR_FAIL_COND_V(err, err);
+		ofs += spawn_arg_size;
+	}
+	// Write state.
+	if (state_size) {
+		Error err = MultiplayerAPI::encode_and_compress_variants(state_varp.ptrw(), state_varp.size(), &ptr[ofs], state_size);
+		ERR_FAIL_COND_V(err, err);
+		ofs += state_size;
+	}
+	Error err = _send_raw(ptr, ofs, p_peer, true);
+	ERR_FAIL_COND_V(err, err);
+	return rep_state->peer_add_node(p_peer, oid); // Only reached when the send succeeded.
+}
+
+Error SceneReplicationInterface::_send_despawn(Node *p_node, int p_peer) {
+	const ObjectID oid = p_node->get_instance_id();
+	MAKE_ROOM(5);
+	uint8_t *ptr = packet_cache.ptrw();
+	ptr[0] = (uint8_t)MultiplayerAPI::NETWORK_COMMAND_DESPAWN;
+	int ofs = 1;
+	uint32_t nid = rep_state->get_net_id(oid);
+	ofs += encode_uint32(nid, &ptr[ofs]);
+	Error err = _send_raw(ptr, ofs, p_peer, true);
+	ERR_FAIL_COND_V(err, err);
+	return rep_state->peer_del_node(p_peer, oid);
+}
+
+Error SceneReplicationInterface::on_spawn_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) { // Decodes a spawn packet from `p_from`, instantiates the node and adds it under the spawner's spawn path.
+	ERR_FAIL_COND_V_MSG(p_buffer_len < 14, ERR_INVALID_DATA, "Invalid spawn packet received"); // 14 = command (1) + scene id (1) + path id (4) + net id (4) + name length (4).
+	int ofs = 1; // The spawn/despawn command.
+	uint8_t scene_id = p_buffer[ofs];
+	ofs += 1;
+	uint32_t node_target = decode_uint32(&p_buffer[ofs]);
+	ofs += 4;
+	MultiplayerSpawner *spawner = Object::cast_to<MultiplayerSpawner>(multiplayer->get_cached_node(p_from, node_target));
+	ERR_FAIL_COND_V(!spawner, ERR_DOES_NOT_EXIST);
+	ERR_FAIL_COND_V(p_from != spawner->get_multiplayer_authority(), ERR_UNAUTHORIZED); // Only the spawner's authority may spawn through it.
+
+	uint32_t net_id = decode_uint32(&p_buffer[ofs]);
+	ofs += 4;
+	uint32_t name_len = decode_uint32(&p_buffer[ofs]);
+	ofs += 4;
+	ERR_FAIL_COND_V_MSG(name_len > uint32_t(p_buffer_len - ofs), ERR_INVALID_DATA, vformat("Invalid spawn packet size: %d, wants: %d", p_buffer_len, ofs + name_len));
+	ERR_FAIL_COND_V_MSG(name_len < 1, ERR_INVALID_DATA, "Zero spawn name size.");
+
+	// We need to make sure no trickery happens here, but we want to allow autogenerated ("@") node names.
+	const String name = String::utf8((const char *)&p_buffer[ofs], name_len);
+	ERR_FAIL_COND_V_MSG(name.validate_node_name() != name, ERR_INVALID_DATA, vformat("Invalid node name received: '%s'. Make sure to add nodes via 'add_child(node, true)' remotely.", name));
+	ofs += name_len;
+
+	// Check that we can spawn.
+	Node *parent = spawner->get_node_or_null(spawner->get_spawn_path());
+	ERR_FAIL_COND_V(!parent, ERR_UNCONFIGURED);
+	ERR_FAIL_COND_V(parent->has_node(name), ERR_INVALID_DATA); // A child with that name already exists.
+
+	Node *node = nullptr;
+	if (scene_id == MultiplayerSpawner::INVALID_ID) {
+		// Custom spawn.
+		ERR_FAIL_COND_V(p_buffer_len - ofs < 4, ERR_INVALID_DATA);
+		uint32_t arg_size = decode_uint32(&p_buffer[ofs]);
+		ofs += 4;
+		ERR_FAIL_COND_V(arg_size > uint32_t(p_buffer_len - ofs), ERR_INVALID_DATA);
+		Variant v;
+		Error err = MultiplayerAPI::decode_and_decompress_variant(v, &p_buffer[ofs], arg_size, nullptr, false);
+		ERR_FAIL_COND_V(err != OK, err);
+		ofs += arg_size;
+		node = spawner->instantiate_custom(v);
+	} else {
+		// Scene based spawn.
+		node = spawner->instantiate_scene(scene_id);
+	}
+	ERR_FAIL_COND_V(!node, ERR_UNAUTHORIZED);
+	node->set_name(name);
+	rep_state->peer_add_remote(p_from, net_id, node, spawner);
+	// The initial state will be applied during the sync config (i.e. before _ready).
+	int state_len = p_buffer_len - ofs; // Whatever follows the header (and custom argument) is the spawn state.
+	if (state_len) {
+		pending_spawn = node->get_instance_id();
+		pending_buffer = &p_buffer[ofs];
+		pending_buffer_size = state_len;
+	}
+	parent->add_child(node);
+	pending_spawn = ObjectID(); // pending_buffer points into p_buffer, so the pending state is cleared before returning.
+	pending_buffer = nullptr;
+	pending_buffer_size = 0;
+	return OK;
+}
+
+Error SceneReplicationInterface::on_despawn_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) {
+	ERR_FAIL_COND_V_MSG(p_buffer_len < 5, ERR_INVALID_DATA, "Invalid spawn packet received");
+	int ofs = 1; // The spawn/despawn command.
+	uint32_t net_id = decode_uint32(&p_buffer[ofs]);
+	ofs += 4;
+	Node *node = nullptr;
+	Error err = rep_state->peer_del_remote(p_from, net_id, &node);
+	ERR_FAIL_COND_V(err != OK, err);
+	ERR_FAIL_COND_V(!node, ERR_BUG);
+	node->queue_delete();
+	return OK;
+}
+
+void SceneReplicationInterface::_send_sync(int p_peer, uint64_t p_msec) { // Sends `p_peer` the sync state of the nodes it knows, in unreliable packets of at most sync_mtu bytes.
+	// Packet layout: [command: 1][sync counter: 2], then per node [net id: 4][state size: 4][state].
+	const Set<ObjectID> &known = rep_state->get_known_nodes(p_peer);
+	if (known.is_empty()) {
+		return;
+	}
+	MAKE_ROOM(sync_mtu);
+	uint8_t *ptr = packet_cache.ptrw();
+	ptr[0] = MultiplayerAPI::NETWORK_COMMAND_SYNC;
+	int ofs = 1;
+	ofs += encode_uint16(rep_state->peer_sync_next(p_peer), &ptr[1]);
+	// Can only send updates for already notified nodes. Lazy implementation: grouping by replication config could optimize much more.
+	for (const ObjectID &oid : known) {
+		if (!rep_state->update_sync_time(oid, p_msec)) {
+			continue; // nothing to sync.
+		}
+		MultiplayerSynchronizer *sync = rep_state->get_synchronizer(oid);
+		ERR_CONTINUE(!sync);
+		Node *node = rep_state->get_node(oid);
+		ERR_CONTINUE(!node);
+		if (sync->get_replication_config().is_null()) {
+			continue; // No config means no properties to read; _send_spawn tolerates this case the same way.
+		}
+		int size = 0;
+		Vector<Variant> vars;
+		Vector<const Variant *> varp;
+		const List<NodePath> props = sync->get_replication_config()->get_sync_properties();
+		Error err = MultiplayerSynchronizer::get_state(props, node, vars, varp);
+		ERR_CONTINUE_MSG(err != OK, "Unable to retrieve sync state.");
+		err = MultiplayerAPI::encode_and_compress_variants(varp.ptrw(), varp.size(), nullptr, size);
+		ERR_CONTINUE_MSG(err != OK, "Unable to encode sync state.");
+		// TODO Handle single state above MTU. Until then header (3) + entry header (4 + 4) + state must fit the sync_mtu-sized buffer.
+		ERR_CONTINUE_MSG(3 + 4 + 4 + size > sync_mtu, vformat("Node states bigger then MTU will not be sent (%d > %d): %s", 3 + 4 + 4 + size, sync_mtu, node->get_path()));
+		if (ofs + 4 + 4 + size > sync_mtu) {
+			_send_raw(packet_cache.ptr(), ofs, p_peer, false); // Send what we got, and reset write.
+			ofs = 3;
+		}
+		if (size) {
+			uint32_t net_id = rep_state->get_net_id(oid);
+			if (net_id == 0) {
+				// First time path based ID.
+				const Node *root_node = multiplayer->get_root_node();
+				ERR_FAIL_COND(!root_node);
+				NodePath rel_path = (root_node->get_path()).rel_path_to(sync->get_path());
+				int path_id = 0;
+				multiplayer->send_confirm_path(sync, rel_path, p_peer, path_id);
+				rep_state->set_net_id(oid, uint32_t(path_id) | 0x80000000); // The high bit flags a path-based id.
+			}
+			ofs += encode_uint32(rep_state->get_net_id(oid), &ptr[ofs]);
+			ofs += encode_uint32(size, &ptr[ofs]);
+			MultiplayerAPI::encode_and_compress_variants(varp.ptrw(), varp.size(), &ptr[ofs], size);
+			ofs += size;
+		}
+	}
+	if (ofs > 3) {
+		_send_raw(packet_cache.ptr(), ofs, p_peer, false); // Got some left over to send.
+	}
+}
+
+Error SceneReplicationInterface::on_sync_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) {
+	ERR_FAIL_COND_V_MSG(p_buffer_len < 11, ERR_INVALID_DATA, "Invalid sync packet received");
+	uint16_t time = decode_uint16(&p_buffer[1]);
+	int ofs = 3;
+	rep_state->peer_sync_recv(p_from, time);
+	while (ofs + 8 < p_buffer_len) {
+		uint32_t net_id = decode_uint32(&p_buffer[ofs]);
+		ofs += 4;
+		uint32_t size = decode_uint32(&p_buffer[ofs]);
+		ofs += 4;
+		Node *node = nullptr;
+		if (net_id & 0x80000000) {
+			MultiplayerSynchronizer *sync = Object::cast_to<MultiplayerSynchronizer>(multiplayer->get_cached_node(p_from, net_id & 0x7FFFFFFF));
+			ERR_FAIL_COND_V(!sync || sync->get_multiplayer_authority() != p_from, ERR_UNAUTHORIZED);
+			node = sync->get_node(sync->get_root_path());
+		} else {
+			node = rep_state->peer_get_remote(p_from, net_id);
+		}
+		if (!node) {
+			// Not received yet.
+			ofs += size;
+			continue;
+		}
+		const ObjectID oid = node->get_instance_id();
+		if (!rep_state->update_last_node_sync(oid, time)) {
+			// State is too old.
+			ofs += size;
+			continue;
+		}
+		MultiplayerSynchronizer *sync = rep_state->get_synchronizer(oid);
+		ERR_FAIL_COND_V(!sync, ERR_BUG);
+		ERR_FAIL_COND_V(size > uint32_t(p_buffer_len - ofs), ERR_BUG);
+		const List<NodePath> props = sync->get_replication_config()->get_sync_properties();
+		Vector<Variant> vars;
+		vars.resize(props.size());
+		int consumed;
+		Error err = MultiplayerAPI::decode_and_decompress_variants(vars, &p_buffer[ofs], size, consumed);
+		ERR_FAIL_COND_V(err, err);
+		err = MultiplayerSynchronizer::set_state(props, node, vars);
+		ERR_FAIL_COND_V(err, err);
+		ofs += size;
+	}
+	return OK;
+}
diff --git a/scene/multiplayer/scene_replication_interface.h b/scene/multiplayer/scene_replication_interface.h
new file mode 100644
index 0000000000..855878d029
--- /dev/null
+++ b/scene/multiplayer/scene_replication_interface.h
@@ -0,0 +1,84 @@
+/*************************************************************************/
+/*  scene_replication_interface.h                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef SCENE_TREE_REPLICATOR_INTERFACE_H
+#define SCENE_TREE_REPLICATOR_INTERFACE_H
+
+#include "core/multiplayer/multiplayer_api.h"
+
+#include "scene/multiplayer/scene_replication_state.h"
+
+class SceneReplicationInterface : public MultiplayerReplicationInterface {
+	GDCLASS(SceneReplicationInterface, MultiplayerReplicationInterface);
+
+private:
+	void _send_sync(int p_peer, uint64_t p_msec);
+	Error _send_spawn(Node *p_node, MultiplayerSpawner *p_spawner, int p_peer);
+	Error _send_despawn(Node *p_node, int p_peer);
+	Error _send_raw(const uint8_t *p_buffer, int p_size, int p_peer, bool p_reliable);
+
+	void _free_remotes(int p_peer);
+
+	Ref<SceneReplicationState> rep_state;
+	MultiplayerAPI *multiplayer;
+	PackedByteArray packet_cache;
+	int sync_mtu = 1350; // Highly dependent on underlying protocol.
+
+	// A hack to apply the initial state before ready.
+	ObjectID pending_spawn;
+	const uint8_t *pending_buffer = nullptr;
+	int pending_buffer_size = 0;
+
+protected:
+	static MultiplayerReplicationInterface *_create(MultiplayerAPI *p_multiplayer);
+
+public:
+	static void make_default();
+
+	virtual void on_reset() override;
+	virtual void on_peer_change(int p_id, bool p_connected) override;
+
+	virtual Error on_spawn(Object *p_obj, Variant p_config) override;
+	virtual Error on_despawn(Object *p_obj, Variant p_config) override;
+	virtual Error on_replication_start(Object *p_obj, Variant p_config) override;
+	virtual Error on_replication_stop(Object *p_obj, Variant p_config) override;
+	virtual void on_network_process() override;
+
+	virtual Error on_spawn_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) override;
+	virtual Error on_despawn_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) override;
+	virtual Error on_sync_receive(int p_from, const uint8_t *p_buffer, int p_buffer_len) override;
+
+	SceneReplicationInterface(MultiplayerAPI *p_multiplayer) {
+		rep_state.instantiate();
+		multiplayer = p_multiplayer;
+	}
+};
+
+#endif // SCENE_TREE_REPLICATOR_INTERFACE_H
diff --git a/scene/multiplayer/scene_replication_state.cpp b/scene/multiplayer/scene_replication_state.cpp
new file mode 100644
index 0000000000..b8dadeff24
--- /dev/null
+++ b/scene/multiplayer/scene_replication_state.cpp
@@ -0,0 +1,258 @@
+/*************************************************************************/
+/*  scene_replication_state.cpp                                          */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "scene/multiplayer/scene_replication_state.h"
+
+#include "core/multiplayer/multiplayer_api.h"
+#include "scene/multiplayer/multiplayer_spawner.h"
+#include "scene/multiplayer/multiplayer_synchronizer.h"
+#include "scene/scene_string_names.h"
+
+SceneReplicationState::TrackedNode &SceneReplicationState::_track(const ObjectID &p_id) {
+	if (!tracked_nodes.has(p_id)) {
+		tracked_nodes[p_id] = TrackedNode(p_id);
+		Node *node = Object::cast_to<Node>(ObjectDB::get_instance(p_id));
+		node->connect(SceneStringNames::get_singleton()->tree_exited, callable_mp(this, &SceneReplicationState::_untrack), varray(p_id), Node::CONNECT_ONESHOT);
+	}
+	return tracked_nodes[p_id];
+}
+
+void SceneReplicationState::_untrack(const ObjectID &p_id) {
+	if (tracked_nodes.has(p_id)) {
+		uint32_t net_id = tracked_nodes[p_id].net_id;
+		uint32_t peer = tracked_nodes[p_id].remote_peer;
+		tracked_nodes.erase(p_id);
+		// If it was spawned by a remote, remove it from the received nodes.
+		if (peer && peers_info.has(peer)) {
+			peers_info[peer].recv_nodes.erase(net_id);
+		}
+		// If we spawned or synced it, we need to remove it from any peer it was sent to.
+		if (net_id || peer == 0) {
+			const int *k = nullptr;
+			while ((k = peers_info.next(k))) {
+				peers_info.get(*k).known_nodes.erase(p_id);
+			}
+		}
+	}
+}
+
+const HashMap<uint32_t, ObjectID> SceneReplicationState::peer_get_remotes(int p_peer) const {
+	return peers_info.has(p_peer) ? peers_info[p_peer].recv_nodes : HashMap<uint32_t, ObjectID>();
+}
+
+bool SceneReplicationState::update_last_node_sync(const ObjectID &p_id, uint16_t p_time) {
+	TrackedNode *tnode = tracked_nodes.getptr(p_id);
+	ERR_FAIL_COND_V(!tnode, false); // Untracked node: report an error and reject the update.
+	if (p_time <= tnode->last_sync && tnode->last_sync - p_time < 32767) { // Not newer, and less than 32767 ticks behind.
+		return false; // Presumably a larger backwards gap means the 16-bit counter wrapped around; confirm against the sender.
+	}
+	tnode->last_sync = p_time;
+	return true;
+}
+
+bool SceneReplicationState::update_sync_time(const ObjectID &p_id, uint64_t p_msec) {
+	TrackedNode *tnode = tracked_nodes.getptr(p_id);
+	ERR_FAIL_COND_V(!tnode, false);
+	MultiplayerSynchronizer *sync = get_synchronizer(p_id);
+	if (!sync) {
+		return false;
+	}
+	if (tnode->last_sync_msec == p_msec) {
+		return true;
+	}
+	if (p_msec >= tnode->last_sync_msec + sync->get_replication_interval_msec()) {
+		tnode->last_sync_msec = p_msec;
+		return true;
+	}
+	return false;
+}
+
+const Set<ObjectID> SceneReplicationState::get_known_nodes(int p_peer) {
+	ERR_FAIL_COND_V(!peers_info.has(p_peer), Set<ObjectID>());
+	return peers_info[p_peer].known_nodes;
+}
+
+uint32_t SceneReplicationState::get_net_id(const ObjectID &p_id) const {
+	const TrackedNode *tnode = tracked_nodes.getptr(p_id);
+	ERR_FAIL_COND_V(!tnode, 0);
+	return tnode->net_id;
+}
+
+void SceneReplicationState::set_net_id(const ObjectID &p_id, uint32_t p_net_id) {
+	TrackedNode *tnode = tracked_nodes.getptr(p_id);
+	ERR_FAIL_COND(!tnode);
+	tnode->net_id = p_net_id;
+}
+
+uint32_t SceneReplicationState::ensure_net_id(const ObjectID &p_id) {
+	TrackedNode *tnode = tracked_nodes.getptr(p_id);
+	ERR_FAIL_COND_V(!tnode, 0);
+	if (tnode->net_id == 0) {
+		tnode->net_id = ++last_net_id;
+	}
+	return tnode->net_id;
+}
+
+void SceneReplicationState::on_peer_change(int p_peer, bool p_connected) {
+	if (p_connected) {
+		peers_info[p_peer] = PeerInfo();
+		known_peers.insert(p_peer);
+	} else {
+		peers_info.erase(p_peer);
+		known_peers.erase(p_peer);
+	}
+}
+
+void SceneReplicationState::reset() {
+	peers_info.clear();
+	known_peers.clear();
+	// Tracked nodes are cleared on deletion, here we only reset the ids so they can be later re-assigned.
+	const ObjectID *oid = nullptr;
+	while ((oid = tracked_nodes.next(oid))) {
+		TrackedNode &tobj = tracked_nodes[*oid];
+		tobj.net_id = 0;
+		tobj.remote_peer = 0;
+		tobj.last_sync = 0;
+	}
+}
+
+Error SceneReplicationState::config_add_spawn(Node *p_node, MultiplayerSpawner *p_spawner) {
+	const ObjectID oid = p_node->get_instance_id();
+	TrackedNode &tobj = _track(oid);
+	ERR_FAIL_COND_V(tobj.spawner != ObjectID(), ERR_ALREADY_IN_USE);
+	tobj.spawner = p_spawner->get_instance_id();
+	spawned_nodes.insert(oid);
+	// The spawner may be notified after the synchronizer.
+	path_only_nodes.erase(oid);
+	return OK;
+}
+
+Error SceneReplicationState::config_del_spawn(Node *p_node, MultiplayerSpawner *p_spawner) {
+	const ObjectID oid = p_node->get_instance_id();
+	ERR_FAIL_COND_V(!is_tracked(oid), ERR_INVALID_PARAMETER);
+	TrackedNode &tobj = _track(oid);
+	ERR_FAIL_COND_V(tobj.spawner != p_spawner->get_instance_id(), ERR_INVALID_PARAMETER);
+	tobj.spawner = ObjectID();
+	spawned_nodes.erase(oid);
+	return OK;
+}
+
+Error SceneReplicationState::config_add_sync(Node *p_node, MultiplayerSynchronizer *p_sync) {
+	const ObjectID oid = p_node->get_instance_id();
+	TrackedNode &tobj = _track(oid); // Starts tracking the node if it was not tracked yet.
+	ERR_FAIL_COND_V(tobj.synchronizer != ObjectID(), ERR_ALREADY_IN_USE); // A synchronizer is already registered for this node.
+	tobj.synchronizer = p_sync->get_instance_id();
+	// If it doesn't have a spawner, we might need to assign ID for this node using its path.
+	if (tobj.spawner.is_null()) {
+		path_only_nodes.insert(oid);
+	}
+	return OK;
+}
+
+Error SceneReplicationState::config_del_sync(Node *p_node, MultiplayerSynchronizer *p_sync) {
+	const ObjectID oid = p_node->get_instance_id();
+	ERR_FAIL_COND_V(!is_tracked(oid), ERR_INVALID_PARAMETER);
+	TrackedNode &tobj = _track(oid);
+	ERR_FAIL_COND_V(tobj.synchronizer != p_sync->get_instance_id(), ERR_INVALID_PARAMETER);
+	tobj.synchronizer = ObjectID();
+	if (path_only_nodes.has(oid)) {
+		p_node->disconnect(SceneStringNames::get_singleton()->tree_exited, callable_mp(this, &SceneReplicationState::_untrack));
+		_untrack(oid);
+		path_only_nodes.erase(oid);
+	}
+	return OK;
+}
+
+Error SceneReplicationState::peer_add_node(int p_peer, const ObjectID &p_id) {
+	if (p_peer) {
+		ERR_FAIL_COND_V(!peers_info.has(p_peer), ERR_INVALID_PARAMETER);
+		peers_info[p_peer].known_nodes.insert(p_id);
+	} else {
+		const int *pid = nullptr;
+		while ((pid = peers_info.next(pid))) {
+			peers_info.get(*pid).known_nodes.insert(p_id);
+		}
+	}
+	return OK;
+}
+
+Error SceneReplicationState::peer_del_node(int p_peer, const ObjectID &p_id) {
+	if (p_peer) {
+		ERR_FAIL_COND_V(!peers_info.has(p_peer), ERR_INVALID_PARAMETER);
+		peers_info[p_peer].known_nodes.erase(p_id);
+	} else {
+		const int *pid = nullptr;
+		while ((pid = peers_info.next(pid))) {
+			peers_info.get(*pid).known_nodes.erase(p_id);
+		}
+	}
+	return OK;
+}
+
+Node *SceneReplicationState::peer_get_remote(int p_peer, uint32_t p_net_id) {
+	PeerInfo *info = peers_info.getptr(p_peer);
+	return info && info->recv_nodes.has(p_net_id) ? Object::cast_to<Node>(ObjectDB::get_instance(info->recv_nodes[p_net_id])) : nullptr;
+}
+
+Error SceneReplicationState::peer_add_remote(int p_peer, uint32_t p_net_id, Node *p_node, MultiplayerSpawner *p_spawner) {
+	ERR_FAIL_COND_V(!p_node || !p_spawner, ERR_INVALID_PARAMETER);
+	ERR_FAIL_COND_V(!peers_info.has(p_peer), ERR_UNAVAILABLE);
+	PeerInfo &pinfo = peers_info[p_peer];
+	// Reject an ID that is already in use before touching any tracking state.
+	ERR_FAIL_COND_V(pinfo.recv_nodes.has(p_net_id), ERR_ALREADY_IN_USE);
+	ObjectID oid = p_node->get_instance_id();
+	TrackedNode &tobj = _track(oid);
+	tobj.spawner = p_spawner->get_instance_id();
+	tobj.net_id = p_net_id;
+	tobj.remote_peer = p_peer;
+	tobj.last_sync = pinfo.last_recv_sync;
+	pinfo.recv_nodes[p_net_id] = oid; // Also track as a remote.
+	return OK;
+}
+
+Error SceneReplicationState::peer_del_remote(int p_peer, uint32_t p_net_id, Node **r_node) {
+	ERR_FAIL_COND_V(!peers_info.has(p_peer), ERR_UNAUTHORIZED);
+	PeerInfo &info = peers_info[p_peer];
+	ERR_FAIL_COND_V(!info.recv_nodes.has(p_net_id), ERR_UNAUTHORIZED);
+	*r_node = Object::cast_to<Node>(ObjectDB::get_instance(info.recv_nodes[p_net_id]));
+	info.recv_nodes.erase(p_net_id);
+	return OK;
+}
+
+uint16_t SceneReplicationState::peer_sync_next(int p_peer) {
+	ERR_FAIL_COND_V(!peers_info.has(p_peer), 0);
+	PeerInfo &info = peers_info[p_peer];
+	return ++info.last_sent_sync;
+}
+
+void SceneReplicationState::peer_sync_recv(int p_peer, uint16_t p_time) {
+	ERR_FAIL_COND(!peers_info.has(p_peer));
+	peers_info[p_peer].last_recv_sync = p_time;
+}
diff --git a/scene/multiplayer/scene_replication_state.h b/scene/multiplayer/scene_replication_state.h
new file mode 100644
index 0000000000..18e4d9fa39
--- /dev/null
+++ b/scene/multiplayer/scene_replication_state.h
@@ -0,0 +1,121 @@
+/*************************************************************************/
+/*  scene_replication_state.h                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef SCENE_REPLICATON_STATE_H
+#define SCENE_REPLICATON_STATE_H
+
+#include "core/object/ref_counted.h"
+
+class MultiplayerSpawner;
+class MultiplayerSynchronizer;
+class Node;
+
+class SceneReplicationState : public RefCounted {
+private:
+	struct TrackedNode {
+		ObjectID id;
+		uint32_t net_id = 0;
+		uint32_t remote_peer = 0;
+		ObjectID spawner;
+		ObjectID synchronizer;
+		uint16_t last_sync = 0;
+		uint64_t last_sync_msec = 0;
+
+		bool operator==(const ObjectID &p_other) { return id == p_other; }
+
+		Node *get_node() const { return id.is_valid() ? Object::cast_to<Node>(ObjectDB::get_instance(id)) : nullptr; }
+		MultiplayerSpawner *get_spawner() const { return spawner.is_valid() ? Object::cast_to<MultiplayerSpawner>(ObjectDB::get_instance(spawner)) : nullptr; }
+		MultiplayerSynchronizer *get_synchronizer() const { return synchronizer.is_valid() ? Object::cast_to<MultiplayerSynchronizer>(ObjectDB::get_instance(synchronizer)) : nullptr; }
+		TrackedNode() {}
+		TrackedNode(const ObjectID &p_id) { id = p_id; }
+		TrackedNode(const ObjectID &p_id, uint32_t p_net_id) {
+			id = p_id;
+			net_id = p_net_id;
+		}
+	};
+
+	struct PeerInfo {
+		Set<ObjectID> known_nodes;
+		HashMap<uint32_t, ObjectID> recv_nodes;
+		uint16_t last_sent_sync = 0;
+		uint16_t last_recv_sync = 0;
+	};
+
+	Set<int> known_peers;
+	uint32_t last_net_id = 0;
+	HashMap<ObjectID, TrackedNode> tracked_nodes;
+	HashMap<int, PeerInfo> peers_info;
+	Set<ObjectID> spawned_nodes;
+	Set<ObjectID> path_only_nodes;
+
+	TrackedNode &_track(const ObjectID &p_id);
+	void _untrack(const ObjectID &p_id);
+	bool is_tracked(const ObjectID &p_id) const { return tracked_nodes.has(p_id); }
+
+public:
+	const Set<int> get_peers() const { return known_peers; }
+	const Set<ObjectID> get_spawned_nodes() const { return spawned_nodes; }
+	const Set<ObjectID> get_path_only_nodes() const { return path_only_nodes; }
+
+	MultiplayerSynchronizer *get_synchronizer(const ObjectID &p_id) { return tracked_nodes.has(p_id) ? tracked_nodes[p_id].get_synchronizer() : nullptr; }
+	MultiplayerSpawner *get_spawner(const ObjectID &p_id) { return tracked_nodes.has(p_id) ? tracked_nodes[p_id].get_spawner() : nullptr; }
+	Node *get_node(const ObjectID &p_id) { return tracked_nodes.has(p_id) ? tracked_nodes[p_id].get_node() : nullptr; }
+	bool update_last_node_sync(const ObjectID &p_id, uint16_t p_time);
+	bool update_sync_time(const ObjectID &p_id, uint64_t p_msec);
+
+	const Set<ObjectID> get_known_nodes(int p_peer);
+	uint32_t get_net_id(const ObjectID &p_id) const;
+	void set_net_id(const ObjectID &p_id, uint32_t p_net_id);
+	uint32_t ensure_net_id(const ObjectID &p_id);
+
+	void reset();
+	void on_peer_change(int p_peer, bool p_connected);
+
+	Error config_add_spawn(Node *p_node, MultiplayerSpawner *p_spawner);
+	Error config_del_spawn(Node *p_node, MultiplayerSpawner *p_spawner);
+
+	Error config_add_sync(Node *p_node, MultiplayerSynchronizer *p_sync);
+	Error config_del_sync(Node *p_node, MultiplayerSynchronizer *p_sync);
+
+	Error peer_add_node(int p_peer, const ObjectID &p_id);
+	Error peer_del_node(int p_peer, const ObjectID &p_id);
+
+	const HashMap<uint32_t, ObjectID> peer_get_remotes(int p_peer) const;
+	Node *peer_get_remote(int p_peer, uint32_t p_net_id);
+	Error peer_add_remote(int p_peer, uint32_t p_net_id, Node *p_node, MultiplayerSpawner *p_spawner);
+	Error peer_del_remote(int p_peer, uint32_t p_net_id, Node **r_node);
+
+	uint16_t peer_sync_next(int p_peer);
+	void peer_sync_recv(int p_peer, uint16_t p_time);
+
+	SceneReplicationState() {}
+};
+
+#endif // SCENE_REPLICATON_STATE_H
diff --git a/scene/property_utils.cpp b/scene/property_utils.cpp
index 2540a633a9..a9b7e9acbe 100644
--- a/scene/property_utils.cpp
+++ b/scene/property_utils.cpp
@@ -130,7 +130,7 @@ Variant PropertyUtils::get_property_default_value(const Object *p_object, const
 			if (p != -1 && p < prop_str.length() - 1) {
 				bool all_digits = true;
 				for (int i = p + 1; i < prop_str.length(); i++) {
-					if (prop_str[i] < '0' || prop_str[i] > '9') {
+					if (!is_digit(prop_str[i])) {
 						all_digits = false;
 						break;
 					}
diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp
index 838cef824b..f19b018982 100644
--- a/scene/register_scene_types.cpp
+++ b/scene/register_scene_types.cpp
@@ -134,6 +134,9 @@
 #include "scene/main/timer.h"
 #include "scene/main/viewport.h"
 #include "scene/main/window.h"
+#include "scene/multiplayer/multiplayer_spawner.h"
+#include "scene/multiplayer/multiplayer_synchronizer.h"
+#include "scene/multiplayer/scene_replication_interface.h"
 #include "scene/resources/audio_stream_sample.h"
 #include "scene/resources/bit_map.h"
 #include "scene/resources/box_shape_3d.h"
@@ -301,6 +304,8 @@ void register_scene_types() {
 	GDREGISTER_CLASS(SubViewport);
 	GDREGISTER_CLASS(ViewportTexture);
 	GDREGISTER_CLASS(HTTPRequest);
+	GDREGISTER_CLASS(MultiplayerSpawner);
+	GDREGISTER_CLASS(MultiplayerSynchronizer);
 	GDREGISTER_CLASS(Timer);
 	GDREGISTER_CLASS(CanvasLayer);
 	GDREGISTER_CLASS(CanvasModulate);
@@ -822,6 +827,8 @@ void register_scene_types() {
 	GDREGISTER_CLASS(Font);
 	GDREGISTER_CLASS(Curve);
 
+	GDREGISTER_CLASS(SceneReplicationConfig);
+
 	GDREGISTER_CLASS(TextLine);
 	GDREGISTER_CLASS(TextParagraph);
 
@@ -1050,6 +1057,7 @@ void register_scene_types() {
 	}
 
 	SceneDebugger::initialize();
+	SceneReplicationInterface::make_default();
 
 	NativeExtensionManager::get_singleton()->initialize_extensions(NativeExtension::INITIALIZATION_LEVEL_SCENE);
 }
diff --git a/scene/resources/scene_replication_config.cpp b/scene/resources/scene_replication_config.cpp
new file mode 100644
index 0000000000..2acc0f1922
--- /dev/null
+++ b/scene/resources/scene_replication_config.cpp
@@ -0,0 +1,187 @@
+/*************************************************************************/
+/*  scene_replication_config.cpp                                         */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "scene_replication_config.h"
+
+#include "core/multiplayer/multiplayer_api.h"
+#include "scene/main/node.h"
+
+bool SceneReplicationConfig::_set(const StringName &p_name, const Variant &p_value) {
+	String name = p_name;
+
+	if (name.begins_with("properties/")) { // Keys have the form "properties/<index>/<path|sync|spawn>".
+		int idx = name.get_slicec('/', 1).to_int();
+		String what = name.get_slicec('/', 2);
+
+		if (properties.size() == idx && what == "path") { // A path is only accepted for the next free index (append).
+			ERR_FAIL_COND_V(p_value.get_type() != Variant::NODE_PATH, false);
+			NodePath path = p_value;
+			ERR_FAIL_COND_V(path.is_empty() || path.get_subname_count() == 0, false);
+			add_property(path);
+			return true;
+		}
+		ERR_FAIL_COND_V(p_value.get_type() != Variant::BOOL, false);
+		ERR_FAIL_INDEX_V(idx, properties.size(), false);
+		ReplicationProperty &prop = properties[idx];
+		if (what == "sync") {
+			prop.sync = p_value;
+			sync_props.push_back(prop.name); // NOTE(review): appended even when p_value is false, and again on every repeated set.
+			return true;
+		} else if (what == "spawn") {
+			prop.spawn = p_value;
+			spawn_props.push_back(prop.name); // NOTE(review): appended even when p_value is false, and again on every repeated set.
+			return true;
+		}
+	}
+	return false;
+}
+
+bool SceneReplicationConfig::_get(const StringName &p_name, Variant &r_ret) const {
+	String name = p_name;
+
+	if (name.begins_with("properties/")) {
+		int idx = name.get_slicec('/', 1).to_int();
+		String what = name.get_slicec('/', 2);
+		ERR_FAIL_INDEX_V(idx, properties.size(), false);
+		const ReplicationProperty &prop = properties[idx];
+		if (what == "path") {
+			r_ret = prop.name;
+			return true;
+		} else if (what == "sync") {
+			r_ret = prop.sync;
+			return true;
+		} else if (what == "spawn") {
+			r_ret = prop.spawn;
+			return true;
+		}
+	}
+	return false;
+}
+
+void SceneReplicationConfig::_get_property_list(List<PropertyInfo> *p_list) const {
+	for (int i = 0; i < properties.size(); i++) {
+		p_list->push_back(PropertyInfo(Variant::STRING, "properties/" + itos(i) + "/path", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL));
+		p_list->push_back(PropertyInfo(Variant::STRING, "properties/" + itos(i) + "/spawn", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL));
+		p_list->push_back(PropertyInfo(Variant::STRING, "properties/" + itos(i) + "/sync", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR | PROPERTY_USAGE_INTERNAL));
+	}
+}
+
+TypedArray<NodePath> SceneReplicationConfig::get_properties() const {
+	TypedArray<NodePath> paths;
+	for (const ReplicationProperty &prop : properties) {
+		paths.push_back(prop.name);
+	}
+	return paths;
+}
+
+void SceneReplicationConfig::add_property(const NodePath &p_path, int p_index) {
+	ERR_FAIL_COND(properties.find(p_path));
+
+	if (p_index < 0 || p_index == properties.size()) {
+		properties.push_back(ReplicationProperty(p_path));
+		return;
+	}
+
+	ERR_FAIL_INDEX(p_index, properties.size());
+
+	List<ReplicationProperty>::Element *I = properties.front();
+	int c = 0;
+	while (c < p_index) {
+		I = I->next();
+		c++;
+	}
+	properties.insert_before(I, ReplicationProperty(p_path));
+}
+
+void SceneReplicationConfig::remove_property(const NodePath &p_path) {
+	properties.erase(p_path);
+}
+
+int SceneReplicationConfig::property_get_index(const NodePath &p_path) const {
+	for (int i = 0; i < properties.size(); i++) {
+		if (properties[i].name == p_path) {
+			return i;
+		}
+	}
+	ERR_FAIL_V(-1);
+}
+
+bool SceneReplicationConfig::property_get_spawn(const NodePath &p_path) {
+	List<ReplicationProperty>::Element *E = properties.find(p_path);
+	ERR_FAIL_COND_V(!E, false);
+	return E->get().spawn;
+}
+
+void SceneReplicationConfig::property_set_spawn(const NodePath &p_path, bool p_enabled) {
+	List<ReplicationProperty>::Element *E = properties.find(p_path);
+	ERR_FAIL_COND(!E);
+	if (E->get().spawn == p_enabled) {
+		return; // Flag unchanged, the cached list is left as is.
+	}
+	E->get().spawn = p_enabled;
+	spawn_props.clear(); // Rebuild the cached list of spawn-enabled paths from scratch.
+	for (const ReplicationProperty &prop : properties) {
+		if (prop.spawn) {
+			spawn_props.push_back(prop.name); // The entry's own path, not the path being toggled.
+		}
+	}
+}
+
+bool SceneReplicationConfig::property_get_sync(const NodePath &p_path) {
+	List<ReplicationProperty>::Element *E = properties.find(p_path);
+	ERR_FAIL_COND_V(!E, false);
+	return E->get().sync;
+}
+
+void SceneReplicationConfig::property_set_sync(const NodePath &p_path, bool p_enabled) {
+	List<ReplicationProperty>::Element *E = properties.find(p_path);
+	ERR_FAIL_COND(!E);
+	if (E->get().sync == p_enabled) {
+		return; // Flag unchanged, the cached list is left as is.
+	}
+	E->get().sync = p_enabled;
+	sync_props.clear(); // Rebuild the cached list of sync-enabled paths from scratch.
+	for (const ReplicationProperty &prop : properties) {
+		if (prop.sync) {
+			sync_props.push_back(prop.name); // The entry's own path, not the path being toggled.
+		}
+	}
+}
+
+void SceneReplicationConfig::_bind_methods() {
+	ClassDB::bind_method(D_METHOD("get_properties"), &SceneReplicationConfig::get_properties);
+	ClassDB::bind_method(D_METHOD("add_property", "path", "index"), &SceneReplicationConfig::add_property, DEFVAL(-1));
+	ClassDB::bind_method(D_METHOD("remove_property", "path"), &SceneReplicationConfig::remove_property);
+	ClassDB::bind_method(D_METHOD("property_get_index", "path"), &SceneReplicationConfig::property_get_index);
+	ClassDB::bind_method(D_METHOD("property_get_spawn", "path"), &SceneReplicationConfig::property_get_spawn);
+	ClassDB::bind_method(D_METHOD("property_set_spawn", "path", "enabled"), &SceneReplicationConfig::property_set_spawn);
+	ClassDB::bind_method(D_METHOD("property_get_sync", "path"), &SceneReplicationConfig::property_get_sync);
+	ClassDB::bind_method(D_METHOD("property_set_sync", "path", "enabled"), &SceneReplicationConfig::property_set_sync);
+}
diff --git a/scene/resources/scene_replication_config.h b/scene/resources/scene_replication_config.h
new file mode 100644
index 0000000000..b791be9414
--- /dev/null
+++ b/scene/resources/scene_replication_config.h
@@ -0,0 +1,90 @@
+/*************************************************************************/
+/*  scene_replication_config.h                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef SCENE_REPLICATION_CONFIG_H
+#define SCENE_REPLICATION_CONFIG_H
+
+#include "core/io/resource.h"
+
+#include "core/variant/typed_array.h"
+
+class SceneReplicationConfig : public Resource { // Resource holding a list of node-path properties, each with a spawn and a sync flag.
+	GDCLASS(SceneReplicationConfig, Resource);
+	OBJ_SAVE_TYPE(SceneReplicationConfig);
+	RES_BASE_EXTENSION("repl");
+
+private:
+	struct ReplicationProperty { // One configured property: its path plus two boolean flags.
+		NodePath name;
+		bool spawn = true;
+		bool sync = true;
+
+		bool operator==(const ReplicationProperty &p_to) const { // Identity is the path only; the flags are ignored.
+			return name == p_to.name;
+		}
+
+		ReplicationProperty() {}
+
+		ReplicationProperty(const NodePath &p_name) { // Left implicit: `properties.find(p_path)` relies on this conversion.
+			name = p_name;
+		}
+	};
+
+	List<ReplicationProperty> properties; // All configured properties.
+	List<NodePath> spawn_props; // Cached paths whose `spawn` flag is set; rebuilt in property_set_spawn().
+	List<NodePath> sync_props; // Cached paths whose `sync` flag is set; rebuilt in property_set_sync().
+
+protected:
+	static void _bind_methods();
+
+	bool _set(const StringName &p_name, const Variant &p_value);
+	bool _get(const StringName &p_name, Variant &r_ret) const;
+	void _get_property_list(List<PropertyInfo> *p_list) const;
+
+public:
+	TypedArray<NodePath> get_properties() const;
+
+	void add_property(const NodePath &p_path, int p_index = -1);
+	void remove_property(const NodePath &p_path);
+
+	int property_get_index(const NodePath &p_path) const;
+	bool property_get_spawn(const NodePath &p_path);
+	void property_set_spawn(const NodePath &p_path, bool p_enabled);
+
+	bool property_get_sync(const NodePath &p_path);
+	void property_set_sync(const NodePath &p_path, bool p_enabled);
+
+	const List<NodePath> &get_spawn_properties() const { return spawn_props; } // Read-only view of the cached spawn paths.
+	const List<NodePath> &get_sync_properties() const { return sync_props; } // Read-only view of the cached sync paths.
+
+	SceneReplicationConfig() {}
+};
+
+#endif // SCENE_REPLICATION_CONFIG_H
diff --git a/scene/resources/syntax_highlighter.cpp b/scene/resources/syntax_highlighter.cpp
index e0aa21ac37..f1eddd8ffc 100644
--- a/scene/resources/syntax_highlighter.cpp
+++ b/scene/resources/syntax_highlighter.cpp
@@ -116,14 +116,6 @@ void SyntaxHighlighter::_bind_methods() {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-static bool _is_char(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
-}
-
-static bool _is_hex_symbol(char32_t c) {
-	return ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
-}
-
 Dictionary CodeHighlighter::_get_line_syntax_highlighting_impl(int p_line) {
 	Dictionary color_map;
 
@@ -166,7 +158,7 @@ Dictionary CodeHighlighter::_get_line_syntax_highlighting_impl(int p_line) {
 		color = font_color;
 		bool is_char = !is_symbol(str[j]);
 		bool is_a_symbol = is_symbol(str[j]);
-		bool is_number = (str[j] >= '0' && str[j] <= '9');
+		bool is_number = is_digit(str[j]);
 
 		/* color regions */
 		if (is_a_symbol || in_region != -1) {
@@ -304,7 +296,7 @@ Dictionary CodeHighlighter::_get_line_syntax_highlighting_impl(int p_line) {
 		}
 
 		// Allow ABCDEF in hex notation.
-		if (is_hex_notation && (_is_hex_symbol(str[j]) || is_number)) {
+		if (is_hex_notation && (is_hex_digit(str[j]) || is_number)) {
 			is_number = true;
 		} else {
 			is_hex_notation = false;
@@ -321,7 +313,7 @@ Dictionary CodeHighlighter::_get_line_syntax_highlighting_impl(int p_line) {
 			}
 		}
 
-		if (!in_word && _is_char(str[j]) && !is_number) {
+		if (!in_word && (is_ascii_char(str[j]) || is_underscore(str[j])) && !is_number) {
 			in_word = true;
 		}
 
diff --git a/scene/resources/visual_shader.cpp b/scene/resources/visual_shader.cpp
index 0c7f069004..dd2f3d2202 100644
--- a/scene/resources/visual_shader.cpp
+++ b/scene/resources/visual_shader.cpp
@@ -1106,10 +1106,6 @@ String VisualShader::generate_preview_shader(Type p_type, int p_node, int p_port
 	return final_code;
 }
 
-#define IS_INITIAL_CHAR(m_d) (((m_d) >= 'a' && (m_d) <= 'z') || ((m_d) >= 'A' && (m_d) <= 'Z'))
-
-#define IS_SYMBOL_CHAR(m_d) (((m_d) >= 'a' && (m_d) <= 'z') || ((m_d) >= 'A' && (m_d) <= 'Z') || ((m_d) >= '0' && (m_d) <= '9') || (m_d) == '_')
-
 String VisualShader::validate_port_name(const String &p_port_name, VisualShaderNode *p_node, int p_port_id, bool p_output) const {
 	String name = p_port_name;
 
@@ -1117,7 +1113,7 @@ String VisualShader::validate_port_name(const String &p_port_name, VisualShaderN
 		return String();
 	}
 
-	while (name.length() && !IS_INITIAL_CHAR(name[0])) {
+	while (name.length() && !is_ascii_char(name[0])) {
 		name = name.substr(1, name.length() - 1);
 	}
 
@@ -1125,7 +1121,7 @@ String VisualShader::validate_port_name(const String &p_port_name, VisualShaderN
 		String valid_name;
 
 		for (int i = 0; i < name.length(); i++) {
-			if (IS_SYMBOL_CHAR(name[i])) {
+			if (is_ascii_identifier_char(name[i])) {
 				valid_name += String::chr(name[i]);
 			} else if (name[i] == ' ') {
 				valid_name += "_";
@@ -1162,14 +1158,14 @@ String VisualShader::validate_port_name(const String &p_port_name, VisualShaderN
 
 String VisualShader::validate_uniform_name(const String &p_name, const Ref<VisualShaderNodeUniform> &p_uniform) const {
 	String name = p_name; //validate name first
-	while (name.length() && !IS_INITIAL_CHAR(name[0])) {
+	while (name.length() && !is_ascii_char(name[0])) {
 		name = name.substr(1, name.length() - 1);
 	}
 	if (!name.is_empty()) {
 		String valid_name;
 
 		for (int i = 0; i < name.length(); i++) {
-			if (IS_SYMBOL_CHAR(name[i])) {
+			if (is_ascii_identifier_char(name[i])) {
 				valid_name += String::chr(name[i]);
 			} else if (name[i] == ' ') {
 				valid_name += "_";
@@ -1206,7 +1202,7 @@ String VisualShader::validate_uniform_name(const String &p_name, const Ref<Visua
 		if (exists) {
 			//remove numbers, put new and try again
 			attempt++;
-			while (name.length() && name[name.length() - 1] >= '0' && name[name.length() - 1] <= '9') {
+			while (name.length() && is_digit(name[name.length() - 1])) {
 				name = name.substr(0, name.length() - 1);
 			}
 			ERR_FAIL_COND_V(name.is_empty(), String());
diff --git a/servers/display_server.cpp b/servers/display_server.cpp
index 5ded5cf214..d880df2a9b 100644
--- a/servers/display_server.cpp
+++ b/servers/display_server.cpp
@@ -380,6 +380,7 @@ void DisplayServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("screen_get_scale", "screen"), &DisplayServer::screen_get_scale, DEFVAL(SCREEN_OF_MAIN_WINDOW));
 	ClassDB::bind_method(D_METHOD("screen_is_touchscreen", "screen"), &DisplayServer::screen_is_touchscreen, DEFVAL(SCREEN_OF_MAIN_WINDOW));
 	ClassDB::bind_method(D_METHOD("screen_get_max_scale"), &DisplayServer::screen_get_max_scale);
+	ClassDB::bind_method(D_METHOD("screen_get_refresh_rate", "screen"), &DisplayServer::screen_get_refresh_rate, DEFVAL(SCREEN_OF_MAIN_WINDOW));
 
 	ClassDB::bind_method(D_METHOD("screen_set_orientation", "orientation", "screen"), &DisplayServer::screen_set_orientation, DEFVAL(SCREEN_OF_MAIN_WINDOW));
 	ClassDB::bind_method(D_METHOD("screen_get_orientation", "screen"), &DisplayServer::screen_get_orientation, DEFVAL(SCREEN_OF_MAIN_WINDOW));
@@ -539,6 +540,7 @@ void DisplayServer::_bind_methods() {
 	BIND_ENUM_CONSTANT(WINDOW_MODE_MINIMIZED);
 	BIND_ENUM_CONSTANT(WINDOW_MODE_MAXIMIZED);
 	BIND_ENUM_CONSTANT(WINDOW_MODE_FULLSCREEN);
+	BIND_ENUM_CONSTANT(WINDOW_MODE_EXCLUSIVE_FULLSCREEN);
 
 	BIND_ENUM_CONSTANT(WINDOW_FLAG_RESIZE_DISABLED);
 	BIND_ENUM_CONSTANT(WINDOW_FLAG_BORDERLESS);
diff --git a/servers/display_server.h b/servers/display_server.h
index 8c6586dc20..19bb111094 100644
--- a/servers/display_server.h
+++ b/servers/display_server.h
@@ -53,7 +53,8 @@ public:
 		WINDOW_MODE_WINDOWED,
 		WINDOW_MODE_MINIMIZED,
 		WINDOW_MODE_MAXIMIZED,
-		WINDOW_MODE_FULLSCREEN
+		WINDOW_MODE_FULLSCREEN,
+		WINDOW_MODE_EXCLUSIVE_FULLSCREEN,
 	};
 
 	// Keep the VSyncMode enum values in sync with the `display/window/vsync/vsync_mode`
@@ -174,6 +175,8 @@ public:
 		SCREEN_OF_MAIN_WINDOW = -1
 	};
 
+	const float SCREEN_REFRESH_RATE_FALLBACK = 60.0; // Returned by screen_get_refresh_rate if the method fails. Most screens are 60hz as of 2022.
+
 	virtual int get_screen_count() const = 0;
 	virtual Point2i screen_get_position(int p_screen = SCREEN_OF_MAIN_WINDOW) const = 0;
 	virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const = 0;
@@ -188,6 +191,7 @@ public:
 		}
 		return scale;
 	}
+	virtual float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const = 0;
 	virtual bool screen_is_touchscreen(int p_screen = SCREEN_OF_MAIN_WINDOW) const;
 
 	// Keep the ScreenOrientation enum values in sync with the `display/window/handheld/orientation`
diff --git a/servers/display_server_headless.h b/servers/display_server_headless.h
index 4ef9dc622f..f74a8fad23 100644
--- a/servers/display_server_headless.h
+++ b/servers/display_server_headless.h
@@ -62,6 +62,7 @@ public:
 	int screen_get_dpi(int p_screen = SCREEN_OF_MAIN_WINDOW) const override { return 96; /* 0 might cause issues */ }
 	float screen_get_scale(int p_screen = SCREEN_OF_MAIN_WINDOW) const override { return 1; }
 	float screen_get_max_scale() const override { return 1; }
+	float screen_get_refresh_rate(int p_screen = SCREEN_OF_MAIN_WINDOW) const override { return SCREEN_REFRESH_RATE_FALLBACK; }
 
 	Vector<DisplayServer::WindowID> get_window_list() const override { return Vector<DisplayServer::WindowID>(); }
 
diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp
index 25a366aa4b..7183fd110f 100644
--- a/servers/rendering/renderer_rd/effects_rd.cpp
+++ b/servers/rendering/renderer_rd/effects_rd.cpp
@@ -1453,7 +1453,6 @@ void EffectsRD::downsample_depth(RID p_depth_buffer, const Vector<RID> &p_depth_
 
 	RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE);
 
-	ss_effects.used_full_mips_last_frame = use_mips;
 	ss_effects.used_full_mips_last_frame = use_full_mips;
 	ss_effects.used_half_size_last_frame = use_half_size;
 }
diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp
index 8ded180633..5b2be8e174 100644
--- a/servers/rendering/renderer_scene_cull.cpp
+++ b/servers/rendering/renderer_scene_cull.cpp
@@ -3871,8 +3871,12 @@ void RendererSceneCull::update_dirty_instances() {
 
 void RendererSceneCull::update() {
 	//optimize bvhs
-	for (uint32_t i = 0; i < scenario_owner.get_rid_count(); i++) {
-		Scenario *s = scenario_owner.get_ptr_by_index(i);
+
+	uint32_t rid_count = scenario_owner.get_rid_count();
+	RID *rids = (RID *)alloca(sizeof(RID) * rid_count);
+	scenario_owner.fill_owned_buffer(rids);
+	for (uint32_t i = 0; i < rid_count; i++) {
+		Scenario *s = scenario_owner.get_or_null(rids[i]);
 		s->indexers[Scenario::INDEXER_GEOMETRY].optimize_incremental(indexer_update_iterations);
 		s->indexers[Scenario::INDEXER_VOLUMES].optimize_incremental(indexer_update_iterations);
 	}
diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp
index 2e1ee41406..b10022545c 100644
--- a/servers/rendering/shader_language.cpp
+++ b/servers/rendering/shader_language.cpp
@@ -35,18 +35,6 @@
 
 #define HAS_WARNING(flag) (warning_flags & flag)
 
-static bool _is_text_char(char32_t c) {
-	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
-}
-
-static bool _is_number(char32_t c) {
-	return (c >= '0' && c <= '9');
-}
-
-static bool _is_hex(char32_t c) {
-	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
-}
-
 String ShaderLanguage::get_operator_text(Operator p_op) {
 	static const char *op_names[OP_MAX] = { "==",
 		"!=",
@@ -543,7 +531,7 @@ ShaderLanguage::Token ShaderLanguage::_get_token() {
 			default: {
 				char_idx--; //go back one, since we have no idea what this is
 
-				if (_is_number(GETCHAR(0)) || (GETCHAR(0) == '.' && _is_number(GETCHAR(1)))) {
+				if (is_digit(GETCHAR(0)) || (GETCHAR(0) == '.' && is_digit(GETCHAR(1)))) {
 					// parse number
 					bool hexa_found = false;
 					bool period_found = false;
@@ -584,7 +572,7 @@ ShaderLanguage::Token ShaderLanguage::_get_token() {
 						const char32_t symbol = String::char_lowercase(GETCHAR(i));
 						bool error = false;
 
-						if (_is_number(symbol)) {
+						if (is_digit(symbol)) {
 							if (end_suffix_found) {
 								error = true;
 							}
@@ -617,8 +605,8 @@ ShaderLanguage::Token ShaderLanguage::_get_token() {
 										break;
 									}
 								}
-							} else if (!hexa_found || !_is_hex(symbol)) {
-								if (_is_text_char(symbol)) {
+							} else if (!hexa_found || !is_hex_digit(symbol)) {
+								if (is_ascii_identifier_char(symbol)) {
 									error = true;
 								} else {
 									break;
@@ -649,7 +637,7 @@ ShaderLanguage::Token ShaderLanguage::_get_token() {
 							return _make_token(TK_ERROR, "Invalid (hexadecimal) numeric constant");
 						}
 					} else if (period_found || exponent_found || float_suffix_found) { // Float
-						if (exponent_found && (!_is_number(last_char) && last_char != 'f')) { // checks for eg: "2E", "2E-", "2E+"
+						if (exponent_found && (!is_digit(last_char) && last_char != 'f')) { // checks for eg: "2E", "2E-", "2E+"
 							return _make_token(TK_ERROR, "Invalid (float) numeric constant");
 						}
 						if (period_found) {
@@ -660,7 +648,7 @@ ShaderLanguage::Token ShaderLanguage::_get_token() {
 								}
 							} else {
 								//checks for eg. "1." or "1.99" notations
-								if (last_char != '.' && !_is_number(last_char)) {
+								if (last_char != '.' && !is_digit(last_char)) {
 									return _make_token(TK_ERROR, "Invalid (float) numeric constant");
 								}
 							}
@@ -723,11 +711,11 @@ ShaderLanguage::Token ShaderLanguage::_get_token() {
 					return _make_token(TK_PERIOD);
 				}
 
-				if (_is_text_char(GETCHAR(0))) {
+				if (is_ascii_identifier_char(GETCHAR(0))) {
 					// parse identifier
 					String str;
 
-					while (_is_text_char(GETCHAR(0))) {
+					while (is_ascii_identifier_char(GETCHAR(0))) {
 						str += char32_t(GETCHAR(0));
 						char_idx++;
 					}
@@ -5267,8 +5255,10 @@ ShaderLanguage::Node *ShaderLanguage::_parse_expression(BlockNode *p_block, cons
 					return nullptr;
 				} else {
 #ifdef DEBUG_ENABLED
-					if (check_warnings && HAS_WARNING(ShaderWarning::FORMATTING_ERROR_FLAG)) {
-						_add_line_warning(ShaderWarning::FORMATTING_ERROR, RTR("Empty statement. Remove ';' to fix this warning."));
+					if (!p_block || (p_block->block_type != BlockNode::BLOCK_TYPE_FOR_INIT && p_block->block_type != BlockNode::BLOCK_TYPE_FOR_CONDITION)) {
+						if (check_warnings && HAS_WARNING(ShaderWarning::FORMATTING_ERROR_FLAG)) {
+							_add_line_warning(ShaderWarning::FORMATTING_ERROR, RTR("Empty statement. Remove ';' to fix this warning."));
+						}
 					}
 #endif // DEBUG_ENABLED
 					_set_tkpos(prepos);
@@ -6382,6 +6372,8 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 		}
 
 		bool is_struct = shader->structs.has(tk.text);
+		bool is_var_init = false;
+		bool is_condition = false;
 
 		if (tk.type == TK_CURLY_BRACKET_CLOSE) { //end of block
 			if (p_just_one) {
@@ -6392,6 +6384,8 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 			return OK;
 
 		} else if (tk.type == TK_CONST || is_token_precision(tk.type) || is_token_nonvoid_datatype(tk.type) || is_struct) {
+			is_var_init = true;
+
 			String struct_name = "";
 			if (is_struct) {
 				struct_name = tk.text;
@@ -7147,6 +7141,14 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 			init_block->block_type = BlockNode::BLOCK_TYPE_FOR_INIT;
 			init_block->parent_block = p_block;
 			init_block->single_statement = true;
+			// Need to find a parent function to correctly process unused variable warnings.
+			{
+				BlockNode *block = p_block;
+				while (block && !block->parent_function) {
+					block = block->parent_block;
+				}
+				init_block->parent_function = block->parent_function;
+			}
 			cf->blocks.push_back(init_block);
 			Error err = _parse_block(init_block, p_function_info, true, false, false);
 			if (err != OK) {
@@ -7331,25 +7333,13 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 			if (!expr) {
 				return ERR_PARSE_ERROR;
 			}
-
-			bool empty = false;
+			is_condition = expr->type == Node::TYPE_OPERATOR && expr->get_datatype() == TYPE_BOOL;
 
 			if (expr->type == Node::TYPE_OPERATOR) {
 				OperatorNode *op = static_cast<OperatorNode *>(expr);
 				if (op->op == OP_EMPTY) {
-					empty = true;
-				}
-			}
-			if (p_block->block_type == BlockNode::BLOCK_TYPE_FOR_INIT) {
-				if (!empty && expr->type != BlockNode::TYPE_VARIABLE_DECLARATION) {
-					_set_error(RTR("The left expression is expected to be a variable declaration."));
-					return ERR_PARSE_ERROR;
-				}
-			}
-			if (p_block->block_type == BlockNode::BLOCK_TYPE_FOR_CONDITION) {
-				if (!empty && expr->get_datatype() != TYPE_BOOL) {
-					_set_error(RTR("The middle expression is expected to be boolean."));
-					return ERR_PARSE_ERROR;
+					is_var_init = true;
+					is_condition = true;
 				}
 			}
 
@@ -7358,6 +7348,10 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 
 			if (p_block->block_type == BlockNode::BLOCK_TYPE_FOR_CONDITION) {
 				if (tk.type == TK_COMMA) {
+					if (!is_condition) {
+						_set_error(RTR("The middle expression is expected to be a boolean operator."));
+						return ERR_PARSE_ERROR;
+					}
 					continue;
 				}
 				if (tk.type != TK_SEMICOLON) {
@@ -7378,6 +7372,17 @@ Error ShaderLanguage::_parse_block(BlockNode *p_block, const FunctionInfo &p_fun
 			}
 		}
 
+		if (p_block) {
+			if (p_block->block_type == BlockNode::BLOCK_TYPE_FOR_INIT && !is_var_init) {
+				_set_error(RTR("The left expression is expected to be a variable declaration."));
+				return ERR_PARSE_ERROR;
+			}
+			if (p_block->block_type == BlockNode::BLOCK_TYPE_FOR_CONDITION && !is_condition) {
+				_set_error(RTR("The middle expression is expected to be a boolean operator."));
+				return ERR_PARSE_ERROR;
+			}
+		}
+
 		if (p_just_one) {
 			break;
 		}
diff --git a/tests/core/math/test_expression.h b/tests/core/math/test_expression.h
index 5a894b20f3..6e3be541b0 100644
--- a/tests/core/math/test_expression.h
+++ b/tests/core/math/test_expression.h
@@ -137,7 +137,7 @@ TEST_CASE("[Expression] Scientific notation") {
 			expression.parse("2e5") == OK,
 			"The expression should parse successfully.");
 	CHECK_MESSAGE(
-			Math::is_equal_approx(double(expression.execute()), 25),
+			Math::is_equal_approx(double(expression.execute()), 2e5),
 			"The expression should return the expected result.");
 
 	CHECK_MESSAGE(
diff --git a/tests/core/math/test_math.cpp b/tests/core/math/test_math.cpp
index a24a8fde2b..4182455b7a 100644
--- a/tests/core/math/test_math.cpp
+++ b/tests/core/math/test_math.cpp
@@ -227,7 +227,7 @@ class GetClassAndNamespace {
 						return TK_SYMBOL;
 					}
 
-					if (code[idx] == '-' || (code[idx] >= '0' && code[idx] <= '9')) {
+					if (code[idx] == '-' || is_digit(code[idx])) {
 						//a number
 						const char32_t *rptr;
 						double number = String::to_float(&code[idx], &rptr);
@@ -235,10 +235,10 @@ class GetClassAndNamespace {
 						value = number;
 						return TK_NUMBER;
 
-					} else if ((code[idx] >= 'A' && code[idx] <= 'Z') || (code[idx] >= 'a' && code[idx] <= 'z') || code[idx] > 127) {
+					} else if (is_ascii_char(code[idx]) || code[idx] > 127) {
 						String id;
 
-						while ((code[idx] >= 'A' && code[idx] <= 'Z') || (code[idx] >= 'a' && code[idx] <= 'z') || code[idx] > 127) {
+						while (is_ascii_char(code[idx]) || code[idx] > 127) {
 							id += code[idx];
 							idx++;
 						}
diff --git a/tests/core/math/test_rect2.h b/tests/core/math/test_rect2.h
index d98a94b1b5..0b1106ac3c 100644
--- a/tests/core/math/test_rect2.h
+++ b/tests/core/math/test_rect2.h
@@ -32,15 +32,11 @@
 #define TEST_RECT2_H
 
 #include "core/math/rect2.h"
+#include "core/math/rect2i.h"
 
 #include "thirdparty/doctest/doctest.h"
 
 namespace TestRect2 {
-// We also test Rect2i here, for consistency with the source code where Rect2
-// and Rect2i are defined in the same file.
-
-// Rect2
-
 TEST_CASE("[Rect2] Constructor methods") {
 	const Rect2 rect = Rect2(0, 100, 1280, 720);
 	const Rect2 rect_vector = Rect2(Vector2(0, 100), Vector2(1280, 720));
@@ -304,278 +300,6 @@ TEST_CASE("[Rect2] Merging") {
 			Rect2(0, 100, 1280, 720).merge(Rect2(-4000, -4000, 100, 100)).is_equal_approx(Rect2(-4000, -4000, 5280, 4820)),
 			"merge() with non-enclosed Rect2 should return the expected result.");
 }
-
-// Rect2i
-
-TEST_CASE("[Rect2i] Constructor methods") {
-	Rect2i recti = Rect2i(0, 100, 1280, 720);
-	Rect2i recti_vector = Rect2i(Vector2i(0, 100), Vector2i(1280, 720));
-	Rect2i recti_copy_recti = Rect2i(recti);
-	Rect2i recti_copy_rect = Rect2i(Rect2(0, 100, 1280, 720));
-
-	CHECK_MESSAGE(
-			recti == recti_vector,
-			"Rect2is created with the same dimensions but by different methods should be equal.");
-	CHECK_MESSAGE(
-			recti == recti_copy_recti,
-			"Rect2is created with the same dimensions but by different methods should be equal.");
-	CHECK_MESSAGE(
-			recti == recti_copy_rect,
-			"Rect2is created with the same dimensions but by different methods should be equal.");
-}
-
-TEST_CASE("[Rect2i] String conversion") {
-	// Note: This also depends on the Vector2 string representation.
-	CHECK_MESSAGE(
-			String(Rect2i(0, 100, 1280, 720)) == "[P: (0, 100), S: (1280, 720)]",
-			"The string representation should match the expected value.");
-}
-
-TEST_CASE("[Rect2i] Basic getters") {
-	const Rect2i rect = Rect2i(0, 100, 1280, 720);
-	CHECK_MESSAGE(
-			rect.get_position() == Vector2i(0, 100),
-			"get_position() should return the expected value.");
-	CHECK_MESSAGE(
-			rect.get_size() == Vector2i(1280, 720),
-			"get_size() should return the expected value.");
-	CHECK_MESSAGE(
-			rect.get_end() == Vector2i(1280, 820),
-			"get_end() should return the expected value.");
-	CHECK_MESSAGE(
-			rect.get_center() == Vector2i(640, 460),
-			"get_center() should return the expected value.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1281, 721).get_center() == Vector2i(640, 460),
-			"get_center() should return the expected value.");
-}
-
-TEST_CASE("[Rect2i] Basic setters") {
-	Rect2i rect = Rect2i(0, 100, 1280, 720);
-	rect.set_end(Vector2i(4000, 4000));
-	CHECK_MESSAGE(
-			rect == Rect2i(0, 100, 4000, 3900),
-			"set_end() should result in the expected Rect2i.");
-
-	rect = Rect2i(0, 100, 1280, 720);
-	rect.set_position(Vector2i(4000, 4000));
-	CHECK_MESSAGE(
-			rect == Rect2i(4000, 4000, 1280, 720),
-			"set_position() should result in the expected Rect2i.");
-
-	rect = Rect2i(0, 100, 1280, 720);
-	rect.set_size(Vector2i(4000, 4000));
-	CHECK_MESSAGE(
-			rect == Rect2i(0, 100, 4000, 4000),
-			"set_size() should result in the expected Rect2i.");
-}
-
-TEST_CASE("[Rect2i] Area getters") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).get_area() == 921'600,
-			"get_area() should return the expected value.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, -1280, -720).get_area() == 921'600,
-			"get_area() should return the expected value.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, -720).get_area() == -921'600,
-			"get_area() should return the expected value.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, -1280, 720).get_area() == -921'600,
-			"get_area() should return the expected value.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 0, 720).get_area() == 0,
-			"get_area() should return the expected value.");
-
-	CHECK_MESSAGE(
-			!Rect2i(0, 100, 1280, 720).has_no_area(),
-			"has_no_area() should return the expected value on Rect2i with an area.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 0, 500).has_no_area(),
-			"has_no_area() should return the expected value on Rect2i with no area.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 500, 0).has_no_area(),
-			"has_no_area() should return the expected value on Rect2i with no area.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 0, 0).has_no_area(),
-			"has_no_area() should return the expected value on Rect2i with no area.");
-}
-
-TEST_CASE("[Rect2i] Absolute coordinates") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).abs() == Rect2i(0, 100, 1280, 720),
-			"abs() should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, -100, 1280, 720).abs() == Rect2i(0, -100, 1280, 720),
-			"abs() should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, -100, -1280, -720).abs() == Rect2i(-1280, -820, 1280, 720),
-			"abs() should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, -1280, 720).abs() == Rect2i(-1280, 100, 1280, 720),
-			"abs() should return the expected Rect2i.");
-}
-
-TEST_CASE("[Rect2i] Intersection") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).intersection(Rect2i(0, 300, 100, 100)) == Rect2i(0, 300, 100, 100),
-			"intersection() with fully enclosed Rect2i should return the expected result.");
-	// The resulting Rect2i is 100 pixels high because the first Rect2i is vertically offset by 100 pixels.
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).intersection(Rect2i(1200, 700, 100, 100)) == Rect2i(1200, 700, 80, 100),
-			"intersection() with partially enclosed Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).intersection(Rect2i(-4000, -4000, 100, 100)) == Rect2i(),
-			"intersection() with non-enclosed Rect2i should return the expected result.");
-}
-
-TEST_CASE("[Rect2i] Enclosing") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).encloses(Rect2i(0, 300, 100, 100)),
-			"encloses() with fully contained Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			!Rect2i(0, 100, 1280, 720).encloses(Rect2i(1200, 700, 100, 100)),
-			"encloses() with partially contained Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			!Rect2i(0, 100, 1280, 720).encloses(Rect2i(-4000, -4000, 100, 100)),
-			"encloses() with non-contained Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).encloses(Rect2i(0, 100, 1280, 720)),
-			"encloses() with identical Rect2i should return the expected result.");
-}
-
-TEST_CASE("[Rect2i] Expanding") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).expand(Vector2i(500, 600)) == Rect2i(0, 100, 1280, 720),
-			"expand() with contained Vector2i should return the expected result.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).expand(Vector2i(0, 0)) == Rect2i(0, 0, 1280, 820),
-			"expand() with non-contained Vector2i should return the expected result.");
-}
-
-TEST_CASE("[Rect2i] Growing") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow(100) == Rect2i(-100, 0, 1480, 920),
-			"grow() with positive value should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow(-100) == Rect2i(100, 200, 1080, 520),
-			"grow() with negative value should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow(-4000) == Rect2i(4000, 4100, -6720, -7280),
-			"grow() with large negative value should return the expected Rect2i.");
-
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow_individual(100, 200, 300, 400) == Rect2i(-100, -100, 1680, 1320),
-			"grow_individual() with positive values should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow_individual(-100, 200, 300, -400) == Rect2i(100, -100, 1480, 520),
-			"grow_individual() with positive and negative values should return the expected Rect2i.");
-
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow_side(SIDE_TOP, 500) == Rect2i(0, -400, 1280, 1220),
-			"grow_side() with positive value should return the expected Rect2i.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).grow_side(SIDE_TOP, -500) == Rect2i(0, 600, 1280, 220),
-			"grow_side() with negative value should return the expected Rect2i.");
-}
-
-TEST_CASE("[Rect2i] Has point") {
-	Rect2i rect = Rect2i(0, 100, 1280, 720);
-	CHECK_MESSAGE(
-			rect.has_point(Vector2i(500, 600)),
-			"has_point() with contained Vector2i should return the expected result.");
-	CHECK_MESSAGE(
-			!rect.has_point(Vector2i(0, 0)),
-			"has_point() with non-contained Vector2i should return the expected result.");
-
-	CHECK_MESSAGE(
-			rect.has_point(rect.position),
-			"has_point() with positive size should include `position`.");
-	CHECK_MESSAGE(
-			rect.has_point(rect.position + Vector2i(1, 1)),
-			"has_point() with positive size should include `position + (1, 1)`.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + Vector2i(1, -1)),
-			"has_point() with positive size should not include `position + (1, -1)`.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + rect.size),
-			"has_point() with positive size should not include `position + size`.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + rect.size + Vector2i(1, 1)),
-			"has_point() with positive size should not include `position + size + (1, 1)`.");
-	CHECK_MESSAGE(
-			rect.has_point(rect.position + rect.size + Vector2i(-1, -1)),
-			"has_point() with positive size should include `position + size + (-1, -1)`.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + rect.size + Vector2i(-1, 1)),
-			"has_point() with positive size should not include `position + size + (-1, 1)`.");
-
-	CHECK_MESSAGE(
-			rect.has_point(rect.position + Vector2i(0, 10)),
-			"has_point() with point located on left edge should return true.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + Vector2i(rect.size.x, 10)),
-			"has_point() with point located on right edge should return false.");
-	CHECK_MESSAGE(
-			rect.has_point(rect.position + Vector2i(10, 0)),
-			"has_point() with point located on top edge should return true.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + Vector2i(10, rect.size.y)),
-			"has_point() with point located on bottom edge should return false.");
-
-	/*
-	// FIXME: Disabled for now until GH-37617 is fixed one way or another.
-	// More tests should then be written like for the positive size case.
-	rect = Rect2i(0, 100, -1280, -720);
-	CHECK_MESSAGE(
-			rect.has_point(rect.position),
-			"has_point() with negative size should include `position`.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + rect.size),
-			"has_point() with negative size should not include `position + size`.");
-	*/
-
-	rect = Rect2i(-4000, -200, 1280, 720);
-	CHECK_MESSAGE(
-			rect.has_point(rect.position + Vector2i(0, 10)),
-			"has_point() with negative position and point located on left edge should return true.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + Vector2i(rect.size.x, 10)),
-			"has_point() with negative position and point located on right edge should return false.");
-	CHECK_MESSAGE(
-			rect.has_point(rect.position + Vector2i(10, 0)),
-			"has_point() with negative position and point located on top edge should return true.");
-	CHECK_MESSAGE(
-			!rect.has_point(rect.position + Vector2i(10, rect.size.y)),
-			"has_point() with negative position and point located on bottom edge should return false.");
-}
-
-TEST_CASE("[Rect2i] Intersection") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).intersects(Rect2i(0, 300, 100, 100)),
-			"intersects() with fully enclosed Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).intersects(Rect2i(1200, 700, 100, 100)),
-			"intersects() with partially enclosed Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			!Rect2i(0, 100, 1280, 720).intersects(Rect2i(-4000, -4000, 100, 100)),
-			"intersects() with non-enclosed Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			!Rect2i(0, 0, 2, 2).intersects(Rect2i(2, 2, 2, 2)),
-			"intersects() with adjacent Rect2i should return the expected result.");
-}
-
-TEST_CASE("[Rect2i] Merging") {
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).merge(Rect2i(0, 300, 100, 100)) == Rect2i(0, 100, 1280, 720),
-			"merge() with fully enclosed Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).merge(Rect2i(1200, 700, 100, 100)) == Rect2i(0, 100, 1300, 720),
-			"merge() with partially enclosed Rect2i should return the expected result.");
-	CHECK_MESSAGE(
-			Rect2i(0, 100, 1280, 720).merge(Rect2i(-4000, -4000, 100, 100)) == Rect2i(-4000, -4000, 5280, 4820),
-			"merge() with non-enclosed Rect2i should return the expected result.");
-}
 } // namespace TestRect2
 
 #endif // TEST_RECT2_H
diff --git a/tests/core/math/test_rect2i.h b/tests/core/math/test_rect2i.h
new file mode 100644
index 0000000000..0d1a088a66
--- /dev/null
+++ b/tests/core/math/test_rect2i.h
@@ -0,0 +1,311 @@
+/*************************************************************************/
+/*  test_rect2i.h                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TEST_RECT2I_H
+#define TEST_RECT2I_H
+
+#include "core/math/rect2.h"
+#include "core/math/rect2i.h"
+
+#include "thirdparty/doctest/doctest.h"
+
+namespace TestRect2i {
+TEST_CASE("[Rect2i] Constructor methods") { // The same rectangle built through every constructor must compare equal.
+	Rect2i recti = Rect2i(0, 100, 1280, 720); // Four scalars: position x/y, then size x/y.
+	Rect2i recti_vector = Rect2i(Vector2i(0, 100), Vector2i(1280, 720)); // Position and size vectors.
+	Rect2i recti_copy_recti = Rect2i(recti); // Copy of an existing Rect2i.
+	Rect2i recti_copy_rect = Rect2i(Rect2(0, 100, 1280, 720)); // Conversion from a Rect2 holding the same values.
+
+	CHECK_MESSAGE(
+			recti == recti_vector,
+			"Rect2is created with the same dimensions but by different methods should be equal.");
+	CHECK_MESSAGE(
+			recti == recti_copy_recti,
+			"Rect2is created with the same dimensions but by different methods should be equal.");
+	CHECK_MESSAGE(
+			recti == recti_copy_rect,
+			"Rect2is created with the same dimensions but by different methods should be equal.");
+}
+
+TEST_CASE("[Rect2i] String conversion") {
+	// Note: This also depends on the Vector2i string representation used for the "P" (position) and "S" (size) parts.
+	CHECK_MESSAGE(
+			String(Rect2i(0, 100, 1280, 720)) == "[P: (0, 100), S: (1280, 720)]",
+			"The string representation should match the expected value.");
+}
+
+TEST_CASE("[Rect2i] Basic getters") { // Read-only accessors on a rectangle at (0, 100) with size (1280, 720).
+	const Rect2i rect = Rect2i(0, 100, 1280, 720);
+	CHECK_MESSAGE(
+			rect.get_position() == Vector2i(0, 100),
+			"get_position() should return the expected value.");
+	CHECK_MESSAGE(
+			rect.get_size() == Vector2i(1280, 720),
+			"get_size() should return the expected value.");
+	CHECK_MESSAGE(
+			rect.get_end() == Vector2i(1280, 820), // Expected value is position + size.
+			"get_end() should return the expected value.");
+	CHECK_MESSAGE(
+			rect.get_center() == Vector2i(640, 460), // Expected value is position + size / 2.
+			"get_center() should return the expected value.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1281, 721).get_center() == Vector2i(640, 460), // Odd sizes: same center as the 1280x720 case, i.e. the half-size rounds down.
+			"get_center() should return the expected value.");
+}
+
+TEST_CASE("[Rect2i] Basic setters") { // Each setter is applied to a fresh (0, 100, 1280, 720) rectangle.
+	Rect2i rect = Rect2i(0, 100, 1280, 720);
+	rect.set_end(Vector2i(4000, 4000));
+	CHECK_MESSAGE(
+			rect == Rect2i(0, 100, 4000, 3900), // Position is kept; the size becomes end - position.
+			"set_end() should result in the expected Rect2i.");
+
+	rect = Rect2i(0, 100, 1280, 720);
+	rect.set_position(Vector2i(4000, 4000));
+	CHECK_MESSAGE(
+			rect == Rect2i(4000, 4000, 1280, 720), // Size is kept.
+			"set_position() should result in the expected Rect2i.");
+
+	rect = Rect2i(0, 100, 1280, 720);
+	rect.set_size(Vector2i(4000, 4000));
+	CHECK_MESSAGE(
+			rect == Rect2i(0, 100, 4000, 4000), // Position is kept.
+			"set_size() should result in the expected Rect2i.");
+}
+
+TEST_CASE("[Rect2i] Area getters") { // get_area() is expected to be the signed product of width and height.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).get_area() == 921'600,
+			"get_area() should return the expected value.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, -1280, -720).get_area() == 921'600, // Two negative dimensions: positive product.
+			"get_area() should return the expected value.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, -720).get_area() == -921'600, // One negative dimension: negative product.
+			"get_area() should return the expected value.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, -1280, 720).get_area() == -921'600, // One negative dimension: negative product.
+			"get_area() should return the expected value.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 0, 720).get_area() == 0,
+			"get_area() should return the expected value.");
+
+	CHECK_MESSAGE(
+			!Rect2i(0, 100, 1280, 720).has_no_area(),
+			"has_no_area() should return the expected value on Rect2i with an area.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 0, 500).has_no_area(), // Zero width.
+			"has_no_area() should return the expected value on Rect2i with no area.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 500, 0).has_no_area(), // Zero height.
+			"has_no_area() should return the expected value on Rect2i with no area.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 0, 0).has_no_area(), // Zero width and height.
+			"has_no_area() should return the expected value on Rect2i with no area.");
+}
+
+TEST_CASE("[Rect2i] Absolute coordinates") { // abs() is expected to return a Rect2i whose size components are non-negative.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).abs() == Rect2i(0, 100, 1280, 720), // Already positive: unchanged.
+			"abs() should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, -100, 1280, 720).abs() == Rect2i(0, -100, 1280, 720), // A negative position alone is left untouched.
+			"abs() should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, -100, -1280, -720).abs() == Rect2i(-1280, -820, 1280, 720), // Position shifts by the negative size on both axes.
+			"abs() should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, -1280, 720).abs() == Rect2i(-1280, 100, 1280, 720), // Only the X axis is flipped.
+			"abs() should return the expected Rect2i.");
+}
+
+TEST_CASE("[Rect2i] Intersection") { // Covers intersection(), which yields the overlapping region as a Rect2i.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).intersection(Rect2i(0, 300, 100, 100)) == Rect2i(0, 300, 100, 100),
+			"intersection() with fully enclosed Rect2i should return the expected result.");
+	// The resulting Rect2i is 100 pixels high because the first Rect2i is vertically offset by 100 pixels.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).intersection(Rect2i(1200, 700, 100, 100)) == Rect2i(1200, 700, 80, 100), // Width is clipped to 80 at the first Rect2i's right edge (x = 1280).
+			"intersection() with partially enclosed Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).intersection(Rect2i(-4000, -4000, 100, 100)) == Rect2i(), // No overlap: a default-constructed Rect2i is expected.
+			"intersection() with non-enclosed Rect2i should return the expected result.");
+}
+
+TEST_CASE("[Rect2i] Enclosing") { // encloses() is expected to be true only when the argument lies entirely inside.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).encloses(Rect2i(0, 300, 100, 100)),
+			"encloses() with fully contained Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			!Rect2i(0, 100, 1280, 720).encloses(Rect2i(1200, 700, 100, 100)), // Sticks out past the right edge (1300 > 1280).
+			"encloses() with partially contained Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			!Rect2i(0, 100, 1280, 720).encloses(Rect2i(-4000, -4000, 100, 100)),
+			"encloses() with non-contained Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).encloses(Rect2i(0, 100, 1280, 720)), // Shared edges still count as enclosed.
+			"encloses() with identical Rect2i should return the expected result.");
+}
+
+TEST_CASE("[Rect2i] Expanding") { // expand() is expected to return the Rect2i grown just enough to reach the given point.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).expand(Vector2i(500, 600)) == Rect2i(0, 100, 1280, 720), // Point already inside: unchanged.
+			"expand() with contained Vector2i should return the expected result.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).expand(Vector2i(0, 0)) == Rect2i(0, 0, 1280, 820), // Top edge moves up to y = 0; the bottom edge stays at y = 820.
+			"expand() with non-contained Vector2i should return the expected result.");
+}
+
+TEST_CASE("[Rect2i] Growing") { // Covers grow(), grow_individual() and grow_side().
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow(100) == Rect2i(-100, 0, 1480, 920), // Every edge moves outward by 100, so the size gains 200 per axis.
+			"grow() with positive value should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow(-100) == Rect2i(100, 200, 1080, 520), // Negative amounts shrink instead.
+			"grow() with negative value should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow(-4000) == Rect2i(4000, 4100, -6720, -7280), // Shrinking past zero is expected to yield a negative size rather than clamp.
+			"grow() with large negative value should return the expected Rect2i.");
+
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow_individual(100, 200, 300, 400) == Rect2i(-100, -100, 1680, 1320), // Amounts are per side, in left, top, right, bottom order.
+			"grow_individual() with positive values should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow_individual(-100, 200, 300, -400) == Rect2i(100, -100, 1480, 520), // Left and bottom shrink; top and right grow.
+			"grow_individual() with positive and negative values should return the expected Rect2i.");
+
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow_side(SIDE_TOP, 500) == Rect2i(0, -400, 1280, 1220), // Only the top edge moves; the bottom stays at y = 820.
+			"grow_side() with positive value should return the expected Rect2i.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).grow_side(SIDE_TOP, -500) == Rect2i(0, 600, 1280, 220), // Top edge moves down by 500; the bottom stays at y = 820.
+			"grow_side() with negative value should return the expected Rect2i.");
+}
+
+TEST_CASE("[Rect2i] Has point") { // has_point() is expected to include the top and left edges but exclude the bottom and right edges.
+	Rect2i rect = Rect2i(0, 100, 1280, 720);
+	CHECK_MESSAGE(
+			rect.has_point(Vector2i(500, 600)),
+			"has_point() with contained Vector2i should return the expected result.");
+	CHECK_MESSAGE(
+			!rect.has_point(Vector2i(0, 0)),
+			"has_point() with non-contained Vector2i should return the expected result.");
+
+	CHECK_MESSAGE(
+			rect.has_point(rect.position),
+			"has_point() with positive size should include `position`.");
+	CHECK_MESSAGE(
+			rect.has_point(rect.position + Vector2i(1, 1)),
+			"has_point() with positive size should include `position + (1, 1)`.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + Vector2i(1, -1)),
+			"has_point() with positive size should not include `position + (1, -1)`.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + rect.size),
+			"has_point() with positive size should not include `position + size`.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + rect.size + Vector2i(1, 1)),
+			"has_point() with positive size should not include `position + size + (1, 1)`.");
+	CHECK_MESSAGE(
+			rect.has_point(rect.position + rect.size + Vector2i(-1, -1)),
+			"has_point() with positive size should include `position + size + (-1, -1)`.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + rect.size + Vector2i(-1, 1)),
+			"has_point() with positive size should not include `position + size + (-1, 1)`.");
+
+	CHECK_MESSAGE(
+			rect.has_point(rect.position + Vector2i(0, 10)),
+			"has_point() with point located on left edge should return true.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + Vector2i(rect.size.x, 10)),
+			"has_point() with point located on right edge should return false.");
+	CHECK_MESSAGE(
+			rect.has_point(rect.position + Vector2i(10, 0)),
+			"has_point() with point located on top edge should return true.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + Vector2i(10, rect.size.y)),
+			"has_point() with point located on bottom edge should return false.");
+
+	/*
+	// FIXME: Disabled for now until GH-37617 is fixed one way or another.
+	// More tests should then be written like for the positive size case.
+	rect = Rect2i(0, 100, -1280, -720);
+	CHECK_MESSAGE(
+			rect.has_point(rect.position),
+			"has_point() with negative size should include `position`.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + rect.size),
+			"has_point() with negative size should not include `position + size`.");
+	*/
+
+	rect = Rect2i(-4000, -200, 1280, 720); // Repeat the edge checks with a negative position.
+	CHECK_MESSAGE(
+			rect.has_point(rect.position + Vector2i(0, 10)),
+			"has_point() with negative position and point located on left edge should return true.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + Vector2i(rect.size.x, 10)),
+			"has_point() with negative position and point located on right edge should return false.");
+	CHECK_MESSAGE(
+			rect.has_point(rect.position + Vector2i(10, 0)),
+			"has_point() with negative position and point located on top edge should return true.");
+	CHECK_MESSAGE(
+			!rect.has_point(rect.position + Vector2i(10, rect.size.y)),
+			"has_point() with negative position and point located on bottom edge should return false.");
+}
+
+TEST_CASE("[Rect2i] Intersects") { // Covers the boolean intersects() query; named after the method so the case can be listed and filtered on its own.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).intersects(Rect2i(0, 300, 100, 100)),
+			"intersects() with fully enclosed Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).intersects(Rect2i(1200, 700, 100, 100)),
+			"intersects() with partially enclosed Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			!Rect2i(0, 100, 1280, 720).intersects(Rect2i(-4000, -4000, 100, 100)),
+			"intersects() with non-enclosed Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			!Rect2i(0, 0, 2, 2).intersects(Rect2i(2, 2, 2, 2)), // The two only touch at the corner (2, 2), which is expected not to count.
+			"intersects() with adjacent Rect2i should return the expected result.");
+}
+
+TEST_CASE("[Rect2i] Merging") { // merge() is expected to return a Rect2i that covers both operands.
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).merge(Rect2i(0, 300, 100, 100)) == Rect2i(0, 100, 1280, 720), // Enclosed operand: unchanged.
+			"merge() with fully enclosed Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).merge(Rect2i(1200, 700, 100, 100)) == Rect2i(0, 100, 1300, 720), // Right edge extends to x = 1300; the operand's bottom (y = 800) is already covered.
+			"merge() with partially enclosed Rect2i should return the expected result.");
+	CHECK_MESSAGE(
+			Rect2i(0, 100, 1280, 720).merge(Rect2i(-4000, -4000, 100, 100)) == Rect2i(-4000, -4000, 5280, 4820), // Spans from (-4000, -4000) to the first Rect2i's end at (1280, 820).
+			"merge() with non-enclosed Rect2i should return the expected result.");
+}
+} // namespace TestRect2i
+
+#endif // TEST_RECT2I_H
diff --git a/tests/core/math/test_vector2.h b/tests/core/math/test_vector2.h
index cb447acd17..ff60467bf4 100644
--- a/tests/core/math/test_vector2.h
+++ b/tests/core/math/test_vector2.h
@@ -32,6 +32,7 @@
 #define TEST_VECTOR2_H
 
 #include "core/math/vector2.h"
+#include "core/math/vector2i.h"
 #include "tests/test_macros.h"
 
 namespace TestVector2 {
diff --git a/tests/core/math/test_vector2i.h b/tests/core/math/test_vector2i.h
index 86e254654d..841bb793a4 100644
--- a/tests/core/math/test_vector2i.h
+++ b/tests/core/math/test_vector2i.h
@@ -32,6 +32,7 @@
 #define TEST_VECTOR2I_H
 
 #include "core/math/vector2.h"
+#include "core/math/vector2i.h"
 #include "tests/test_macros.h"
 
 namespace TestVector2i {
diff --git a/tests/test_main.cpp b/tests/test_main.cpp
index 0190fa5184..3b51a6d805 100644
--- a/tests/test_main.cpp
+++ b/tests/test_main.cpp
@@ -48,6 +48,7 @@
 #include "tests/core/math/test_math.h"
 #include "tests/core/math/test_random_number_generator.h"
 #include "tests/core/math/test_rect2.h"
+#include "tests/core/math/test_rect2i.h"
 #include "tests/core/math/test_vector2.h"
 #include "tests/core/math/test_vector2i.h"
 #include "tests/core/math/test_vector3.h"
diff --git a/thirdparty/README.md b/thirdparty/README.md
index e1f911a9f9..34c33c3b56 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -52,13 +52,13 @@ Includes some patches in the `patches` folder which have been sent upstream.
 
 ## cvtt
 
-- Upstream: https://github.com/elasota/cvtt
-- Version: 1.0.0-beta4 (cc8472a04ba110fe999c686d07af40f7839051fd, 2018)
+- Upstream: https://github.com/elasota/ConvectionKernels
+- Version: git (dc2dbbe0ae2cf2be06ef56d1021e2222a56c7fe2, 2021)
 - License: MIT
 
 Files extracted from upstream source:
 
-- all .cpp, .h, and .txt files in ConvectionKernels/
+- all .cpp, .h, and .txt files, except those in the MakeTables and etc2packer folders.
 
 
 ## doctest
diff --git a/thirdparty/cvtt/ConvectionKernels.cpp b/thirdparty/cvtt/ConvectionKernels.cpp
deleted file mode 100644
index 8d379344e1..0000000000
--- a/thirdparty/cvtt/ConvectionKernels.cpp
+++ /dev/null
@@ -1,7586 +0,0 @@
-/*
-Convection Texture Tools
-Copyright (c) 2018 Eric Lasota
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject
-to the following conditions:
-
-The above copyright notice and this permission notice shall be included
-in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
--------------------------------------------------------------------------------------
-
-Portions based on DirectX Texture Library (DirectXTex)
-
-Copyright (c) Microsoft Corporation. All rights reserved.
-Licensed under the MIT License.
-
-http://go.microsoft.com/fwlink/?LinkId=248926
-*/
-#include "ConvectionKernels.h"
-#include "ConvectionKernels_BC7_SingleColor.h"
-
-#if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__)
-#define CVTT_USE_SSE2
-#endif
-
-#ifdef CVTT_USE_SSE2
-#include <emmintrin.h>
-#endif
-
-#include <float.h>
-#include <assert.h>
-#include <string.h>
-#include <algorithm>
-#include <math.h>
-
-#define UNREFERENCED_PARAMETER(n) ((void)n)
-
-namespace cvtt
-{
-#ifdef CVTT_USE_SSE2
-    // SSE2 version
-    struct ParallelMath
-    {
-        typedef uint16_t ScalarUInt16;
-        typedef int16_t ScalarSInt16;
-
-        template<unsigned int TRoundingMode>
-        struct RoundForScope
-        {
-            unsigned int m_oldCSR;
-
-            RoundForScope()
-            {
-                m_oldCSR = _mm_getcsr();
-                _mm_setcsr((m_oldCSR & ~_MM_ROUND_MASK) | (TRoundingMode));
-            }
-
-            ~RoundForScope()
-            {
-                _mm_setcsr(m_oldCSR);
-            }
-        };
-
-        struct RoundTowardZeroForScope : RoundForScope<_MM_ROUND_TOWARD_ZERO>
-        {
-        };
-
-        struct RoundTowardNearestForScope : RoundForScope<_MM_ROUND_NEAREST>
-        {
-        };
-
-        struct RoundUpForScope : RoundForScope<_MM_ROUND_UP>
-        {
-        };
-
-        struct RoundDownForScope : RoundForScope<_MM_ROUND_DOWN>
-        {
-        };
-
-        static const int ParallelSize = 8;
-
-        enum Int16Subtype
-        {
-            IntSubtype_Signed,
-            IntSubtype_UnsignedFull,
-            IntSubtype_UnsignedTruncated,
-            IntSubtype_Abstract,
-        };
-
-        template<int TSubtype>
-        struct VInt16
-        {
-            __m128i m_value;
-
-            inline VInt16 operator+(int16_t other) const
-            {
-                VInt16 result;
-                result.m_value = _mm_add_epi16(m_value, _mm_set1_epi16(static_cast<int16_t>(other)));
-                return result;
-            }
-
-            inline VInt16 operator+(const VInt16 &other) const
-            {
-                VInt16 result;
-                result.m_value = _mm_add_epi16(m_value, other.m_value);
-                return result;
-            }
-
-            inline VInt16 operator|(const VInt16 &other) const
-            {
-                VInt16 result;
-                result.m_value = _mm_or_si128(m_value, other.m_value);
-                return result;
-            }
-
-            inline VInt16 operator&(const VInt16 &other) const
-            {
-                VInt16 result;
-                result.m_value = _mm_and_si128(m_value, other.m_value);
-                return result;
-            }
-
-            inline VInt16 operator-(const VInt16 &other) const
-            {
-                VInt16 result;
-                result.m_value = _mm_sub_epi16(m_value, other.m_value);
-                return result;
-            }
-
-            inline VInt16 operator<<(int bits) const
-            {
-                VInt16 result;
-                result.m_value = _mm_slli_epi16(m_value, bits);
-                return result;
-            }
-        };
-
-        typedef VInt16<IntSubtype_Signed> SInt16;
-        typedef VInt16<IntSubtype_UnsignedFull> UInt16;
-        typedef VInt16<IntSubtype_UnsignedTruncated> UInt15;
-        typedef VInt16<IntSubtype_Abstract> AInt16;
-
-        template<int TSubtype>
-        struct VInt32
-        {
-            __m128i m_values[2];
-
-            inline VInt32 operator+(const VInt32& other) const
-            {
-                VInt32 result;
-                result.m_values[0] = _mm_add_epi32(m_values[0], other.m_values[0]);
-                result.m_values[1] = _mm_add_epi32(m_values[1], other.m_values[1]);
-                return result;
-            }
-
-            inline VInt32 operator-(const VInt32& other) const
-            {
-                VInt32 result;
-                result.m_values[0] = _mm_sub_epi32(m_values[0], other.m_values[0]);
-                result.m_values[1] = _mm_sub_epi32(m_values[1], other.m_values[1]);
-                return result;
-            }
-
-            inline VInt32 operator<<(const int other) const
-            {
-                VInt32 result;
-                result.m_values[0] = _mm_slli_epi32(m_values[0], other);
-                result.m_values[1] = _mm_slli_epi32(m_values[1], other);
-                return result;
-            }
-        };
-
-        typedef VInt32<IntSubtype_Signed> SInt32;
-        typedef VInt32<IntSubtype_UnsignedTruncated> UInt31;
-        typedef VInt32<IntSubtype_UnsignedFull> UInt32;
-        typedef VInt32<IntSubtype_Abstract> AInt32;
-
-        template<class TTargetType>
-        struct LosslessCast
-        {
-#ifdef CVTT_PERMIT_ALIASING
-            template<int TSrcSubtype>
-            static const TTargetType& Cast(const VInt32<TSrcSubtype> &src)
-            {
-                return reinterpret_cast<VInt32<TSubtype>&>(src);
-            }
-
-            template<int TSrcSubtype>
-            static const TTargetType& Cast(const VInt16<TSrcSubtype> &src)
-            {
-                return reinterpret_cast<VInt16<TSubtype>&>(src);
-            }
-#else
-            template<int TSrcSubtype>
-            static TTargetType Cast(const VInt32<TSrcSubtype> &src)
-            {
-                TTargetType result;
-                result.m_values[0] = src.m_values[0];
-                result.m_values[1] = src.m_values[1];
-                return result;
-            }
-
-            template<int TSrcSubtype>
-            static TTargetType Cast(const VInt16<TSrcSubtype> &src)
-            {
-                TTargetType result;
-                result.m_value = src.m_value;
-                return result;
-            }
-#endif
-        };
-
-        struct Int64
-        {
-            __m128i m_values[4];
-        };
-
-        struct Float
-        {
-            __m128 m_values[2];
-
-            inline Float operator+(const Float &other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_add_ps(m_values[0], other.m_values[0]);
-                result.m_values[1] = _mm_add_ps(m_values[1], other.m_values[1]);
-                return result;
-            }
-
-            inline Float operator+(float other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_add_ps(m_values[0], _mm_set1_ps(other));
-                result.m_values[1] = _mm_add_ps(m_values[1], _mm_set1_ps(other));
-                return result;
-            }
-
-            inline Float operator-(const Float& other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_sub_ps(m_values[0], other.m_values[0]);
-                result.m_values[1] = _mm_sub_ps(m_values[1], other.m_values[1]);
-                return result;
-            }
-
-            inline Float operator-() const
-            {
-                Float result;
-                result.m_values[0] = _mm_sub_ps(_mm_setzero_ps(), m_values[0]);
-                result.m_values[1] = _mm_sub_ps(_mm_setzero_ps(), m_values[1]);
-                return result;
-            }
-
-            inline Float operator*(const Float& other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_mul_ps(m_values[0], other.m_values[0]);
-                result.m_values[1] = _mm_mul_ps(m_values[1], other.m_values[1]);
-                return result;
-            }
-
-            inline Float operator*(float other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_mul_ps(m_values[0], _mm_set1_ps(other));
-                result.m_values[1] = _mm_mul_ps(m_values[1], _mm_set1_ps(other));
-                return result;
-            }
-
-            inline Float operator/(const Float &other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_div_ps(m_values[0], other.m_values[0]);
-                result.m_values[1] = _mm_div_ps(m_values[1], other.m_values[1]);
-                return result;
-            }
-
-            inline Float operator/(float other) const
-            {
-                Float result;
-                result.m_values[0] = _mm_div_ps(m_values[0], _mm_set1_ps(other));
-                result.m_values[1] = _mm_div_ps(m_values[1], _mm_set1_ps(other));
-                return result;
-            }
-        };
-
-        struct Int16CompFlag
-        {
-            __m128i m_value;
-
-            inline Int16CompFlag operator&(const Int16CompFlag &other) const
-            {
-                Int16CompFlag result;
-                result.m_value = _mm_and_si128(m_value, other.m_value);
-                return result;
-            }
-
-            inline Int16CompFlag operator|(const Int16CompFlag &other) const
-            {
-                Int16CompFlag result;
-                result.m_value = _mm_or_si128(m_value, other.m_value);
-                return result;
-            }
-        };
-
-        struct FloatCompFlag
-        {
-            __m128 m_values[2];
-        };
-
-        template<int TSubtype>
-        static VInt16<TSubtype> AbstractAdd(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
-        {
-            VInt16<TSubtype> result;
-            result.m_value = _mm_add_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        template<int TSubtype>
-        static VInt16<TSubtype> AbstractSubtract(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
-        {
-            VInt16<TSubtype> result;
-            result.m_value = _mm_sub_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static Float Select(const FloatCompFlag &flag, const Float &a, const Float &b)
-        {
-            Float result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_or_ps(_mm_and_ps(flag.m_values[i], a.m_values[i]), _mm_andnot_ps(flag.m_values[i], b.m_values[i]));
-            return result;
-        }
-
-        template<int TSubtype>
-        static VInt16<TSubtype> Select(const Int16CompFlag &flag, const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
-        {
-            VInt16<TSubtype> result;
-            result.m_value = _mm_or_si128(_mm_and_si128(flag.m_value, a.m_value), _mm_andnot_si128(flag.m_value, b.m_value));
-            return result;
-        }
-
-        template<int TSubtype>
-        static VInt16<TSubtype> SelectOrZero(const Int16CompFlag &flag, const VInt16<TSubtype> &a)
-        {
-            VInt16<TSubtype> result;
-            result.m_value = _mm_and_si128(flag.m_value, a.m_value);
-            return result;
-        }
-
-        template<int TSubtype>
-        static void ConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src)
-        {
-            dest.m_value = _mm_or_si128(_mm_andnot_si128(flag.m_value, dest.m_value), _mm_and_si128(flag.m_value, src.m_value));
-        }
-
-        static SInt16 ConditionalNegate(const Int16CompFlag &flag, const SInt16 &v)
-        {
-            SInt16 result;
-            result.m_value = _mm_add_epi16(_mm_xor_si128(flag.m_value, v.m_value), _mm_srli_epi16(flag.m_value, 15));
-            return result;
-        }
-
-        template<int TSubtype>
-        static void NotConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src)
-        {
-            dest.m_value = _mm_or_si128(_mm_and_si128(flag.m_value, dest.m_value), _mm_andnot_si128(flag.m_value, src.m_value));
-        }
-
-        static void ConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src)
-        {
-            for (int i = 0; i < 2; i++)
-                dest.m_values[i] = _mm_or_ps(_mm_andnot_ps(flag.m_values[i], dest.m_values[i]), _mm_and_ps(flag.m_values[i], src.m_values[i]));
-        }
-
-        static void NotConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src)
-        {
-            for (int i = 0; i < 2; i++)
-                dest.m_values[i] = _mm_or_ps(_mm_and_ps(flag.m_values[i], dest.m_values[i]), _mm_andnot_ps(flag.m_values[i], src.m_values[i]));
-        }
-
-        static void MakeSafeDenominator(Float& v)
-        {
-            ConditionalSet(v, Equal(v, MakeFloatZero()), MakeFloat(1.0f));
-        }
-
-        static SInt16 TruncateToPrecisionSigned(const SInt16 &v, int precision)
-        {
-            int lostBits = 16 - precision;
-            if (lostBits == 0)
-                return v;
-
-            SInt16 result;
-            result.m_value = _mm_srai_epi16(_mm_slli_epi16(v.m_value, lostBits), lostBits);
-            return result;
-        }
-
-        static UInt16 TruncateToPrecisionUnsigned(const UInt16 &v, int precision)
-        {
-            int lostBits = 16 - precision;
-            if (lostBits == 0)
-                return v;
-
-            UInt16 result;
-            result.m_value = _mm_srli_epi16(_mm_slli_epi16(v.m_value, lostBits), lostBits);
-            return result;
-        }
-
-        static UInt16 Min(const UInt16 &a, const UInt16 &b)
-        {
-            __m128i bitFlip = _mm_set1_epi16(-32768);
-
-            UInt16 result;
-            result.m_value = _mm_xor_si128(_mm_min_epi16(_mm_xor_si128(a.m_value, bitFlip), _mm_xor_si128(b.m_value, bitFlip)), bitFlip);
-            return result;
-        }
-
-        static SInt16 Min(const SInt16 &a, const SInt16 &b)
-        {
-            SInt16 result;
-            result.m_value = _mm_min_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static UInt15 Min(const UInt15 &a, const UInt15 &b)
-        {
-            UInt15 result;
-            result.m_value = _mm_min_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static Float Min(const Float &a, const Float &b)
-        {
-            Float result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_min_ps(a.m_values[i], b.m_values[i]);
-            return result;
-        }
-
-        static UInt16 Max(const UInt16 &a, const UInt16 &b)
-        {
-            __m128i bitFlip = _mm_set1_epi16(-32768);
-
-            UInt16 result;
-            result.m_value = _mm_xor_si128(_mm_max_epi16(_mm_xor_si128(a.m_value, bitFlip), _mm_xor_si128(b.m_value, bitFlip)), bitFlip);
-            return result;
-        }
-
-        static SInt16 Max(const SInt16 &a, const SInt16 &b)
-        {
-            SInt16 result;
-            result.m_value = _mm_max_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static UInt15 Max(const UInt15 &a, const UInt15 &b)
-        {
-            UInt15 result;
-            result.m_value = _mm_max_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static Float Max(const Float &a, const Float &b)
-        {
-            Float result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_max_ps(a.m_values[i], b.m_values[i]);
-            return result;
-        }
-
-        static Float Clamp(const Float &v, float min, float max)
-        {
-            Float result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_max_ps(_mm_min_ps(v.m_values[i], _mm_set1_ps(max)), _mm_set1_ps(min));
-            return result;
-        }
-
-        static Float Reciprocal(const Float &v)
-        {
-            Float result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_rcp_ps(v.m_values[i]);
-            return result;
-        }
-
-        static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, UInt15 &chOut)
-        {
-            int16_t values[8];
-            for (int i = 0; i < 8; i++)
-                values[i] = inputBlocks[i].m_pixels[pxOffset][channel];
-
-            chOut.m_value = _mm_set_epi16(values[7], values[6], values[5], values[4], values[3], values[2], values[1], values[0]);
-        }
-
-        static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, SInt16 &chOut)
-        {
-            int16_t values[8];
-            for (int i = 0; i < 8; i++)
-                values[i] = inputBlocks[i].m_pixels[pxOffset][channel];
-
-            chOut.m_value = _mm_set_epi16(values[7], values[6], values[5], values[4], values[3], values[2], values[1], values[0]);
-        }
-
-        static Float MakeFloat(float v)
-        {
-            Float f;
-            f.m_values[0] = f.m_values[1] = _mm_set1_ps(v);
-            return f;
-        }
-
-        static Float MakeFloatZero()
-        {
-            Float f;
-            f.m_values[0] = f.m_values[1] = _mm_setzero_ps();
-            return f;
-        }
-
-        static UInt16 MakeUInt16(uint16_t v)
-        {
-            UInt16 result;
-            result.m_value = _mm_set1_epi16(static_cast<short>(v));
-            return result;
-        }
-
-        static SInt16 MakeSInt16(int16_t v)
-        {
-            SInt16 result;
-            result.m_value = _mm_set1_epi16(static_cast<short>(v));
-            return result;
-        }
-
-        static AInt16 MakeAInt16(int16_t v)
-        {
-            AInt16 result;
-            result.m_value = _mm_set1_epi16(static_cast<short>(v));
-            return result;
-        }
-
-        static UInt15 MakeUInt15(uint16_t v)
-        {
-            UInt15 result;
-            result.m_value = _mm_set1_epi16(static_cast<short>(v));
-            return result;
-        }
-
-        static SInt32 MakeSInt32(int32_t v)
-        {
-            SInt32 result;
-            result.m_values[0] = _mm_set1_epi32(v);
-            result.m_values[1] = _mm_set1_epi32(v);
-            return result;
-        }
-
-        static UInt31 MakeUInt31(uint32_t v)
-        {
-            UInt31 result;
-            result.m_values[0] = _mm_set1_epi32(v);
-            result.m_values[1] = _mm_set1_epi32(v);
-            return result;
-        }
-
-        static uint16_t Extract(const UInt16 &v, int offset)
-        {
-            return reinterpret_cast<const uint16_t*>(&v.m_value)[offset];
-        }
-
-        static int16_t Extract(const SInt16 &v, int offset)
-        {
-            return reinterpret_cast<const int16_t*>(&v.m_value)[offset];
-        }
-
-        static uint16_t Extract(const UInt15 &v, int offset)
-        {
-            return reinterpret_cast<const uint16_t*>(&v.m_value)[offset];
-        }
-
-        static int16_t Extract(const AInt16 &v, int offset)
-        {
-            return reinterpret_cast<const int16_t*>(&v.m_value)[offset];
-        }
-
-        static void PutUInt16(UInt16 &dest, int offset, uint16_t v)
-        {
-            reinterpret_cast<uint16_t*>(&dest)[offset] = v;
-        }
-
-        static void PutUInt15(UInt15 &dest, int offset, uint16_t v)
-        {
-            reinterpret_cast<uint16_t*>(&dest)[offset] = v;
-        }
-
-        static void PutSInt16(SInt16 &dest, int offset, int16_t v)
-        {
-            reinterpret_cast<int16_t*>(&dest)[offset] = v;
-        }
-
-        static float ExtractFloat(const Float& v, int offset)
-        {
-            return reinterpret_cast<const float*>(&v)[offset];
-        }
-
-        static void PutFloat(Float &dest, int offset, float v)
-        {
-            reinterpret_cast<float*>(&dest)[offset] = v;
-        }
-
-        static Int16CompFlag Less(const SInt16 &a, const SInt16 &b)
-        {
-            Int16CompFlag result;
-            result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static Int16CompFlag Less(const UInt15 &a, const UInt15 &b)
-        {
-            Int16CompFlag result;
-            result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static Int16CompFlag LessOrEqual(const UInt15 &a, const UInt15 &b)
-        {
-            Int16CompFlag result;
-            result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static FloatCompFlag Less(const Float &a, const Float &b)
-        {
-            FloatCompFlag result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_cmplt_ps(a.m_values[i], b.m_values[i]);
-            return result;
-        }
-
-        static FloatCompFlag LessOrEqual(const Float &a, const Float &b)
-        {
-            FloatCompFlag result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_cmple_ps(a.m_values[i], b.m_values[i]);
-            return result;
-        }
-
-        template<int TSubtype>
-        static Int16CompFlag Equal(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
-        {
-            Int16CompFlag result;
-            result.m_value = _mm_cmpeq_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static FloatCompFlag Equal(const Float &a, const Float &b)
-        {
-            FloatCompFlag result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_cmpeq_ps(a.m_values[i], b.m_values[i]);
-            return result;
-        }
-
-        static Float ToFloat(const UInt16 &v)
-        {
-            Float result;
-            result.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()));
-            result.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()));
-            return result;
-        }
-
-        static UInt31 ToUInt31(const UInt16 &v)
-        {
-            UInt31 result;
-            result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128());
-            result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128());
-            return result;
-        }
-
-        static SInt32 ToInt32(const UInt16 &v)
-        {
-            SInt32 result;
-            result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128());
-            result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128());
-            return result;
-        }
-
-        static SInt32 ToInt32(const SInt16 &v)
-        {
-            SInt32 result;
-            result.m_values[0] = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v.m_value), 16);
-            result.m_values[1] = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v.m_value), 16);
-            return result;
-        }
-
-        static Float ToFloat(const SInt16 &v)
-        {
-            Float result;
-            result.m_values[0] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v.m_value), 16));
-            result.m_values[1] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v.m_value), 16));
-            return result;
-        }
-
-        static Float ToFloat(const UInt15 &v)
-        {
-            Float result;
-            result.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()));
-            result.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()));
-            return result;
-        }
-
-        static Float ToFloat(const UInt31 &v)
-        {
-            Float result;
-            result.m_values[0] = _mm_cvtepi32_ps(v.m_values[0]);
-            result.m_values[1] = _mm_cvtepi32_ps(v.m_values[1]);
-            return result;
-        }
-
-        static Int16CompFlag FloatFlagToInt16(const FloatCompFlag &v)
-        {
-            __m128i lo = _mm_castps_si128(v.m_values[0]);
-            __m128i hi = _mm_castps_si128(v.m_values[1]);
-
-            Int16CompFlag result;
-            result.m_value = _mm_packs_epi32(lo, hi);
-            return result;
-        }
-
-        static FloatCompFlag Int16FlagToFloat(const Int16CompFlag &v)
-        {
-            __m128i lo = _mm_unpacklo_epi16(v.m_value, v.m_value);
-            __m128i hi = _mm_unpackhi_epi16(v.m_value, v.m_value);
-
-            FloatCompFlag result;
-            result.m_values[0] = _mm_castsi128_ps(lo);
-            result.m_values[1] = _mm_castsi128_ps(hi);
-            return result;
-        }
-
-        static Int16CompFlag MakeBoolInt16(bool b)
-        {
-            Int16CompFlag result;
-            if (b)
-                result.m_value = _mm_set1_epi16(-1);
-            else
-                result.m_value = _mm_setzero_si128();
-            return result;
-        }
-
-        static FloatCompFlag MakeBoolFloat(bool b)
-        {
-            FloatCompFlag result;
-            if (b)
-                result.m_values[0] = result.m_values[1] = _mm_castsi128_ps(_mm_set1_epi32(-1));
-            else
-                result.m_values[0] = result.m_values[1] = _mm_setzero_ps();
-            return result;
-        }
-
-        static Int16CompFlag AndNot(const Int16CompFlag &a, const Int16CompFlag &b)
-        {
-            Int16CompFlag result;
-            result.m_value = _mm_andnot_si128(b.m_value, a.m_value);
-            return result;
-        }
-
-        static UInt16 RoundAndConvertToU16(const Float &v, const void* /*roundingMode*/)
-        {
-            __m128i lo = _mm_cvtps_epi32(_mm_add_ps(v.m_values[0], _mm_set1_ps(-32768)));
-            __m128i hi = _mm_cvtps_epi32(_mm_add_ps(v.m_values[1], _mm_set1_ps(-32768)));
-
-            __m128i packed = _mm_packs_epi32(lo, hi);
-
-            UInt16 result;
-            result.m_value = _mm_xor_si128(packed, _mm_set1_epi16(-32768));
-            return result;
-        }
-
-        static UInt15 RoundAndConvertToU15(const Float &v, const void* /*roundingMode*/)
-        {
-            __m128i lo = _mm_cvtps_epi32(v.m_values[0]);
-            __m128i hi = _mm_cvtps_epi32(v.m_values[1]);
-
-            __m128i packed = _mm_packs_epi32(lo, hi);
-
-            UInt15 result;
-            result.m_value = _mm_packs_epi32(lo, hi);
-            return result;
-        }
-
-        static SInt16 RoundAndConvertToS16(const Float &v, const void* /*roundingMode*/)
-        {
-            __m128i lo = _mm_cvtps_epi32(v.m_values[0]);
-            __m128i hi = _mm_cvtps_epi32(v.m_values[1]);
-
-            __m128i packed = _mm_packs_epi32(lo, hi);
-
-            SInt16 result;
-            result.m_value = _mm_packs_epi32(lo, hi);
-            return result;
-        }
-
-        static Float Sqrt(const Float &f)
-        {
-            Float result;
-            for (int i = 0; i < 2; i++)
-                result.m_values[i] = _mm_sqrt_ps(f.m_values[i]);
-            return result;
-        }
-
-        static UInt16 Abs(const SInt16 &a)
-        {
-            __m128i signBitsXor = _mm_srai_epi16(a.m_value, 15);
-            __m128i signBitsAdd = _mm_srli_epi16(a.m_value, 15);
-
-            UInt16 result;
-            result.m_value = _mm_add_epi16(_mm_xor_si128(a.m_value, signBitsXor), signBitsAdd);
-            return result;
-        }
-
-        static Float Abs(const Float& a)
-        {
-            __m128 invMask = _mm_set1_ps(-0.0f);
-
-            Float result;
-            result.m_values[0] = _mm_andnot_ps(invMask, a.m_values[0]);
-            result.m_values[1] = _mm_andnot_ps(invMask, a.m_values[1]);
-            return result;
-        }
-
-        static UInt16 SqDiffUInt8(const UInt15 &a, const UInt15 &b)
-        {
-            __m128i diff = _mm_sub_epi16(a.m_value, b.m_value);
-
-            UInt16 result;
-            result.m_value = _mm_mullo_epi16(diff, diff);
-            return result;
-        }
-
-        static Float SqDiffSInt16(const SInt16 &a, const SInt16 &b)
-        {
-            __m128i diffU = _mm_sub_epi16(_mm_max_epi16(a.m_value, b.m_value), _mm_min_epi16(a.m_value, b.m_value));
-
-            __m128i mulHi = _mm_mulhi_epu16(diffU, diffU);
-            __m128i mulLo = _mm_mullo_epi16(diffU, diffU);
-            __m128i sqDiffHi = _mm_unpackhi_epi16(mulLo, mulHi);
-            __m128i sqDiffLo = _mm_unpacklo_epi16(mulLo, mulHi);
-
-            Float result;
-            result.m_values[0] = _mm_cvtepi32_ps(sqDiffLo);
-            result.m_values[1] = _mm_cvtepi32_ps(sqDiffHi);
-
-            return result;
-        }
-
-        static Float TwosCLHalfToFloat(const SInt16 &v)
-        {
-            __m128i absV = _mm_add_epi16(_mm_xor_si128(v.m_value, _mm_srai_epi16(v.m_value, 15)), _mm_srli_epi16(v.m_value, 15));
-
-            __m128i signBits = _mm_and_si128(v.m_value, _mm_set1_epi16(-32768));
-            __m128i mantissa = _mm_and_si128(v.m_value, _mm_set1_epi16(0x03ff));
-            __m128i exponent = _mm_and_si128(v.m_value, _mm_set1_epi16(0x7c00));
-
-            __m128i isDenormal = _mm_cmpeq_epi16(exponent, _mm_setzero_si128());
-
-            // Convert exponent to high-bits 
-            exponent = _mm_add_epi16(_mm_srli_epi16(exponent, 3), _mm_set1_epi16(14336));
-
-            __m128i denormalCorrectionHigh = _mm_and_si128(isDenormal, _mm_or_si128(signBits, _mm_set1_epi16(14336)));
-
-            __m128i highBits = _mm_or_si128(signBits, _mm_or_si128(exponent, _mm_srli_epi16(mantissa, 3)));
-            __m128i lowBits = _mm_slli_epi16(mantissa, 13);
-
-            __m128i flow = _mm_unpacklo_epi16(lowBits, highBits);
-            __m128i fhigh = _mm_unpackhi_epi16(lowBits, highBits);
-
-            __m128i correctionLow = _mm_unpacklo_epi16(_mm_setzero_si128(), denormalCorrectionHigh);
-            __m128i correctionHigh = _mm_unpackhi_epi16(_mm_setzero_si128(), denormalCorrectionHigh);
-
-            Float result;
-            result.m_values[0] = _mm_sub_ps(_mm_castsi128_ps(flow), _mm_castsi128_ps(correctionLow));
-            result.m_values[1] = _mm_sub_ps(_mm_castsi128_ps(fhigh), _mm_castsi128_ps(correctionHigh));
-
-            return result;
-        }
-
-        static Float SqDiff2CLFloat(const SInt16 &a, const Float &b)
-        {
-            Float fa = TwosCLHalfToFloat(a);
-
-            Float diff = fa - b;
-            return diff * diff;
-        }
-
-        static Float SqDiff2CL(const SInt16 &a, const SInt16 &b)
-        {
-            Float fa = TwosCLHalfToFloat(a);
-            Float fb = TwosCLHalfToFloat(b);
-
-            Float diff = fa - fb;
-            return diff * diff;
-        }
-
-        static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b)
-        {
-            Float fa = TwosCLHalfToFloat(a) * aWeight;
-
-            Float diff = fa - b;
-            return diff * diff;
-        }
-
-        static UInt16 RightShift(const UInt16 &v, int bits)
-        {
-            UInt16 result;
-            result.m_value = _mm_srli_epi16(v.m_value, bits);
-            return result;
-        }
-
-        static UInt31 RightShift(const UInt31 &v, int bits)
-        {
-            UInt31 result;
-            result.m_values[0] = _mm_srli_epi32(v.m_values[0], bits);
-            result.m_values[1] = _mm_srli_epi32(v.m_values[1], bits);
-            return result;
-        }
-
-        static SInt16 RightShift(const SInt16 &v, int bits)
-        {
-            SInt16 result;
-            result.m_value = _mm_srai_epi16(v.m_value, bits);
-            return result;
-        }
-
-        static UInt15 RightShift(const UInt15 &v, int bits)
-        {
-            UInt15 result;
-            result.m_value = _mm_srli_epi16(v.m_value, bits);
-            return result;
-        }
-
-        static SInt32 RightShift(const SInt32 &v, int bits)
-        {
-            SInt32 result;
-            result.m_values[0] = _mm_srai_epi32(v.m_values[0], bits);
-            result.m_values[1] = _mm_srai_epi32(v.m_values[1], bits);
-            return result;
-        }
-
-        static SInt16 ToSInt16(const SInt32 &v)
-        {
-            SInt16 result;
-            result.m_value = _mm_packs_epi32(v.m_values[0], v.m_values[1]);
-            return result;
-        }
-
-        static UInt16 ToUInt16(const UInt32 &v)
-        {
-            __m128i low = _mm_srai_epi32(_mm_slli_epi32(v.m_values[0], 16), 16);
-            __m128i high = _mm_srai_epi32(_mm_slli_epi32(v.m_values[1], 16), 16);
-
-            UInt16 result;
-            result.m_value = _mm_packs_epi32(low, high);
-            return result;
-        }
-
-        static UInt16 ToUInt16(const UInt31 &v)
-        {
-            __m128i low = _mm_srai_epi32(_mm_slli_epi32(v.m_values[0], 16), 16);
-            __m128i high = _mm_srai_epi32(_mm_slli_epi32(v.m_values[1], 16), 16);
-
-            UInt16 result;
-            result.m_value = _mm_packs_epi32(low, high);
-            return result;
-        }
-
-        static UInt15 ToUInt15(const UInt31 &v)
-        {
-            UInt15 result;
-            result.m_value = _mm_packs_epi32(v.m_values[0], v.m_values[1]);
-            return result;
-        }
-
-        static SInt32 XMultiply(const SInt16 &a, const SInt16 &b)
-        {
-            __m128i high = _mm_mulhi_epi16(a.m_value, b.m_value);
-            __m128i low = _mm_mullo_epi16(a.m_value, b.m_value);
-
-            SInt32 result;
-            result.m_values[0] = _mm_unpacklo_epi16(low, high);
-            result.m_values[1] = _mm_unpackhi_epi16(low, high);
-            return result;
-        }
-
-        static SInt32 XMultiply(const SInt16 &a, const UInt15 &b)
-        {
-            __m128i high = _mm_mulhi_epi16(a.m_value, b.m_value);
-            __m128i low = _mm_mullo_epi16(a.m_value, b.m_value);
-
-            SInt32 result;
-            result.m_values[0] = _mm_unpacklo_epi16(low, high);
-            result.m_values[1] = _mm_unpackhi_epi16(low, high);
-            return result;
-        }
-
-        static SInt32 XMultiply(const UInt15 &a, const SInt16 &b)
-        {
-            return XMultiply(b, a);
-        }
-
-        static UInt32 XMultiply(const UInt16 &a, const UInt16 &b)
-        {
-            __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value);
-            __m128i low = _mm_mullo_epi16(a.m_value, b.m_value);
-
-            UInt32 result;
-            result.m_values[0] = _mm_unpacklo_epi16(low, high);
-            result.m_values[1] = _mm_unpackhi_epi16(low, high);
-            return result;
-        }
-
-        static UInt16 CompactMultiply(const UInt16 &a, const UInt15 &b)
-        {
-            UInt16 result;
-            result.m_value = _mm_mullo_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static UInt16 CompactMultiply(const UInt15 &a, const UInt15 &b)
-        {
-            UInt16 result;
-            result.m_value = _mm_mullo_epi16(a.m_value, b.m_value);
-            return result;
-        }
-
-        static UInt31 XMultiply(const UInt15 &a, const UInt15 &b)
-        {
-            __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value);
-            __m128i low = _mm_mullo_epi16(a.m_value, b.m_value);
-
-            UInt31 result;
-            result.m_values[0] = _mm_unpacklo_epi16(low, high);
-            result.m_values[1] = _mm_unpackhi_epi16(low, high);
-            return result;
-        }
-
-        static UInt31 XMultiply(const UInt16 &a, const UInt15 &b)
-        {
-            __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value);
-            __m128i low = _mm_mullo_epi16(a.m_value, b.m_value);
-
-            UInt31 result;
-            result.m_values[0] = _mm_unpacklo_epi16(low, high);
-            result.m_values[1] = _mm_unpackhi_epi16(low, high);
-            return result;
-        }
-
-        static UInt31 XMultiply(const UInt15 &a, const UInt16 &b)
-        {
-            return XMultiply(b, a);
-        }
-
-        static bool AnySet(const Int16CompFlag &v)
-        {
-            return _mm_movemask_epi8(v.m_value) != 0;
-        }
-
-        static bool AllSet(const Int16CompFlag &v)
-        {
-            return _mm_movemask_epi8(v.m_value) == 0xffff;
-        }
-
-        static bool AnySet(const FloatCompFlag &v)
-        {
-            return _mm_movemask_ps(v.m_values[0]) != 0 || _mm_movemask_ps(v.m_values[1]) != 0;
-        }
-
-        static bool AllSet(const FloatCompFlag &v)
-        {
-            return _mm_movemask_ps(v.m_values[0]) == 0xf && _mm_movemask_ps(v.m_values[1]) == 0xf;
-        }
-    };
-
-#else
-    // Scalar version
-    struct ParallelMath
-    {
-        struct RoundTowardZeroForScope
-        {
-        };
-
-        struct RoundTowardNearestForScope
-        {
-        };
-
-        struct RoundUpForScope
-        {
-        };
-
-        struct RoundDownForScope
-        {
-        };
-
-        static const int ParallelSize = 1;
-
-        enum Int16Subtype
-        {
-            IntSubtype_Signed,
-            IntSubtype_UnsignedFull,
-            IntSubtype_UnsignedTruncated,
-            IntSubtype_Abstract,
-        };
-
-        typedef int32_t SInt16;
-        typedef int32_t UInt15;
-        typedef int32_t UInt16;
-        typedef int32_t AInt16;
-
-        typedef int32_t SInt32;
-        typedef int32_t UInt31;
-        typedef int32_t UInt32;
-        typedef int32_t AInt32;
-
-        typedef int32_t ScalarUInt16;
-        typedef int32_t ScalarSInt16;
-
-        typedef float Float;
-
-        template<class TTargetType>
-        struct LosslessCast
-        {
-            static const int32_t& Cast(const int32_t &src)
-            {
-                return src;
-            }
-        };
-
-        typedef bool Int16CompFlag;
-        typedef bool FloatCompFlag;
-
-        static int32_t AbstractAdd(const int32_t &a, const int32_t &b)
-        {
-            return a + b;
-        }
-
-        static int32_t AbstractSubtract(const int32_t &a, const int32_t &b)
-        {
-            return a - b;
-        }
-
-        static float Select(bool flag, float a, float b)
-        {
-            return flag ? a : b;
-        }
-
-        static int32_t Select(bool flag, int32_t a, int32_t b)
-        {
-            return flag ? a : b;
-        }
-
-        static int32_t SelectOrZero(bool flag, int32_t a)
-        {
-            return flag ? a : 0;
-        }
-
-        static void ConditionalSet(int32_t& dest, bool flag, int32_t src)
-        {
-            if (flag)
-                dest = src;
-        }
-
-        static int32_t ConditionalNegate(bool flag, int32_t v)
-        {
-            return (flag) ? -v : v;
-        }
-
-        static void NotConditionalSet(int32_t& dest, bool flag, int32_t src)
-        {
-            if (!flag)
-                dest = src;
-        }
-
-        static void ConditionalSet(float& dest, bool flag, float src)
-        {
-            if (flag)
-                dest = src;
-        }
-
-        static void NotConditionalSet(float& dest, bool flag, float src)
-        {
-            if (!flag)
-                dest = src;
-        }
-
-        static void MakeSafeDenominator(float& v)
-        {
-            if (v == 0.0f)
-                v = 1.0f;
-        }
-
-        static int32_t SignedRightShift(int32_t v, int bits)
-        {
-            return v >> bits;
-        }
-
-        static int32_t TruncateToPrecisionSigned(int32_t v, int precision)
-        {
-            v = (v << (32 - precision)) & 0xffffffff;
-            return SignedRightShift(v, 32 - precision);
-        }
-
-        static int32_t TruncateToPrecisionUnsigned(int32_t v, int precision)
-        {
-            return v & ((1 << precision) - 1);
-        }
-
-        static int32_t Min(int32_t a, int32_t b)
-        {
-            if (a < b)
-                return a;
-            return b;
-        }
-
-        static float Min(float a, float b)
-        {
-            if (a < b)
-                return a;
-            return b;
-        }
-
-        static int32_t Max(int32_t a, int32_t b)
-        {
-            if (a > b)
-                return a;
-            return b;
-        }
-
-        static float Max(float a, float b)
-        {
-            if (a > b)
-                return a;
-            return b;
-        }
-
-        static float Abs(float a)
-        {
-            return fabsf(a);
-        }
-
-        static int32_t Abs(int32_t a)
-        {
-            if (a < 0)
-                return -a;
-            return a;
-        }
-
-        static float Clamp(float v, float min, float max)
-        {
-            if (v < min)
-                return min;
-            if (v > max)
-                return max;
-            return v;
-        }
-
-        static float Reciprocal(float v)
-        {
-            return 1.0f / v;
-        }
-
-        static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, int32_t& chOut)
-        {
-            chOut = inputBlocks[0].m_pixels[pxOffset][channel];
-        }
-
-        static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, int32_t& chOut)
-        {
-            chOut = inputBlocks[0].m_pixels[pxOffset][channel];
-        }
-
-        static float MakeFloat(float v)
-        {
-            return v;
-        }
-
-        static float MakeFloatZero()
-        {
-            return 0.0f;
-        }
-
-        static int32_t MakeUInt16(uint16_t v)
-        {
-            return v;
-        }
-
-        static int32_t MakeSInt16(int16_t v)
-        {
-            return v;
-        }
-
-        static int32_t MakeAInt16(int16_t v)
-        {
-            return v;
-        }
-
-        static int32_t MakeUInt15(uint16_t v)
-        {
-            return v;
-        }
-
-        static int32_t MakeSInt32(int32_t v)
-        {
-            return v;
-        }
-
-        static int32_t MakeUInt31(int32_t v)
-        {
-            return v;
-        }
-
-        static int32_t Extract(int32_t v, int offset)
-        {
-            UNREFERENCED_PARAMETER(offset);
-            return v;
-        }
-
-        static void PutUInt16(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v)
-        {
-            UNREFERENCED_PARAMETER(offset);
-            dest = v;
-        }
-
-        static void PutUInt15(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v)
-        {
-            UNREFERENCED_PARAMETER(offset);
-            dest = v;
-        }
-
-        static void PutSInt16(int32_t &dest, int offset, ParallelMath::ScalarSInt16 v)
-        {
-            UNREFERENCED_PARAMETER(offset);
-            dest = v;
-        }
-
-        static float ExtractFloat(float v, int offset)
-        {
-            UNREFERENCED_PARAMETER(offset);
-            return v;
-        }
-
-        static void PutFloat(float &dest, int offset, float v)
-        {
-            UNREFERENCED_PARAMETER(offset);
-            dest = v;
-        }
-
-        static bool Less(int32_t a, int32_t b)
-        {
-            return a < b;
-        }
-
-        static bool Less(float a, float b)
-        {
-            return a < b;
-        }
-
-        static bool LessOrEqual(int32_t a, int32_t b)
-        {
-            return a < b;
-        }
-
-        static bool LessOrEqual(float a, float b)
-        {
-            return a < b;
-        }
-
-        static bool Equal(int32_t a, int32_t b)
-        {
-            return a == b;
-        }
-
-        static bool Equal(float a, float b)
-        {
-            return a == b;
-        }
-
-        static float ToFloat(int32_t v)
-        {
-            return static_cast<float>(v);
-        }
-
-        static int32_t ToUInt31(int32_t v)
-        {
-            return v;
-        }
-
-        static int32_t ToInt32(int32_t v)
-        {
-            return v;
-        }
-
-        static bool FloatFlagToInt16(bool v)
-        {
-            return v;
-        }
-
-        static bool Int16FlagToFloat(bool v)
-        {
-            return v;
-        }
-
-        static bool MakeBoolInt16(bool b)
-        {
-            return b;
-        }
-
-        static bool MakeBoolFloat(bool b)
-        {
-            return b;
-        }
-
-        static bool AndNot(bool a, bool b)
-        {
-            return a && !b;
-        }
-
-        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardZeroForScope *rtz)
-        {
-            UNREFERENCED_PARAMETER(rtz);
-            return static_cast<int>(v);
-        }
-
-        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundUpForScope *ru)
-        {
-            UNREFERENCED_PARAMETER(ru);
-            return static_cast<int>(ceilf(v));
-        }
-
-        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundDownForScope *rd)
-        {
-            UNREFERENCED_PARAMETER(rd);
-            return static_cast<int>(floorf(v));
-        }
-
-        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardNearestForScope *rtn)
-        {
-            UNREFERENCED_PARAMETER(rtn);
-            return static_cast<int>(floorf(v + 0.5f));
-        }
-
-        template<class TRoundMode>
-        static int32_t RoundAndConvertToU16(float v, const TRoundMode *roundingMode)
-        {
-            return RoundAndConvertToInt(v, roundingMode);
-        }
-
-        template<class TRoundMode>
-        static int32_t RoundAndConvertToU15(float v, const TRoundMode *roundingMode)
-        {
-            return RoundAndConvertToInt(v, roundingMode);
-        }
-
-        template<class TRoundMode>
-        static int32_t RoundAndConvertToS16(float v, const TRoundMode *roundingMode)
-        {
-            return RoundAndConvertToInt(v, roundingMode);
-        }
-
-        static float Sqrt(float f)
-        {
-            return sqrtf(f);
-        }
-
-        static int32_t SqDiffUInt8(int32_t a, int32_t b)
-        {
-            int32_t delta = a - b;
-            return delta * delta;
-        }
-
-        static int32_t SqDiffInt16(int32_t a, int32_t b)
-        {
-            int32_t delta = a - b;
-            return delta * delta;
-        }
-
-        static int32_t SqDiffSInt16(int32_t a, int32_t b)
-        {
-            int32_t delta = a - b;
-            return delta * delta;
-        }
-
-        static float TwosCLHalfToFloat(int32_t v)
-        {
-            int32_t absV = (v < 0) ? -v : v;
-
-            int32_t signBits = (absV & -32768);
-            int32_t mantissa = (absV & 0x03ff);
-            int32_t exponent = (absV & 0x7c00);
-
-            bool isDenormal = (exponent == 0);
-
-            // Convert exponent to high-bits
-            exponent = (exponent >> 3) + 14336;
-
-            int32_t denormalCorrection = (isDenormal ? (signBits | 14336) : 0) << 16;
-
-            int32_t fBits = ((exponent | signBits) << 16) | (mantissa << 13);
-
-            float f, correction;
-            memcpy(&f, &fBits, 4);
-            memcpy(&correction, &denormalCorrection, 4);
-
-            return f - correction;
-        }
-
-        static Float SqDiff2CLFloat(const SInt16 &a, const Float &b)
-        {
-            Float fa = TwosCLHalfToFloat(a);
-
-            Float diff = fa - b;
-            return diff * diff;
-        }
-
-        static Float SqDiff2CL(const SInt16 &a, const SInt16 &b)
-        {
-            Float fa = TwosCLHalfToFloat(a);
-            Float fb = TwosCLHalfToFloat(b);
-
-            Float diff = fa - fb;
-            return diff * diff;
-        }
-
-        static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b)
-        {
-            Float fa = TwosCLHalfToFloat(a) * aWeight;
-
-            Float diff = fa - b;
-            return diff * diff;
-        }
-
-        static int32_t RightShift(int32_t v, int bits)
-        {
-            return SignedRightShift(v, bits);
-        }
-
-        static int32_t ToSInt16(int32_t v)
-        {
-            return v;
-        }
-
-        static int32_t ToUInt16(int32_t v)
-        {
-            return v;
-        }
-
-        static int32_t ToUInt15(int32_t v)
-        {
-            return v;
-        }
-
-        static int32_t XMultiply(int32_t a, int32_t b)
-        {
-            return a * b;
-        }
-
-        static int32_t CompactMultiply(int32_t a, int32_t b)
-        {
-            return a * b;
-        }
-
-        static bool AnySet(bool v)
-        {
-            return v;
-        }
-
-        static bool AllSet(bool v)
-        {
-            return v;
-        }
-    };
-
-#endif
-
-    namespace Internal
-    {
-        namespace BC7Data
-        {
-            enum AlphaMode
-            {
-                AlphaMode_Combined,
-                AlphaMode_Separate,
-                AlphaMode_None,
-            };
-
-            enum PBitMode
-            {
-                PBitMode_PerEndpoint,
-                PBitMode_PerSubset,
-                PBitMode_None
-            };
-
-            struct BC7ModeInfo
-            {
-                PBitMode m_pBitMode;
-                AlphaMode m_alphaMode;
-                int m_rgbBits;
-                int m_alphaBits;
-                int m_partitionBits;
-                int m_numSubsets;
-                int m_indexBits;
-                int m_alphaIndexBits;
-                bool m_hasIndexSelector;
-            };
-
-            BC7ModeInfo g_modes[] =
-            {
-                { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false },     // 0
-                { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false },       // 1
-                { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false },            // 2
-                { PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false },     // 3 (Mode reference has an error, P-bit is really per-endpoint)
-
-                { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true },         // 4
-                { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false },        // 5
-                { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6
-                { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false }  // 7
-            };
-
-			const int g_weight2[] = { 0, 21, 43, 64 };
-			const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
-			const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
-
-			const int *g_weightTables[] =
-			{
-				NULL,
-				NULL,
-				g_weight2,
-				g_weight3,
-				g_weight4
-			};
-
-            struct BC6HModeInfo
-            {
-                uint16_t m_modeID;
-                bool m_partitioned;
-                bool m_transformed;
-                int m_aPrec;
-                int m_bPrec[3];
-            };
-
-            // [partitioned][precision]
-            bool g_hdrModesExistForPrecision[2][17] =
-            {
-                //0      1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16
-                { false, false, false, false, false, false, false, false, false, false, true,  true,  true,  false, false, false, true },
-                { false, false, false, false, false, false, true,  true,  true,  true,  true,  true,  false, false, false, false, false },
-            };
-
-            BC6HModeInfo g_hdrModes[] =
-            {
-                { 0x00, true,  true,  10,{ 5, 5, 5 } },
-                { 0x01, true,  true,  7,{ 6, 6, 6 } },
-                { 0x02, true,  true,  11,{ 5, 4, 4 } },
-                { 0x06, true,  true,  11,{ 4, 5, 4 } },
-                { 0x0a, true,  true,  11,{ 4, 4, 5 } },
-                { 0x0e, true,  true,  9,{ 5, 5, 5 } },
-                { 0x12, true,  true,  8,{ 6, 5, 5 } },
-                { 0x16, true,  true,  8,{ 5, 6, 5 } },
-                { 0x1a, true,  true,  8,{ 5, 5, 6 } },
-                { 0x1e, true,  false, 6,{ 6, 6, 6 } },
-                { 0x03, false, false, 10,{ 10, 10, 10 } },
-                { 0x07, false, true,  11,{ 9, 9, 9 } },
-                { 0x0b, false, true,  12,{ 8, 8, 8 } },
-                { 0x0f, false, true,  16,{ 4, 4, 4 } },
-            };
-
-            const int g_maxHDRPrecision = 16;
-
-            static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]);
-
-            static uint16_t g_partitionMap[64] =
-            {
-                0xCCCC, 0x8888, 0xEEEE, 0xECC8,
-                0xC880, 0xFEEC, 0xFEC8, 0xEC80,
-                0xC800, 0xFFEC, 0xFE80, 0xE800,
-                0xFFE8, 0xFF00, 0xFFF0, 0xF000,
-                0xF710, 0x008E, 0x7100, 0x08CE,
-                0x008C, 0x7310, 0x3100, 0x8CCE,
-                0x088C, 0x3110, 0x6666, 0x366C,
-                0x17E8, 0x0FF0, 0x718E, 0x399C,
-                0xaaaa, 0xf0f0, 0x5a5a, 0x33cc,
-                0x3c3c, 0x55aa, 0x9696, 0xa55a,
-                0x73ce, 0x13c8, 0x324c, 0x3bdc,
-                0x6996, 0xc33c, 0x9966, 0x660,
-                0x272, 0x4e4, 0x4e40, 0x2720,
-                0xc936, 0x936c, 0x39c6, 0x639c,
-                0x9336, 0x9cc6, 0x817e, 0xe718,
-                0xccf0, 0xfcc, 0x7744, 0xee22,
-            };
-
-            static uint32_t g_partitionMap2[64] =
-            {
-                0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
-                0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
-                0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
-                0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
-                0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
-                0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
-                0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
-                0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
-                0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
-                0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
-                0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
-                0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
-                0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
-                0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
-                0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
-                0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
-            };
-
-            static int g_fixupIndexes2[64] =
-            {
-                15,15,15,15,
-                15,15,15,15,
-                15,15,15,15,
-                15,15,15,15,
-                15, 2, 8, 2,
-                2, 8, 8,15,
-                2, 8, 2, 2,
-                8, 8, 2, 2,
-
-                15,15, 6, 8,
-                2, 8,15,15,
-                2, 8, 2, 2,
-                2,15,15, 6,
-                6, 2, 6, 8,
-                15,15, 2, 2,
-                15,15,15,15,
-                15, 2, 2,15,
-            };
-
-            static int g_fixupIndexes3[64][2] =
-            {
-                { 3,15 },{ 3, 8 },{ 15, 8 },{ 15, 3 },
-                { 8,15 },{ 3,15 },{ 15, 3 },{ 15, 8 },
-                { 8,15 },{ 8,15 },{ 6,15 },{ 6,15 },
-                { 6,15 },{ 5,15 },{ 3,15 },{ 3, 8 },
-                { 3,15 },{ 3, 8 },{ 8,15 },{ 15, 3 },
-                { 3,15 },{ 3, 8 },{ 6,15 },{ 10, 8 },
-                { 5, 3 },{ 8,15 },{ 8, 6 },{ 6,10 },
-                { 8,15 },{ 5,15 },{ 15,10 },{ 15, 8 },
-
-                { 8,15 },{ 15, 3 },{ 3,15 },{ 5,10 },
-                { 6,10 },{ 10, 8 },{ 8, 9 },{ 15,10 },
-                { 15, 6 },{ 3,15 },{ 15, 8 },{ 5,15 },
-                { 15, 3 },{ 15, 6 },{ 15, 6 },{ 15, 8 },
-                { 3,15 },{ 15, 3 },{ 5,15 },{ 5,15 },
-                { 5,15 },{ 8,15 },{ 5,15 },{ 10,15 },
-                { 5,15 },{ 10,15 },{ 8,15 },{ 13,15 },
-                { 15, 3 },{ 12,15 },{ 3,15 },{ 3, 8 },
-            };
-
-            static const unsigned char g_fragments[] =
-            {
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0, 16
-                0, 1, 2, 3,  // 16, 4
-                0, 1, 4,  // 20, 3
-                0, 1, 2, 4,  // 23, 4
-                2, 3, 7,  // 27, 3
-                1, 2, 3, 7,  // 30, 4
-                0, 1, 2, 3, 4, 5, 6, 7,  // 34, 8
-                0, 1, 4, 8,  // 42, 4
-                0, 1, 2, 4, 5, 8,  // 46, 6
-                0, 1, 2, 3, 4, 5, 6, 8,  // 52, 8
-                1, 4, 5, 6, 9,  // 60, 5
-                2, 5, 6, 7, 10,  // 65, 5
-                5, 6, 9, 10,  // 70, 4
-                2, 3, 7, 11,  // 74, 4
-                1, 2, 3, 6, 7, 11,  // 78, 6
-                0, 1, 2, 3, 5, 6, 7, 11,  // 84, 8
-                0, 1, 2, 3, 8, 9, 10, 11,  // 92, 8
-                2, 3, 6, 7, 8, 9, 10, 11,  // 100, 8
-                4, 5, 6, 7, 8, 9, 10, 11,  // 108, 8
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,  // 116, 12
-                0, 4, 8, 12,  // 128, 4
-                0, 2, 3, 4, 6, 7, 8, 12,  // 132, 8
-                0, 1, 2, 4, 5, 8, 9, 12,  // 140, 8
-                0, 1, 2, 3, 4, 5, 6, 8, 9, 12,  // 148, 10
-                3, 6, 7, 8, 9, 12,  // 158, 6
-                3, 5, 6, 7, 8, 9, 10, 12,  // 164, 8
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12,  // 172, 12
-                0, 1, 2, 5, 6, 7, 11, 12,  // 184, 8
-                5, 8, 9, 10, 13,  // 192, 5
-                8, 12, 13,  // 197, 3
-                4, 8, 12, 13,  // 200, 4
-                2, 3, 6, 9, 12, 13,  // 204, 6
-                0, 1, 2, 3, 8, 9, 12, 13,  // 210, 8
-                0, 1, 4, 5, 8, 9, 12, 13,  // 218, 8
-                2, 3, 6, 7, 8, 9, 12, 13,  // 226, 8
-                2, 3, 5, 6, 9, 10, 12, 13,  // 234, 8
-                0, 3, 6, 7, 9, 10, 12, 13,  // 242, 8
-                0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13,  // 250, 12
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13,  // 262, 13
-                2, 3, 4, 7, 8, 11, 12, 13,  // 275, 8
-                1, 2, 6, 7, 8, 11, 12, 13,  // 283, 8
-                2, 3, 4, 6, 7, 8, 9, 11, 12, 13,  // 291, 10
-                2, 3, 4, 5, 10, 11, 12, 13,  // 301, 8
-                0, 1, 6, 7, 10, 11, 12, 13,  // 309, 8
-                6, 9, 10, 11, 14,  // 317, 5
-                0, 2, 4, 6, 8, 10, 12, 14,  // 322, 8
-                1, 3, 5, 7, 8, 10, 12, 14,  // 330, 8
-                1, 3, 4, 6, 9, 11, 12, 14,  // 338, 8
-                0, 2, 5, 7, 9, 11, 12, 14,  // 346, 8
-                0, 3, 4, 5, 8, 9, 13, 14,  // 354, 8
-                2, 3, 4, 7, 8, 9, 13, 14,  // 362, 8
-                1, 2, 5, 6, 9, 10, 13, 14,  // 370, 8
-                0, 3, 4, 7, 9, 10, 13, 14,  // 378, 8
-                0, 3, 5, 6, 8, 11, 13, 14,  // 386, 8
-                1, 2, 4, 7, 8, 11, 13, 14,  // 394, 8
-                0, 1, 4, 7, 10, 11, 13, 14,  // 402, 8
-                0, 3, 6, 7, 10, 11, 13, 14,  // 410, 8
-                8, 12, 13, 14,  // 418, 4
-                1, 2, 3, 7, 8, 12, 13, 14,  // 422, 8
-                4, 8, 9, 12, 13, 14,  // 430, 6
-                0, 4, 5, 8, 9, 12, 13, 14,  // 436, 8
-                1, 2, 3, 6, 7, 8, 9, 12, 13, 14,  // 444, 10
-                2, 6, 8, 9, 10, 12, 13, 14,  // 454, 8
-                0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,  // 462, 12
-                0, 7, 9, 10, 11, 12, 13, 14,  // 474, 8
-                1, 2, 3, 4, 5, 6, 8, 15,  // 482, 8
-                3, 7, 11, 15,  // 490, 4
-                0, 1, 3, 4, 5, 7, 11, 15,  // 494, 8
-                0, 4, 5, 10, 11, 15,  // 502, 6
-                1, 2, 3, 6, 7, 10, 11, 15,  // 508, 8
-                0, 1, 2, 3, 5, 6, 7, 10, 11, 15,  // 516, 10
-                0, 4, 5, 6, 9, 10, 11, 15,  // 526, 8
-                0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15,  // 534, 12
-                1, 2, 4, 5, 8, 9, 12, 15,  // 546, 8
-                2, 3, 5, 6, 8, 9, 12, 15,  // 554, 8
-                0, 3, 5, 6, 9, 10, 12, 15,  // 562, 8
-                1, 2, 4, 7, 9, 10, 12, 15,  // 570, 8
-                1, 2, 5, 6, 8, 11, 12, 15,  // 578, 8
-                0, 3, 4, 7, 8, 11, 12, 15,  // 586, 8
-                0, 1, 5, 6, 10, 11, 12, 15,  // 594, 8
-                1, 2, 6, 7, 10, 11, 12, 15,  // 602, 8
-                1, 3, 4, 6, 8, 10, 13, 15,  // 610, 8
-                0, 2, 5, 7, 8, 10, 13, 15,  // 618, 8
-                0, 2, 4, 6, 9, 11, 13, 15,  // 626, 8
-                1, 3, 5, 7, 9, 11, 13, 15,  // 634, 8
-                0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15,  // 642, 11
-                2, 3, 4, 5, 8, 9, 14, 15,  // 653, 8
-                0, 1, 6, 7, 8, 9, 14, 15,  // 661, 8
-                0, 1, 5, 10, 14, 15,  // 669, 6
-                0, 3, 4, 5, 9, 10, 14, 15,  // 675, 8
-                0, 1, 5, 6, 9, 10, 14, 15,  // 683, 8
-                11, 14, 15,  // 691, 3
-                7, 11, 14, 15,  // 694, 4
-                1, 2, 4, 5, 8, 11, 14, 15,  // 698, 8
-                0, 1, 4, 7, 8, 11, 14, 15,  // 706, 8
-                0, 1, 4, 5, 10, 11, 14, 15,  // 714, 8
-                2, 3, 6, 7, 10, 11, 14, 15,  // 722, 8
-                4, 5, 6, 7, 10, 11, 14, 15,  // 730, 8
-                0, 1, 4, 5, 7, 8, 10, 11, 14, 15,  // 738, 10
-                0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15,  // 748, 12
-                0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15,  // 760, 13
-                0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15,  // 773, 11
-                3, 4, 8, 9, 10, 13, 14, 15,  // 784, 8
-                11, 13, 14, 15,  // 792, 4
-                0, 1, 2, 4, 11, 13, 14, 15,  // 796, 8
-                0, 1, 2, 4, 5, 10, 11, 13, 14, 15,  // 804, 10
-                7, 10, 11, 13, 14, 15,  // 814, 6
-                3, 6, 7, 10, 11, 13, 14, 15,  // 820, 8
-                1, 5, 9, 10, 11, 13, 14, 15,  // 828, 8
-                1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15,  // 836, 12
-                12, 13, 14, 15,  // 848, 4
-                0, 1, 2, 3, 12, 13, 14, 15,  // 852, 8
-                0, 1, 4, 5, 12, 13, 14, 15,  // 860, 8
-                4, 5, 6, 7, 12, 13, 14, 15,  // 868, 8
-                4, 8, 9, 10, 12, 13, 14, 15,  // 876, 8
-                0, 4, 5, 8, 9, 10, 12, 13, 14, 15,  // 884, 10
-                0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15,  // 894, 12
-                0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15,  // 906, 12
-                0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15,  // 918, 11
-                0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15,  // 929, 11
-                7, 9, 10, 11, 12, 13, 14, 15,  // 940, 8
-                3, 6, 7, 9, 10, 11, 12, 13, 14, 15,  // 948, 10
-                2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15,  // 958, 12
-                8, 9, 10, 11, 12, 13, 14, 15,  // 970, 8
-                0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15,  // 978, 12
-                0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15,  // 990, 13
-                3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 1003, 12
-                2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 1015, 13
-                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 1028, 12
-                0, 2,  // 1040, 2
-                1, 3,  // 1042, 2
-                0, 1, 4, 5,  // 1044, 4
-                0, 1, 2, 4, 5,  // 1048, 5
-                2, 3, 6,  // 1053, 3
-                0, 2, 4, 6,  // 1056, 4
-                1, 2, 5, 6,  // 1060, 4
-                0, 1, 2, 3, 5, 6,  // 1064, 6
-                0, 1, 2, 4, 5, 6,  // 1070, 6
-                0, 1, 2, 3, 4, 5, 6,  // 1076, 7
-                0, 3, 4, 7,  // 1083, 4
-                0, 1, 2, 3, 4, 7,  // 1087, 6
-                1, 3, 5, 7,  // 1093, 4
-                2, 3, 6, 7,  // 1097, 4
-                1, 2, 3, 6, 7,  // 1101, 5
-                1, 2, 3, 5, 6, 7,  // 1106, 6
-                0, 1, 2, 3, 5, 6, 7,  // 1112, 7
-                4, 5, 6, 7,  // 1119, 4
-                0, 8,  // 1123, 2
-                0, 1, 4, 5, 8,  // 1125, 5
-                0, 1, 8, 9,  // 1130, 4
-                4, 5, 8, 9,  // 1134, 4
-                0, 1, 4, 5, 8, 9,  // 1138, 6
-                2, 6, 8, 9,  // 1144, 4
-                6, 7, 8, 9,  // 1148, 4
-                0, 2, 4, 6, 8, 10,  // 1152, 6
-                1, 2, 5, 6, 9, 10,  // 1158, 6
-                0, 3, 4, 7, 9, 10,  // 1164, 6
-                0, 1, 2, 8, 9, 10,  // 1170, 6
-                4, 5, 6, 8, 9, 10,  // 1176, 6
-                3, 11,  // 1182, 2
-                2, 3, 6, 7, 11,  // 1184, 5
-                0, 3, 8, 11,  // 1189, 4
-                0, 3, 4, 7, 8, 11,  // 1193, 6
-                1, 3, 5, 7, 9, 11,  // 1199, 6
-                2, 3, 10, 11,  // 1205, 4
-                1, 5, 10, 11,  // 1209, 4
-                4, 5, 10, 11,  // 1213, 4
-                6, 7, 10, 11,  // 1217, 4
-                2, 3, 6, 7, 10, 11,  // 1221, 6
-                1, 2, 3, 9, 10, 11,  // 1227, 6
-                5, 6, 7, 9, 10, 11,  // 1233, 6
-                8, 9, 10, 11,  // 1239, 4
-                4, 12,  // 1243, 2
-                0, 1, 2, 3, 4, 5, 8, 12,  // 1245, 8
-                8, 9, 12,  // 1253, 3
-                0, 4, 5, 8, 9, 12,  // 1256, 6
-                0, 1, 4, 5, 8, 9, 12,  // 1262, 7
-                2, 3, 5, 6, 8, 9, 12,  // 1269, 7
-                1, 5, 9, 13,  // 1276, 4
-                6, 7, 9, 13,  // 1280, 4
-                1, 4, 7, 10, 13,  // 1284, 5
-                1, 6, 8, 11, 13,  // 1289, 5
-                0, 1, 12, 13,  // 1294, 4
-                4, 5, 12, 13,  // 1298, 4
-                0, 1, 6, 7, 12, 13,  // 1302, 6
-                0, 1, 4, 8, 12, 13,  // 1308, 6
-                8, 9, 12, 13,  // 1314, 4
-                4, 8, 9, 12, 13,  // 1318, 5
-                4, 5, 8, 9, 12, 13,  // 1323, 6
-                0, 4, 5, 8, 9, 12, 13,  // 1329, 7
-                0, 1, 6, 10, 12, 13,  // 1336, 6
-                3, 6, 7, 9, 10, 12, 13,  // 1342, 7
-                0, 1, 10, 11, 12, 13,  // 1349, 6
-                2, 4, 7, 9, 14,  // 1355, 5
-                4, 5, 10, 14,  // 1360, 4
-                2, 6, 10, 14,  // 1364, 4
-                2, 5, 8, 11, 14,  // 1368, 5
-                0, 2, 12, 14,  // 1373, 4
-                8, 10, 12, 14,  // 1377, 4
-                4, 6, 8, 10, 12, 14,  // 1381, 6
-                13, 14,  // 1387, 2
-                9, 10, 13, 14,  // 1389, 4
-                5, 6, 9, 10, 13, 14,  // 1393, 6
-                0, 1, 2, 12, 13, 14,  // 1399, 6
-                4, 5, 6, 12, 13, 14,  // 1405, 6
-                8, 9, 12, 13, 14,  // 1411, 5
-                8, 9, 10, 12, 13, 14,  // 1416, 6
-                7, 15,  // 1422, 2
-                0, 5, 10, 15,  // 1424, 4
-                0, 1, 2, 3, 6, 7, 11, 15,  // 1428, 8
-                10, 11, 15,  // 1436, 3
-                0, 1, 5, 6, 10, 11, 15,  // 1439, 7
-                3, 6, 7, 10, 11, 15,  // 1446, 6
-                12, 15,  // 1452, 2
-                0, 3, 12, 15,  // 1454, 4
-                4, 7, 12, 15,  // 1458, 4
-                0, 3, 6, 9, 12, 15,  // 1462, 6
-                0, 3, 5, 10, 12, 15,  // 1468, 6
-                8, 11, 12, 15,  // 1474, 4
-                5, 6, 8, 11, 12, 15,  // 1478, 6
-                4, 7, 8, 11, 12, 15,  // 1484, 6
-                1, 3, 13, 15,  // 1490, 4
-                9, 11, 13, 15,  // 1494, 4
-                5, 7, 9, 11, 13, 15,  // 1498, 6
-                2, 3, 14, 15,  // 1504, 4
-                2, 3, 4, 5, 14, 15,  // 1508, 6
-                6, 7, 14, 15,  // 1514, 4
-                2, 3, 5, 9, 14, 15,  // 1518, 6
-                2, 3, 8, 9, 14, 15,  // 1524, 6
-                10, 14, 15,  // 1530, 3
-                0, 4, 5, 9, 10, 14, 15,  // 1533, 7
-                2, 3, 7, 11, 14, 15,  // 1540, 6
-                10, 11, 14, 15,  // 1546, 4
-                7, 10, 11, 14, 15,  // 1550, 5
-                6, 7, 10, 11, 14, 15,  // 1555, 6
-                1, 2, 3, 13, 14, 15,  // 1561, 6
-                5, 6, 7, 13, 14, 15,  // 1567, 6
-                10, 11, 13, 14, 15,  // 1573, 5
-                9, 10, 11, 13, 14, 15,  // 1578, 6
-                0, 4, 8, 9, 12, 13, 14, 15,  // 1584, 8
-                9, 10, 12, 13, 14, 15,  // 1592, 6
-                8, 11, 12, 13, 14, 15,  // 1598, 6
-                3, 7, 10, 11, 12, 13, 14, 15,  // 1604, 8
-            };
-            static const int g_shapeRanges[][2] =
-            {
-                { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },
-                { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },
-                { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },
-                { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },
-                { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },
-                { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },
-                { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },
-                { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },
-                { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },
-                { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },
-                { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },
-                { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },
-                { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },
-                { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },
-                { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },
-                { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },
-                { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },
-                { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },
-                { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },
-                { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },
-                { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },
-                { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },
-                { 1604, 8 },
-            };
-            static const int g_shapes1[][2] =
-            {
-                { 0, 16 }
-            };
-            static const int g_shapes2[64][2] =
-            {
-                { 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 },
-                { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 },
-                { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 },
-                { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 },
-                { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 },
-                { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 },
-                { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 },
-                { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 },
-            };
-            static const int g_shapes3[64][3] =
-            {
-                { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 },
-                { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 },
-                { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 },
-                { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 },
-                { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 },
-                { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 },
-                { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 },
-                { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 },
-            };
-
-            static const int g_shapeList1[] =
-            {
-                0,
-            };
-
-            static const int g_shapeList1Collapse[] =
-            {
-                0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1,
-            };
-            static const int g_shapeList2[] =
-            {
-                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
-                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
-                34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
-                45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
-                56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
-                67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
-                78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
-                89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
-                100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
-                111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
-                122, 123, 124, 125, 126, 127, 128,
-            };
-            static const int g_shapeList2Collapse[] =
-            {
-                -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-                10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
-                21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
-                43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
-                54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
-                65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
-                76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
-                87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
-                98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
-                109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-                120, 121, 122, 123, 124, 125, 126, 127, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1,
-            };
-
-            static const int g_shapeList12[] =
-            {
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
-                11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
-                22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
-                55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
-                66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
-                77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
-                88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
-                99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
-                110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
-                121, 122, 123, 124, 125, 126, 127, 128,
-            };
-
-            static const int g_shapeList12Collapse[] =
-            {
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
-                11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
-                22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
-                55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
-                66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
-                77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
-                88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
-                99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
-                110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
-                121, 122, 123, 124, 125, 126, 127, 128, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1,
-            };
-
-            static const int g_shapeList3[] =
-            {
-                1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29,
-                33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109,
-                110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135,
-                136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
-                147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
-                158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
-                169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
-                180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
-                191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
-                202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
-                213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
-                224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
-                235, 236, 237, 238, 239, 240, 241, 242,
-            };
-
-            static const int g_shapeList3Collapse[] =
-            {
-                -1, 0, 1, -1, 2, -1, 3, -1, 4, -1, -1,
-                -1, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1,
-                -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, -1,
-                11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, 12, -1, -1, 13,
-                -1, -1, -1, -1, 14, -1, -1, -1, 15, -1, -1,
-                16, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, 18, -1, -1, -1, -1, 19, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, 20, -1, -1, 21,
-                22, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, 24, -1, -1, -1, -1, 25, 26, 27, 28,
-                29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
-                40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
-                51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
-                62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
-                73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
-                84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
-                95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
-                106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
-                117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
-                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
-                139,
-            };
-
-            static const int g_shapeList3Short[] =
-            {
-                1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96,
-                106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160,
-                171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232,
-                233, 237, 240,
-            };
-
-            static const int g_shapeList3ShortCollapse[] =
-            {
-                -1, 0, 1, -1, 2, -1, 3, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, 4, -1, 5, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1,
-                -1, -1, -1, -1, 8, -1, -1, -1, -1, -1, -1,
-                9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, 11, -1, -1, -1,
-                12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, 13, -1, -1, -1, -1, -1, -1, -1, 14,
-                15, -1, -1, -1, 16, -1, -1, -1, -1, -1, 17,
-                18, -1, -1, 19, -1, 20, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, 21, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, 23,
-                -1, 24, 25, -1, -1, -1, -1, -1, -1, -1, 26,
-                27, -1, -1, -1, -1, -1, -1, -1, 28, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, 29, -1, -1, -1,
-                -1, -1, 30, 31, -1, -1, -1, -1, -1, -1, -1,
-                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-                -1, 32, 33, -1, -1, -1, 34, -1, -1, 35, -1,
-                -1,
-            };
-
-            static const int g_shapeListAll[] =
-            {
-                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
-                11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
-                22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
-                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
-                55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
-                66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
-                77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
-                88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
-                99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
-                110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
-                121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
-                132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
-                143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
-                154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
-                165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
-                176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
-                187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
-                198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
-                209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
-                220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
-                231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
-                242,
-            };
-
-            static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]);
-            static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]);
-            static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]);
-            static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]);
-            static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]);
-            static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]);
-            static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]);
-
-            static const int g_maxFragmentsPerMode = (g_numShapes2 > g_numShapes3) ? g_numShapes2 : g_numShapes3;
-        }
-
-        namespace BC6HData
-        {
-            enum EField
-            {
-                NA, // N/A
-                M,  // Mode
-                D,  // Shape
-                RW,
-                RX,
-                RY,
-                RZ,
-                GW,
-                GX,
-                GY,
-                GZ,
-                BW,
-                BX,
-                BY,
-                BZ,
-            };
-
-            struct ModeDescriptor
-            {
-                EField m_eField;
-                uint8_t   m_uBit;
-            };
-
-            const ModeDescriptor g_modeDescriptors[14][82] =
-            {
-                {   // Mode 1 (0x00) - 10 5 5 5
-                    { M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 2 (0x01) - 7 6 6 6
-                    { M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 3 (0x02) - 11 5 4 4
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
-                    { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
-                    { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 4 (0x06) - 11 4 5 4
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
-                    { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
-                    { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 5 (0x0a) - 11 4 4 5
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
-                    { BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
-                    { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 6 (0x0e) - 9 5 5 5
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 7 (0x12) - 8 6 5 5
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 8 (0x16) - 8 5 6 5
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 9 (0x1a) - 8 5 5 6
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 10 (0x1e) - 6 6 6 6
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
-                    { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
-                    { D, 3 },{ D, 4 },
-                },
-
-                {   // Mode 11 (0x03) - 10 10
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },
-                },
-
-                {   // Mode 12 (0x07) - 11 9
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },
-                },
-
-                {   // Mode 13 (0x0b) - 12 8
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
-                    { RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
-                    { GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
-                    { BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },
-                },
-
-                {   // Mode 14 (0x0f) - 16 4
-                    { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
-                    { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
-                    { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
-                    { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 },
-                    { RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 },
-                    { GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 },
-                    { BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
-                    { NA, 0 },{ NA, 0 },
-                },
-            };
-        }
-
-        struct PackingVector
-        {
-            uint32_t m_vector[4];
-            int m_offset;
-
-            void Init()
-            {
-                for (int i = 0; i < 4; i++)
-                    m_vector[i] = 0;
-
-                m_offset = 0;
-            }
-
-            inline void Pack(ParallelMath::ScalarUInt16 value, int bits)
-            {
-                int vOffset = m_offset >> 5;
-                int bitOffset = m_offset & 0x1f;
-
-                m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff);
-
-                int overflowBits = bitOffset + bits - 32;
-                if (overflowBits > 0)
-                    m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits));
-
-                m_offset += bits;
-            }
-
-            inline void Flush(uint8_t* output)
-            {
-                assert(m_offset == 128);
-
-                for (int v = 0; v < 4; v++)
-                {
-                    uint32_t chunk = m_vector[v];
-                    for (int b = 0; b < 4; b++)
-                        output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff);
-                }
-            }
-        };
-
-
-		struct UnpackingVector
-		{
-			uint32_t m_vector[4];
-
-			void Init(const uint8_t *bytes)
-			{
-				for (int i = 0; i < 4; i++)
-					m_vector[i] = 0;
-
-				for (int b = 0; b < 16; b++)
-					m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8));
-			}
-
-			inline ParallelMath::ScalarUInt16 Unpack(int bits)
-			{
-				uint32_t bitMask = (1 << bits) - 1;
-
-				ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask);
-
-				for (int i = 0; i < 4; i++)
-				{
-					m_vector[i] >>= bits;
-					if (i != 3)
-						m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits);
-				}
-
-				return result;
-			}
-		};
-
-        void ComputeTweakFactors(int tweak, int range, float *outFactors)
-        {
-            int totalUnits = range - 1;
-            int minOutsideUnits = ((tweak >> 1) & 1);
-            int maxOutsideUnits = (tweak & 1);
-            int insideUnits = totalUnits - minOutsideUnits - maxOutsideUnits;
-
-            outFactors[0] = -static_cast<float>(minOutsideUnits) / static_cast<float>(insideUnits);
-            outFactors[1] = static_cast<float>(maxOutsideUnits) / static_cast<float>(insideUnits) + 1.0f;
-        }
-
-        ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned)
-        {
-            if (isSigned)
-            {
-                ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f));
-                return (v * 32.0f + offset) / 31.0f;
-            }
-            else
-                return (v * 64.0f + 30.0f) / 31.0f;
-        }
-
-        ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v)
-        {
-#ifdef CVTT_ENABLE_ASSERTS
-            for (int i = 0; i < ParallelMath::ParallelSize; i++)
-                assert(ParallelMath::Extract(v, i) != -32768)
-#endif
-
-            ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0));
-            ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v));
-
-            ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31));
-            ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5);
-            ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted);
-            ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768));
-
-            return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits;
-        }
-
-        ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v)
-        {
-            return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6));
-        }
-
-        void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned)
-        {
-            for (int epi = 0; epi < 2; epi++)
-            {
-                for (int ch = 0; ch < 3; ch++)
-                {
-                    if (isSigned)
-                        outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch])));
-                    else
-                        outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch])));
-                }
-            }
-        }
-
-        template<int TVectorSize>
-        class UnfinishedEndpoints
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-
-            UnfinishedEndpoints()
-            {
-            }
-
-            UnfinishedEndpoints(const MFloat *base, const MFloat *offset)
-            {
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_base[ch] = base[ch];
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_offset[ch] = offset[ch];
-            }
-
-            UnfinishedEndpoints(const UnfinishedEndpoints& other)
-            {
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_base[ch] = other.m_base[ch];
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_offset[ch] = other.m_offset[ch];
-            }
-
-            void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                float tweakFactors[2];
-                ComputeTweakFactors(tweak, range, tweakFactors);
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MUInt15 channelEPs[2];
-                    for (int epi = 0; epi < 2; epi++)
-                    {
-                        MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f);
-                        channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode);
-                    }
-
-                    outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[0]);
-                    outEP1[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[1]);
-                }
-            }
-
-            void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode)
-            {
-                float tweakFactors[2];
-                ComputeTweakFactors(tweak, range, tweakFactors);
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MSInt16 channelEPs[2];
-                    for (int epi = 0; epi < 2; epi++)
-                    {
-                        MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f);
-                        channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode);
-                    }
-
-                    outEP0[ch] = channelEPs[0];
-                    outEP1[ch] = channelEPs[1];
-                }
-            }
-
-            void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1)
-            {
-                ParallelMath::RoundTowardNearestForScope roundingMode;
-
-                float tweakFactors[2];
-                ComputeTweakFactors(tweak, range, tweakFactors);
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f);
-                    MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f);
-                    outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode);
-                    outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode);
-                }
-            }
-
-            template<int TNewVectorSize>
-            UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler)
-            {
-                MFloat newBase[TNewVectorSize];
-                MFloat newOffset[TNewVectorSize];
-
-                for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++)
-                {
-                    newBase[ch] = m_base[ch];
-                    newOffset[ch] = m_offset[ch];
-                }
-
-                MFloat fillerV = ParallelMath::MakeFloat(filler);
-
-                for (int ch = TVectorSize; ch < TNewVectorSize; ch++)
-                {
-                    newBase[ch] = fillerV;
-                    newOffset[ch] = ParallelMath::MakeFloatZero();
-                }
-
-                return UnfinishedEndpoints<TNewVectorSize>(newBase, newOffset);
-            }
-
-        private:
-            MFloat m_base[TVectorSize];
-            MFloat m_offset[TVectorSize];
-        };
-
-        template<int TMatrixSize>
-        class PackedCovarianceMatrix
-        {
-        public:
-            // 0: xx,
-            // 1: xy, yy
-            // 3: xz, yz, zz 
-            // 6: xw, yw, zw, ww
-            // ... etc.
-            static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2;
-
-            typedef ParallelMath::Float MFloat;
-
-            PackedCovarianceMatrix()
-            {
-                for (int i = 0; i < PyramidSize; i++)
-                    m_values[i] = ParallelMath::MakeFloatZero();
-            }
-
-            void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight)
-            {
-                int index = 0;
-                for (int row = 0; row < TMatrixSize; row++)
-                {
-                    for (int col = 0; col <= row; col++)
-                    {
-                        m_values[index] = m_values[index] + vec[row] * vec[col] * weight;
-                        index++;
-                    }
-                }
-            }
-
-            void Product(MFloat *outVec, const MFloat *inVec)
-            {
-                for (int row = 0; row < TMatrixSize; row++)
-                {
-                    MFloat sum = ParallelMath::MakeFloatZero();
-
-                    int index = (row * (row + 1)) >> 1;
-                    for (int col = 0; col < TMatrixSize; col++)
-                    {
-                        sum = sum + inVec[col] * m_values[index];
-                        if (col >= row)
-                            index += col + 1;
-                        else
-                            index++;
-                    }
-
-                    outVec[row] = sum;
-                }
-            }
-
-        private:
-            ParallelMath::Float m_values[PyramidSize];
-        };
-
-        static const int NumEndpointSelectorPasses = 3;
-
-        template<int TVectorSize, int TIterationCount>
-        class EndpointSelector
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-
-            EndpointSelector()
-            {
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    m_centroid[ch] = ParallelMath::MakeFloatZero();
-                    m_direction[ch] = ParallelMath::MakeFloatZero();
-                }
-                m_weightTotal = ParallelMath::MakeFloatZero();
-                m_minDist = ParallelMath::MakeFloat(FLT_MAX);
-                m_maxDist = ParallelMath::MakeFloat(-FLT_MAX);
-            }
-
-            void ContributePass(const MFloat *value, int pass, const MFloat &weight)
-            {
-                if (pass == 0)
-                    ContributeCentroid(value, weight);
-                else if (pass == 1)
-                    ContributeDirection(value, weight);
-                else if (pass == 2)
-                    ContributeMinMax(value);
-            }
-
-            void FinishPass(int pass)
-            {
-                if (pass == 0)
-                    FinishCentroid();
-                else if (pass == 1)
-                    FinishDirection();
-            }
-
-            UnfinishedEndpoints<TVectorSize> GetEndpoints(const float channelWeights[TVectorSize]) const
-            {
-                MFloat unweightedBase[TVectorSize];
-                MFloat unweightedOffset[TVectorSize];
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist;
-                    MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist;
-
-                    float safeWeight = channelWeights[ch];
-                    if (safeWeight == 0.f)
-                        safeWeight = 1.0f;
-
-                    unweightedBase[ch] = min / channelWeights[ch];
-                    unweightedOffset[ch] = (max - min) / channelWeights[ch];
-                }
-
-                return UnfinishedEndpoints<TVectorSize>(unweightedBase, unweightedOffset);
-            }
-
-        private:
-            void ContributeCentroid(const MFloat *value, const MFloat &weight)
-            {
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_centroid[ch] = m_centroid[ch] + value[ch] * weight;
-                m_weightTotal = m_weightTotal + weight;
-            }
-
-            void FinishCentroid()
-            {
-                MFloat denom = m_weightTotal;
-                ParallelMath::MakeSafeDenominator(denom);
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_centroid[ch] = m_centroid[ch] / denom;
-            }
-
-            void ContributeDirection(const MFloat *value, const MFloat &weight)
-            {
-                MFloat diff[TVectorSize];
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    diff[ch] = value[ch] - m_centroid[ch];
-
-                m_covarianceMatrix.Add(diff, weight);
-            }
-
-            void FinishDirection()
-            {
-                MFloat approx[TVectorSize];
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    approx[ch] = ParallelMath::MakeFloat(1.0f);
-
-                for (int i = 0; i < TIterationCount; i++)
-                {
-                    MFloat product[TVectorSize];
-                    m_covarianceMatrix.Product(product, approx);
-
-                    MFloat largestComponent = product[0];
-                    for (int ch = 1; ch < TVectorSize; ch++)
-                        largestComponent = ParallelMath::Max(largestComponent, product[ch]);
-
-                    // product = largestComponent*newApprox
-                    ParallelMath::MakeSafeDenominator(largestComponent);
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        approx[ch] = product[ch] / largestComponent;
-                }
-
-                // Normalize
-                MFloat approxLen = ParallelMath::MakeFloatZero();
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    approxLen = approxLen + approx[ch] * approx[ch];
-
-                approxLen = ParallelMath::Sqrt(approxLen);
-
-                ParallelMath::MakeSafeDenominator(approxLen);
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_direction[ch] = approx[ch] / approxLen;
-            }
-
-            void ContributeMinMax(const MFloat *value)
-            {
-                MFloat dist = ParallelMath::MakeFloatZero();
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    dist = dist + m_direction[ch] * (value[ch] - m_centroid[ch]);
-
-                m_minDist = ParallelMath::Min(m_minDist, dist);
-                m_maxDist = ParallelMath::Max(m_maxDist, dist);
-            }
-
-            ParallelMath::Float m_centroid[TVectorSize];
-            ParallelMath::Float m_direction[TVectorSize];
-            PackedCovarianceMatrix<TVectorSize> m_covarianceMatrix;
-            ParallelMath::Float m_weightTotal;
-
-            ParallelMath::Float m_minDist;
-            ParallelMath::Float m_maxDist;
-        };
-
-        static const ParallelMath::UInt16 g_weightReciprocals[] =
-        {
-            ParallelMath::MakeUInt16(0),        // -1 
-            ParallelMath::MakeUInt16(0),        // 0
-            ParallelMath::MakeUInt16(32768),    // 1
-            ParallelMath::MakeUInt16(16384),    // 2
-            ParallelMath::MakeUInt16(10923),    // 3
-            ParallelMath::MakeUInt16(8192),     // 4
-            ParallelMath::MakeUInt16(6554),     // 5
-            ParallelMath::MakeUInt16(5461),     // 6
-            ParallelMath::MakeUInt16(4681),     // 7
-            ParallelMath::MakeUInt16(4096),     // 8
-            ParallelMath::MakeUInt16(3641),     // 9
-            ParallelMath::MakeUInt16(3277),     // 10
-            ParallelMath::MakeUInt16(2979),     // 11
-            ParallelMath::MakeUInt16(2731),     // 12
-            ParallelMath::MakeUInt16(2521),     // 13
-            ParallelMath::MakeUInt16(2341),     // 14
-            ParallelMath::MakeUInt16(2185),     // 15
-        };
-
-        template<int TVectorSize>
-        class IndexSelector
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::AInt16 MAInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-            typedef ParallelMath::UInt31 MUInt31;
-
-            template<class TInterpolationEPType, class TColorEPType>
-            void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)
-            {
-                // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.
-                // We need to select indexes using the color-space endpoints.
-
-                m_isUniform = true;
-                for (int ch = 1; ch < TVectorSize; ch++)
-                {
-                    if (channelWeights[ch] != channelWeights[0])
-                        m_isUniform = false;
-                }
-
-                // To work with channel weights, we need something where:
-                // pxDiff = px - ep[0]
-                // epDiff = ep[1] - ep[0]
-                //
-                // weightedEPDiff = epDiff * channelWeights
-                // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)
-                // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)
-                // index = normalizedIndex * maxValue
-                //
-                // Equivalent to:
-                // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)
-                // index = dot(axis, pxDiff)
-
-                for (int ep = 0; ep < 2; ep++)
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);
-
-                m_range = range;
-                m_maxValue = static_cast<float>(range - 1);
-
-                MFloat epDiffWeighted[TVectorSize];
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);
-                    MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);
-                    epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];
-                }
-
-                MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];
-                for (int ch = 1; ch < TVectorSize; ch++)
-                    lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];
-
-                ParallelMath::MakeSafeDenominator(lenSquared);
-
-                MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;
-            }
-
-            template<bool TSigned>
-            void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)
-            {
-                MAInt16 converted[2][TVectorSize];
-                for (int epi = 0; epi < 2; epi++)
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]);
-
-                Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);
-            }
-
-            void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
-            {
-                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
-
-                for (int ch = 0; ch < numRealChannels; ch++)
-                {
-                    MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
-                    MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
-                    pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));
-                }
-            }
-
-            void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
-            {
-                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));
-
-                for (int ch = 0; ch < numRealChannels; ch++)
-                {
-                    MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
-                    MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
-                    pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));
-                }
-            }
-
-            void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel)
-            {
-                ReconstructLDR_BC7(index, pixel, TVectorSize);
-            }
-
-            void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel)
-            {
-                ReconstructLDRPrecise(index, pixel, TVectorSize);
-            }
-
-            MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
-            {
-                MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];
-                for (int ch = 1; ch < TVectorSize; ch++)
-                    dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];
-
-                return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);
-            }
-
-        protected:
-            MAInt16 m_endPoint[2][TVectorSize];
-
-        private:
-            MFloat m_origin[TVectorSize];
-            MFloat m_axis[TVectorSize];
-            int m_range;
-            float m_maxValue;
-            bool m_isUniform;
-        };
-
-
-        template<int TVectorSize>
-        class IndexSelectorHDR : public IndexSelector<TVectorSize>
-        {
-        public:
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt31 MUInt31;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-            typedef ParallelMath::Float MFloat;
-
-        private:
-
-            MUInt15 InvertSingle(const MUInt15& anIndex) const
-            {
-                MUInt15 inverted = m_maxValueMinusOne - anIndex;
-                return ParallelMath::Select(m_isInverted, inverted, anIndex);
-            }
-
-            void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const
-            {
-                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MSInt16 ep0 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]);
-                    MSInt16 ep1 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]);
-
-                    MSInt32 pixel32 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1);
-
-                    pixel32 = ParallelMath::RightShift(pixel32 + ParallelMath::MakeSInt32(32), 6);
-
-                    pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(pixel32));
-                }
-            }
-
-            void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const
-            {
-                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MUInt16 ep0 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]);
-                    MUInt16 ep1 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]);
-
-                    MUInt31 pixel31 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1);
-
-                    pixel31 = ParallelMath::RightShift(pixel31 + ParallelMath::MakeUInt31(32), 6);
-
-                    pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(pixel31)));
-                }
-            }
-
-            MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const
-            {
-                MFloat diff = pixel[ch] - m_reconstructedInterpolators[index][ch];
-                return diff * diff;
-            }
-
-            MFloat ErrorForInterpolator(int index, const MFloat *pixel) const
-            {
-                MFloat error = ErrorForInterpolatorComponent(index, 0, pixel);
-                for (int ch = 1; ch < TVectorSize; ch++)
-                    error = error + ErrorForInterpolatorComponent(index, ch, pixel);
-                return error;
-            }
-
-        public:
-
-            void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights)
-            {
-                assert(range <= 16);
-
-                m_range = range;
-
-                m_isInverted = ParallelMath::MakeBoolInt16(false);
-                m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1));
-
-                if (!fastIndexing)
-                {
-                    for (int i = 0; i < range; i++)
-                    {
-                        MSInt16 recon2CL[TVectorSize];
-
-                        if (isSigned)
-                            ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL);
-                        else
-                            ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL);
-
-                        for (int ch = 0; ch < TVectorSize; ch++)
-                            m_reconstructedInterpolators[i][ch] = ParallelMath::TwosCLHalfToFloat(recon2CL[ch]) * channelWeights[ch];
-                    }
-                }
-            }
-
-            void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const
-            {
-                ReconstructHDRSignedUninverted(InvertSingle(index), pixel);
-            }
-
-            void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const
-            {
-                ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel);
-            }
-
-            void ConditionalInvert(const ParallelMath::Int16CompFlag &invert)
-            {
-                m_isInverted = invert;
-            }
-
-            MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const
-            {
-                MUInt15 index = ParallelMath::MakeUInt15(0);
-
-                MFloat bestError = ErrorForInterpolator(0, pixel);
-                for (int i = 1; i < m_range; i++)
-                {
-                    MFloat error = ErrorForInterpolator(i, pixel);
-                    ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
-                    ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(static_cast<uint16_t>(i)));
-                    bestError = ParallelMath::Min(bestError, error);
-                }
-
-                return InvertSingle(index);
-            }
-
-            MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
-            {
-                return InvertSingle(this->SelectIndexLDR(pixel, rtn));
-            }
-
-        private:
-            MFloat m_reconstructedInterpolators[16][TVectorSize];
-            ParallelMath::Int16CompFlag m_isInverted;
-            MUInt15 m_maxValueMinusOne;
-            int m_range;
-        };
-
-        // Solve for a, b where v = a*t + b
-        // This allows endpoints to be mapped to where T=0 and T=1
-        // Least squares from totals:
-        // a = (tv - t*v/w)/(tt - t*t/w)
-        // b = (v - a*t)/w
-        template<int TVectorSize>
-        class EndpointRefiner
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::AInt16 MAInt16;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-
-            MFloat m_tv[TVectorSize];
-            MFloat m_v[TVectorSize];
-            MFloat m_tt;
-            MFloat m_t;
-            MFloat m_w;
-            int m_wu;
-
-            float m_rcpMaxIndex;
-            float m_channelWeights[TVectorSize];
-            float m_rcpChannelWeights[TVectorSize];
-
-            void Init(int indexRange, const float channelWeights[TVectorSize])
-            {
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    m_tv[ch] = ParallelMath::MakeFloatZero();
-                    m_v[ch] = ParallelMath::MakeFloatZero();
-                }
-                m_tt = ParallelMath::MakeFloatZero();
-                m_t = ParallelMath::MakeFloatZero();
-                m_w = ParallelMath::MakeFloatZero();
-
-                m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    m_channelWeights[ch] = channelWeights[ch];
-                    m_rcpChannelWeights[ch] = 1.0f;
-                    if (m_channelWeights[ch] != 0.0f)
-                        m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];
-                }
-
-                m_wu = 0;
-            }
-
-            void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
-            {
-                MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    MFloat v = pwFloatPixel[ch] * weight;
-
-                    m_tv[ch] = m_tv[ch] + t * v;
-                    m_v[ch] = m_v[ch] + v;
-                }
-                m_tt = m_tt + weight * t * t;
-                m_t = m_t + weight * t;
-                m_w = m_w + weight;
-            }
-
-            void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
-            {
-                MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
-
-                for (int ch = 0; ch < numRealChannels; ch++)
-                {
-                    MFloat v = pwFloatPixel[ch];
-
-                    m_tv[ch] = m_tv[ch] + t * v;
-                    m_v[ch] = m_v[ch] + v;
-                }
-                m_tt = m_tt + t * t;
-                m_t = m_t + t;
-                m_wu++;
-            }
-
-            void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
-            {
-                ContributeUnweightedPW(floatPixel, index, TVectorSize);
-            }
-
-            void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
-            {
-                // a = (tv - t*v/w)/(tt - t*t/w)
-                // b = (v - a*t)/w
-                MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
-
-                ParallelMath::MakeSafeDenominator(w);
-                MFloat wRcp = ParallelMath::Reciprocal(w);
-
-                MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
-
-                ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
-                ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));
-
-                for (int ch = 0; ch < TVectorSize; ch++)
-                {
-                    /*
-                    if (adenom == 0.0)
-                        p1 = p2 = er.v / er.w;
-                    else
-                    {
-                        float4 a = (er.tv - er.t*er.v / er.w) / adenom;
-                        float4 b = (er.v - a * er.t) / er.w;
-                        p1 = b;
-                        p2 = a + b;
-                    }
-                    */
-
-                    MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
-                    MFloat b = (m_v[ch] - a * m_t) * wRcp;
-
-                    MFloat p1 = b;
-                    MFloat p2 = a + b;
-
-                    ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
-                    ParallelMath::ConditionalSet(p2, adenomZero, p1);
-
-                    // Unweight
-                    float inverseWeight = m_rcpChannelWeights[ch];
-
-                    endPoint[0][ch] = p1 * inverseWeight;
-                    endPoint[1][ch] = p2 * inverseWeight;
-                }
-            }
-
-            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                MFloat floatEndPoint[2][TVectorSize];
-                GetRefinedEndpoints(floatEndPoint);
-
-                for (int epi = 0; epi < 2; epi++)
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);
-            }
-
-            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
-            }
-
-            void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                MFloat floatEndPoint[2][TVectorSize];
-                GetRefinedEndpoints(floatEndPoint);
-
-                for (int epi = 0; epi < 2; epi++)
-                {
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                    {
-                        MFloat f = floatEndPoint[epi][ch];
-                        if (isSigned)
-                            endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));
-                        else
-                            endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));
-                    }
-                }
-            }
-        };
-
-        template<int TVectorSize>
-        class AggregatedError
-        {
-        public:
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt31 MUInt31;
-            typedef ParallelMath::Float MFloat;
-
-            AggregatedError()
-            {
-                for (int ch = 0; ch < TVectorSize; ch++)
-                    m_errorUnweighted[ch] = ParallelMath::MakeUInt31(0);
-            }
-
-            void Add(const MUInt16 &channelErrorUnweighted, int ch)
-            {
-                m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted);
-            }
-
-            MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const
-            {
-                if (flags & cvtt::Flags::Uniform)
-                {
-                    MUInt31 total = m_errorUnweighted[0];
-                    for (int ch = 1; ch < TVectorSize; ch++)
-                        total = total + m_errorUnweighted[ch];
-                    return ParallelMath::ToFloat(total);
-                }
-                else
-                {
-                    MFloat total = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0];
-                    for (int ch = 1; ch < TVectorSize; ch++)
-                        total = total + ParallelMath::ToFloat(m_errorUnweighted[ch]) * channelWeightsSq[ch];
-                    return total;
-                }
-            }
-
-        private:
-            MUInt31 m_errorUnweighted[TVectorSize];
-        };
-
-        class BCCommon
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::AInt16 MAInt16;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-
-            static int TweakRoundsForRange(int range)
-            {
-                if (range == 3)
-                    return 3;
-                return 4;
-            }
-
-            template<int TVectorSize>
-            static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError)
-            {
-                for (int ch = 0; ch < numRealChannels; ch++)
-                    aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch);
-            }
-
-            template<int TVectorSize>
-            static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError)
-            {
-                ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError);
-            }
-
-            template<int TVectorSize>
-            static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq)
-            {
-                AggregatedError<TVectorSize> aggError;
-                ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError);
-                return aggError.Finalize(flags, channelWeightsSq);
-            }
-
-            template<int TVectorSize>
-            static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
-            {
-                MFloat error = ParallelMath::MakeFloatZero();
-                if (flags & Flags::Uniform)
-                {
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]);
-                }
-                else
-                {
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
-                }
-
-                return error;
-            }
-
-            template<int TVectorSize>
-            static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
-            {
-                MFloat error = ParallelMath::MakeFloatZero();
-                if (flags & Flags::Uniform)
-                {
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]);
-                }
-                else
-                {
-                    for (int ch = 0; ch < TVectorSize; ch++)
-                        error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
-                }
-
-                return error;
-            }
-
-            template<int TChannelCount>
-            static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
-            {
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < TChannelCount; ch++)
-                        preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
-                }
-            }
-
-            template<int TChannelCount>
-            static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
-            {
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < TChannelCount; ch++)
-                        preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
-                }
-            }
-        };
-
-        class BC7Computer
-        {
-        public:
-            static const int MaxTweakRounds = 4;
-
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-            typedef ParallelMath::Float MFloat;
-
-            struct WorkInfo
-            {
-                MUInt15 m_mode;
-                MFloat m_error;
-                MUInt15 m_ep[3][2][4];
-                MUInt15 m_indexes[16];
-                MUInt15 m_indexes2[16];
-
-                union
-                {
-                    MUInt15 m_partition;
-                    struct IndexSelectorAndRotation
-                    {
-                        MUInt15 m_indexSelector;
-                        MUInt15 m_rotation;
-                    } m_isr;
-                } m_u;
-            };
-
-            static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2])
-            {
-                ParallelMath::RoundTowardNearestForScope roundingMode;
-
-                float tf[2];
-                ComputeTweakFactors(tweak, range, tf);
-
-                MFloat base = ParallelMath::ToFloat(original[0]);
-                MFloat offs = ParallelMath::ToFloat(original[1]) - base;
-
-                result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode);
-                result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode);
-            }
-
-            static void Quantize(MUInt15* color, int bits, int channels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                float maxColor = static_cast<float>((1 << bits) - 1);
-
-                for (int i = 0; i < channels; i++)
-                    color[i] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(ParallelMath::ToFloat(color[i]) * ParallelMath::MakeFloat(1.0f / 255.0f) * maxColor, 0.f, 255.f), roundingMode);
-            }
-
-            static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                uint16_t pShift = static_cast<uint16_t>(1 << (7 - bits));
-                MUInt15 pShiftV = ParallelMath::MakeUInt15(pShift);
-
-                float maxColorF = static_cast<float>(255 - (1 << (7 - bits)));
-
-                float maxQuantized = static_cast<float>((1 << bits) - 1);
-
-                for (int ch = 0; ch < channels; ch++)
-                {
-                    MUInt15 clr = color[ch];
-                    if (p)
-                        clr = ParallelMath::Max(clr, pShiftV) - pShiftV;
-
-                    MFloat rerangedColor = ParallelMath::ToFloat(clr) * maxQuantized / maxColorF;
-
-                    clr = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(rerangedColor, 0.0f, maxQuantized), roundingMode) << 1;
-                    if (p)
-                        clr = clr | ParallelMath::MakeUInt15(1);
-
-                    color[ch] = clr;
-                }
-            }
-
-            static void Unquantize(MUInt15* color, int bits, int channels)
-            {
-                for (int ch = 0; ch < channels; ch++)
-                {
-                    MUInt15 clr = color[ch];
-                    clr = clr << (8 - bits);
-                    color[ch] = clr | ParallelMath::RightShift(clr, bits);
-                }
-            }
-
-            static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    QuantizeP(ep[j], 4, p[j], 3, roundingMode);
-                    Unquantize(ep[j], 5, 3);
-                    ep[j][3] = ParallelMath::MakeUInt15(255);
-                }
-            }
-
-            static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p, const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    QuantizeP(ep[j], 6, p, 3, roundingMode);
-                    Unquantize(ep[j], 7, 3);
-                    ep[j][3] = ParallelMath::MakeUInt15(255);
-                }
-            }
-
-            static void CompressEndpoints2(MUInt15 ep[2][4], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    Quantize(ep[j], 5, 3, roundingMode);
-                    Unquantize(ep[j], 5, 3);
-                    ep[j][3] = ParallelMath::MakeUInt15(255);
-                }
-            }
-
-            static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    QuantizeP(ep[j], 7, p[j], 3, roundingMode);
-                    ep[j][3] = ParallelMath::MakeUInt15(255);
-                }
-            }
-
-            static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    Quantize(epRGB[j], 5, 3, roundingMode);
-                    Unquantize(epRGB[j], 5, 3);
-
-                    Quantize(epA + j, 6, 1, roundingMode);
-                    Unquantize(epA + j, 6, 1);
-                }
-            }
-
-            static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    Quantize(epRGB[j], 7, 3, roundingMode);
-                    Unquantize(epRGB[j], 7, 3);
-                }
-
-                // Alpha is full precision
-                (void)epA;
-            }
-
-            static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                    QuantizeP(ep[j], 7, p[j], 4, roundingMode);
-            }
-
-            static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode)
-            {
-                for (int j = 0; j < 2; j++)
-                {
-                    QuantizeP(ep[j], 5, p[j], 4, roundingMode);
-                    Unquantize(ep[j], 6, 4);
-                }
-            }
-
-            struct SinglePlaneTemporaries
-            {
-                UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];
-                UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];
-
-                MUInt15 fragmentBestIndexes[BC7Data::g_numFragments];
-                MUInt15 shapeBestEP[BC7Data::g_maxFragmentsPerMode][2][4];
-                MFloat shapeBestError[BC7Data::g_maxFragmentsPerMode];
-            };
-
-            static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX);
-
-                MUInt15 intAverage[4];
-                for (int ch = 0; ch < 4; ch++)
-                    intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn);
-
-                MUInt15 eps[2][4];
-                MUInt15 reconstructed[4];
-                MUInt15 index = ParallelMath::MakeUInt15(0);
-
-                for (int epi = 0; epi < 2; epi++)
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                        eps[epi][ch] = ParallelMath::MakeUInt15(0);
-                    eps[epi][3] = ParallelMath::MakeUInt15(255);
-                }
-
-                for (int ch = 0; ch < 3; ch++)
-                    reconstructed[ch] = ParallelMath::MakeUInt15(0);
-                reconstructed[3] = ParallelMath::MakeUInt15(255);
-
-                // Depending on the target index and parity bits, there are multiple valid solid colors.
-                // We want to find the one closest to the actual average.
-                MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX);
-                for (int t = 0; t < numTables; t++)
-                {
-                    const cvtt::Tables::BC7SC::Table& table = *(tables[t]);
-
-                    ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits];
-
-                    MUInt15 candidateReconstructed[4];
-                    MUInt15 candidateEPs[2][4];
-
-                    for (int i = 0; i < ParallelMath::ParallelSize; i++)
-                    {
-                        for (int ch = 0; ch < numRealChannels; ch++)
-                        {
-                            ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i);
-                            assert(avgValue >= 0 && avgValue <= 255);
-
-                            const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue];
-
-                            ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min);
-                            ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max);
-                            ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor);
-                        }
-                    }
-
-                    MFloat avgError = ParallelMath::MakeFloatZero();
-                    for (int ch = 0; ch < numRealChannels; ch++)
-                    {
-                        MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch];
-                        avgError = avgError + delta * delta * channelWeightsSq[ch];
-                    }
-
-                    ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError));
-                    better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations
-
-                    if (ParallelMath::AnySet(better))
-                    {
-                        ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError);
-
-                        MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index);
-
-                        ParallelMath::ConditionalSet(index, better, candidateIndex);
-
-                        for (int ch = 0; ch < numRealChannels; ch++)
-                            ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]);
-
-                        for (int epi = 0; epi < 2; epi++)
-                            for (int ch = 0; ch < numRealChannels; ch++)
-                                ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]);
-                    }
-                }
-
-                AggregatedError<4> aggError;
-                for (int pxi = 0; pxi < shapeLength; pxi++)
-                {
-                    int px = fragmentStart[pxi];
-
-                    BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
-                }
-
-                MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError;
-
-                ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError));
-                if (ParallelMath::AnySet(better))
-                {
-                    shapeBestError = ParallelMath::Min(shapeBestError, error);
-                    for (int epi = 0; epi < 2; epi++)
-                    {
-                        for (int ch = 0; ch < numRealChannels; ch++)
-                            ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]);
-                    }
-
-                    for (int pxi = 0; pxi < shapeLength; pxi++)
-                        ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index);
-                }
-            }
-
-
-            static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], int numTweakRounds, int numRefineRounds, WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                if (numRefineRounds < 1)
-                    numRefineRounds = 1;
-
-                if (numTweakRounds < 1)
-                    numTweakRounds = 1;
-                else if (numTweakRounds > MaxTweakRounds)
-                    numTweakRounds = MaxTweakRounds;
-
-                float channelWeightsSq[4];
-
-                for (int ch = 0; ch < 4; ch++)
-                    channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
-
-                SinglePlaneTemporaries temps;
-
-                MUInt15 maxAlpha = ParallelMath::MakeUInt15(0);
-                MUInt15 minAlpha = ParallelMath::MakeUInt15(255);
-                ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true);
-                for (int px = 0; px < 16; px++)
-                {
-                    MUInt15 a = pixels[px][3];
-                    maxAlpha = ParallelMath::Max(maxAlpha, a);
-                    minAlpha = ParallelMath::Min(minAlpha, a);
-
-                    isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255))));
-                }
-
-                ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255));
-                ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha);
-
-                bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha);
-
-                // Try RGB modes if any block has a min alpha 251 or higher
-                bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha));
-
-                // Try mode 7 if any block has alpha.
-                // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints
-                // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific
-                // situations, and only by at most 1 unit of error per pixel.
-                bool allowMode7 = anyBlockHasAlpha;
-
-                MFloat preWeightedPixels[16][4];
-
-                BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights);
-
-                const int *rgbInitialEPCollapseList = NULL;
-
-                // Get initial RGB endpoints
-                if (allowRGBModes)
-                {
-                    const int *shapeList;
-                    int numShapesToEvaluate;
-
-                    if (flags & Flags::BC7_EnablePartitioning)
-                    {
-                        if (flags & Flags::BC7_Enable3Subsets)
-                        {
-                            shapeList = BC7Data::g_shapeListAll;
-                            rgbInitialEPCollapseList = BC7Data::g_shapeListAll;
-                            numShapesToEvaluate = BC7Data::g_numShapesAll;
-                        }
-                        else
-                        {
-                            shapeList = BC7Data::g_shapeList12;
-                            rgbInitialEPCollapseList = BC7Data::g_shapeList12Collapse;
-                            numShapesToEvaluate = BC7Data::g_numShapes12;
-                        }
-                    }
-                    else
-                    {
-                        shapeList = BC7Data::g_shapeList1;
-                        rgbInitialEPCollapseList = BC7Data::g_shapeList1Collapse;
-                        numShapesToEvaluate = BC7Data::g_numShapes1;
-                    }
-
-                    for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
-                    {
-                        int shape = shapeList[shapeIter];
-
-                        int shapeStart = BC7Data::g_shapeRanges[shape][0];
-                        int shapeSize = BC7Data::g_shapeRanges[shape][1];
-
-                        EndpointSelector<3, 8> epSelector;
-
-                        for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
-                        {
-                            for (int spx = 0; spx < shapeSize; spx++)
-                            {
-                                int px = BC7Data::g_fragments[shapeStart + spx];
-                                epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
-                            }
-                            epSelector.FinishPass(epPass);
-                        }
-                        temps.unfinishedRGB[shapeIter] = epSelector.GetEndpoints(channelWeights);
-                    }
-                }
-
-                const int *rgbaInitialEPCollapseList = BC7Data::g_shapeList12Collapse;
-
-                // Get initial RGBA endpoints
-                {
-                    const int *shapeList = BC7Data::g_shapeList12;
-                    int numShapesToEvaluate = BC7Data::g_numShapes12;
-
-                    for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
-                    {
-                        int shape = shapeList[shapeIter];
-
-                        if (anyBlockHasAlpha || !allowRGBModes)
-                        {
-                            int shapeStart = BC7Data::g_shapeRanges[shape][0];
-                            int shapeSize = BC7Data::g_shapeRanges[shape][1];
-
-                            EndpointSelector<4, 8> epSelector;
-
-                            for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
-                            {
-                                for (int spx = 0; spx < shapeSize; spx++)
-                                {
-                                    int px = BC7Data::g_fragments[shapeStart + spx];
-                                    epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
-                                }
-                                epSelector.FinishPass(epPass);
-                            }
-                            temps.unfinishedRGBA[shapeIter] = epSelector.GetEndpoints(channelWeights);
-                        }
-                        else
-                        {
-                            temps.unfinishedRGBA[shapeIter] = temps.unfinishedRGB[rgbInitialEPCollapseList[shape]].ExpandTo<4>(255);
-                        }
-                    }
-                }
-
-                for (uint16_t mode = 0; mode <= 7; mode++)
-                {
-                    if (!(flags & Flags::BC7_EnablePartitioning) && BC7Data::g_modes[mode].m_numSubsets != 1)
-                        continue;
-
-                    if (!(flags & Flags::BC7_Enable3Subsets) && BC7Data::g_modes[mode].m_numSubsets == 3)
-                        continue;
-
-                    if (mode == 4 || mode == 5)
-                        continue;
-
-                    if (mode < 4 && !allowRGBModes)
-                        continue;
-
-                    if (mode == 7 && !allowMode7)
-                        continue;
-
-                    bool isRGB = (mode < 4);
-
-                    unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits;
-                    int numSubsets = BC7Data::g_modes[mode].m_numSubsets;
-                    int indexPrec = BC7Data::g_modes[mode].m_indexBits;
-
-                    int parityBitMax = 1;
-                    if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint)
-                        parityBitMax = 4;
-                    else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset)
-                        parityBitMax = 2;
-
-                    int numRealChannels = isRGB ? 3 : 4;
-
-                    int numShapes;
-                    const int *shapeList;
-                    const int *shapeCollapseList;
-
-                    if (numSubsets == 1)
-                    {
-                        numShapes = BC7Data::g_numShapes1;
-                        shapeList = BC7Data::g_shapeList1;
-                        shapeCollapseList = BC7Data::g_shapeList1Collapse;
-                    }
-                    else if (numSubsets == 2)
-                    {
-                        numShapes = BC7Data::g_numShapes2;
-                        shapeList = BC7Data::g_shapeList2;
-                        shapeCollapseList = BC7Data::g_shapeList2Collapse;
-                    }
-                    else
-                    {
-                        assert(numSubsets == 3);
-                        if (numPartitions == 16)
-                        {
-                            numShapes = BC7Data::g_numShapes3Short;
-                            shapeList = BC7Data::g_shapeList3Short;
-                            shapeCollapseList = BC7Data::g_shapeList3ShortCollapse;
-                        }
-                        else
-                        {
-                            assert(numPartitions == 64);
-                            numShapes = BC7Data::g_numShapes3;
-                            shapeList = BC7Data::g_shapeList3;
-                            shapeCollapseList = BC7Data::g_shapeList3Collapse;
-                        }
-                    }
-
-                    for (int slot = 0; slot < BC7Data::g_maxFragmentsPerMode; slot++)
-                        temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX);
-
-                    for (int shapeIter = 0; shapeIter < numShapes; shapeIter++)
-                    {
-                        int shape = shapeList[shapeIter];
-                        int shapeStart = BC7Data::g_shapeRanges[shape][0];
-                        int shapeLength = BC7Data::g_shapeRanges[shape][1];
-                        int shapeCollapsedEvalIndex = shapeCollapseList[shape];
-
-                        AggregatedError<1> alphaAggError;
-                        if (isRGB && anyBlockHasAlpha)
-                        {
-                            MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) };
-
-                            for (int pxi = 0; pxi < shapeLength; pxi++)
-                            {
-                                int px = BC7Data::g_fragments[shapeStart + pxi];
-                                MUInt15 original[1] = { pixels[px][3] };
-                                BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError);
-                            }
-                        }
-
-                        float alphaWeightsSq[1] = { channelWeightsSq[3] };
-                        MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq);
-
-                        assert(shapeCollapsedEvalIndex >= 0);
-
-                        MUInt15 tweakBaseEP[MaxTweakRounds][2][4];
-
-                        for (int tweak = 0; tweak < numTweakRounds; tweak++)
-                        {
-                            if (isRGB)
-                            {
-                                temps.unfinishedRGB[rgbInitialEPCollapseList[shape]].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
-                                tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255);
-                            }
-                            else
-                            {
-                                temps.unfinishedRGBA[rgbaInitialEPCollapseList[shape]].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
-                            }
-                        }
-
-                        ParallelMath::Int16CompFlag punchThroughInvalid[4];
-                        for (int pIter = 0; pIter < parityBitMax; pIter++)
-                        {
-                            punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false);
-
-                            if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7))
-                            {
-                                // Modes 6 and 7 have parity bits that affect alpha
-                                if (pIter == 0)
-                                    punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha);
-                                else if (pIter == parityBitMax - 1)
-                                    punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha);
-                                else
-                                    punchThroughInvalid[pIter] = isPunchThrough;
-                            }
-                        }
-
-                        for (int pIter = 0; pIter < parityBitMax; pIter++)
-                        {
-                            if (ParallelMath::AllSet(punchThroughInvalid[pIter]))
-                                continue;
-
-                            bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]);
-
-                            for (int tweak = 0; tweak < numTweakRounds; tweak++)
-                            {
-                                uint16_t p[2];
-                                p[0] = (pIter & 1);
-                                p[1] = ((pIter >> 1) & 1);
-
-                                MUInt15 ep[2][4];
-
-                                for (int epi = 0; epi < 2; epi++)
-                                    for (int ch = 0; ch < 4; ch++)
-                                        ep[epi][ch] = tweakBaseEP[tweak][epi][ch];
-
-                                for (int refine = 0; refine < numRefineRounds; refine++)
-                                {
-                                    switch (mode)
-                                    {
-                                    case 0:
-                                        CompressEndpoints0(ep, p, rtn);
-                                        break;
-                                    case 1:
-                                        CompressEndpoints1(ep, p[0], rtn);
-                                        break;
-                                    case 2:
-                                        CompressEndpoints2(ep, rtn);
-                                        break;
-                                    case 3:
-                                        CompressEndpoints3(ep, p, rtn);
-                                        break;
-                                    case 6:
-                                        CompressEndpoints6(ep, p, rtn);
-                                        break;
-                                    case 7:
-                                        CompressEndpoints7(ep, p, rtn);
-                                        break;
-                                    default:
-                                        assert(false);
-                                        break;
-                                    };
-
-                                    MFloat shapeError = ParallelMath::MakeFloatZero();
-
-                                    IndexSelector<4> indexSelector;
-                                    indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec);
-
-                                    EndpointRefiner<4> epRefiner;
-                                    epRefiner.Init(1 << indexPrec, channelWeights);
-
-                                    MUInt15 indexes[16];
-
-                                    AggregatedError<4> aggError;
-                                    for (int pxi = 0; pxi < shapeLength; pxi++)
-                                    {
-                                        int px = BC7Data::g_fragments[shapeStart + pxi];
-
-                                        MUInt15 index;
-                                        MUInt15 reconstructed[4];
-
-                                        index = indexSelector.SelectIndexLDR(floatPixels[px], rtn);
-                                        indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels);
-
-                                        if (flags & cvtt::Flags::BC7_FastIndexing)
-                                            BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
-                                        else
-                                        {
-                                            MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
-
-                                            MUInt15 altIndexes[2];
-                                            altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
-                                            altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1)));
-
-                                            for (int ii = 0; ii < 2; ii++)
-                                            {
-                                                indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels);
-
-                                                MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
-                                                ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error));
-                                                error = ParallelMath::Min(error, altError);
-                                                ParallelMath::ConditionalSet(index, better, altIndexes[ii]);
-                                            }
-
-                                            shapeError = shapeError + error;
-                                        }
-
-                                        if (refine != numRefineRounds - 1)
-                                            epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels);
-
-                                        indexes[pxi] = index;
-                                    }
-
-                                    if (flags & cvtt::Flags::BC7_FastIndexing)
-                                        shapeError = aggError.Finalize(flags, channelWeightsSq);
-
-                                    if (isRGB)
-                                        shapeError = shapeError + staticAlphaError;
-
-                                    ParallelMath::FloatCompFlag shapeErrorBetter;
-                                    ParallelMath::Int16CompFlag shapeErrorBetter16;
-
-                                    shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shapeCollapsedEvalIndex]);
-                                    shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter);
-
-                                    if (ParallelMath::AnySet(shapeErrorBetter16))
-                                    {
-                                        bool punchThroughOK = true;
-                                        if (needPunchThroughCheck)
-                                        {
-                                            shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16);
-                                            shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16);
-
-                                            if (!ParallelMath::AnySet(shapeErrorBetter16))
-                                                punchThroughOK = false;
-                                        }
-
-                                        if (punchThroughOK)
-                                        {
-                                            ParallelMath::ConditionalSet(temps.shapeBestError[shapeCollapsedEvalIndex], shapeErrorBetter, shapeError);
-                                            for (int epi = 0; epi < 2; epi++)
-                                                for (int ch = 0; ch < numRealChannels; ch++)
-                                                    ParallelMath::ConditionalSet(temps.shapeBestEP[shapeCollapsedEvalIndex][epi][ch], shapeErrorBetter16, ep[epi][ch]);
-
-                                            for (int pxi = 0; pxi < shapeLength; pxi++)
-                                                ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]);
-                                        }
-                                    }
-
-                                    if (refine != numRefineRounds - 1)
-                                        epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn);
-                                } // refine
-                            } // tweak
-                        } // p
-
-                        if (flags & cvtt::Flags::BC7_TrySingleColor)
-                        {
-                            MUInt15 total[4];
-                            for (int ch = 0; ch < 4; ch++)
-                                total[ch] = ParallelMath::MakeUInt15(0);
-
-                            for (int pxi = 0; pxi < shapeLength; pxi++)
-                            {
-                                int px = BC7Data::g_fragments[shapeStart + pxi];
-                                for (int ch = 0; ch < 4; ch++)
-                                    total[ch] = total[ch] + pixels[pxi][ch];
-                            }
-
-                            MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength));
-                            MFloat average[4];
-                            for (int ch = 0; ch < 4; ch++)
-                                average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength;
-
-                            const uint8_t *fragment = BC7Data::g_fragments + shapeStart;
-                            MFloat &shapeBestError = temps.shapeBestError[shapeCollapsedEvalIndex];
-                            MUInt15(&shapeBestEP)[2][4] = temps.shapeBestEP[shapeCollapsedEvalIndex];
-                            MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart;
-
-                            const cvtt::Tables::BC7SC::Table **scTables = NULL;
-                            int numSCTables = 0;
-
-                            switch (mode)
-                            {
-                            case 0:
-                                {
-                                    const cvtt::Tables::BC7SC::Table *tables[] =
-                                    {
-                                        &cvtt::Tables::BC7SC::g_mode0_p00_i1,
-                                        &cvtt::Tables::BC7SC::g_mode0_p00_i2,
-                                        &cvtt::Tables::BC7SC::g_mode0_p00_i3,
-                                        &cvtt::Tables::BC7SC::g_mode0_p01_i1,
-                                        &cvtt::Tables::BC7SC::g_mode0_p01_i2,
-                                        &cvtt::Tables::BC7SC::g_mode0_p01_i3,
-                                        &cvtt::Tables::BC7SC::g_mode0_p10_i1,
-                                        &cvtt::Tables::BC7SC::g_mode0_p10_i2,
-                                        &cvtt::Tables::BC7SC::g_mode0_p10_i3,
-                                        &cvtt::Tables::BC7SC::g_mode0_p11_i1,
-                                        &cvtt::Tables::BC7SC::g_mode0_p11_i2,
-                                        &cvtt::Tables::BC7SC::g_mode0_p11_i3,
-                                    };
-                                    scTables = tables;
-                                    numSCTables = sizeof(tables) / sizeof(tables[0]);
-                                }
-                                break;
-                            case 1:
-                                {
-                                    const cvtt::Tables::BC7SC::Table *tables[] =
-                                    {
-                                        &cvtt::Tables::BC7SC::g_mode1_p0_i1,
-                                        &cvtt::Tables::BC7SC::g_mode1_p0_i2,
-                                        &cvtt::Tables::BC7SC::g_mode1_p0_i3,
-                                        &cvtt::Tables::BC7SC::g_mode1_p1_i1,
-                                        &cvtt::Tables::BC7SC::g_mode1_p1_i2,
-                                        &cvtt::Tables::BC7SC::g_mode1_p1_i3,
-                                    };
-                                    scTables = tables;
-                                    numSCTables = sizeof(tables) / sizeof(tables[0]);
-                                }
-                                break;
-                            case 2:
-                                {
-                                    const cvtt::Tables::BC7SC::Table *tables[] =
-                                    {
-                                        &cvtt::Tables::BC7SC::g_mode2,
-                                    };
-                                    scTables = tables;
-                                    numSCTables = sizeof(tables) / sizeof(tables[0]);
-                                }
-                                break;
-                            case 3:
-                                {
-                                    const cvtt::Tables::BC7SC::Table *tables[] =
-                                    {
-                                        &cvtt::Tables::BC7SC::g_mode3_p0,
-                                        &cvtt::Tables::BC7SC::g_mode3_p1,
-                                    };
-                                    scTables = tables;
-                                    numSCTables = sizeof(tables) / sizeof(tables[0]);
-                                }
-                                break;
-                            case 6:
-                                {
-                                    const cvtt::Tables::BC7SC::Table *tables[] =
-                                    {
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i1,
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i2,
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i3,
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i4,
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i5,
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i6,
-                                        &cvtt::Tables::BC7SC::g_mode6_p0_i7,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i1,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i2,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i3,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i4,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i5,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i6,
-                                        &cvtt::Tables::BC7SC::g_mode6_p1_i7,
-                                    };
-                                    scTables = tables;
-                                    numSCTables = sizeof(tables) / sizeof(tables[0]);
-                                }
-                                break;
-                            case 7:
-                                {
-                                    const cvtt::Tables::BC7SC::Table *tables[] =
-                                    {
-                                        &cvtt::Tables::BC7SC::g_mode7_p00,
-                                        &cvtt::Tables::BC7SC::g_mode7_p01,
-                                        &cvtt::Tables::BC7SC::g_mode7_p10,
-                                        &cvtt::Tables::BC7SC::g_mode7_p11,
-                                    };
-                                    scTables = tables;
-                                    numSCTables = sizeof(tables) / sizeof(tables[0]);
-                                }
-                                break;
-                            default:
-                                assert(false);
-                                break;
-                            }
-
-                            TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn);
-                        }
-                    } // shapeIter
-
-                    for (uint16_t partition = 0; partition < numPartitions; partition++)
-                    {
-                        const int *partitionShapes;
-                        if (numSubsets == 1)
-                            partitionShapes = BC7Data::g_shapes1[partition];
-                        else if (numSubsets == 2)
-                            partitionShapes = BC7Data::g_shapes2[partition];
-                        else
-                        {
-                            assert(numSubsets == 3);
-                            partitionShapes = BC7Data::g_shapes3[partition];
-                        }
-
-                        MFloat totalError = ParallelMath::MakeFloatZero();
-                        for (int subset = 0; subset < numSubsets; subset++)
-                            totalError = totalError + temps.shapeBestError[shapeCollapseList[partitionShapes[subset]]];
-
-                        ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error);
-                        ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
-
-                        if (ParallelMath::AnySet(errorBetter16))
-                        {
-                            for (int subset = 0; subset < numSubsets; subset++)
-                            {
-                                int shape = partitionShapes[subset];
-                                int shapeStart = BC7Data::g_shapeRanges[shape][0];
-                                int shapeLength = BC7Data::g_shapeRanges[shape][1];
-                                int shapeCollapsedEvalIndex = shapeCollapseList[shape];
-
-                                for (int epi = 0; epi < 2; epi++)
-                                    for (int ch = 0; ch < 4; ch++)
-                                        ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shapeCollapsedEvalIndex][epi][ch]);
-
-                                for (int pxi = 0; pxi < shapeLength; pxi++)
-                                {
-                                    int px = BC7Data::g_fragments[shapeStart + pxi];
-                                    ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]);
-                                }
-                            }
-
-                            work.m_error = ParallelMath::Min(totalError, work.m_error);
-                            ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
-                            ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition));
-                        }
-                    }
-                }
-            }
-
-            static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], int numTweakRounds, int numRefineRounds, WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that.
-                // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to
-                // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases:
-                // - Separate alpha channel, then weighted RGB
-                // - Alpha+2 other channels, then the independent channel
-
-                if (!(flags & Flags::BC7_EnableDualPlane))
-                    return;
-
-                if (numRefineRounds < 1)
-                    numRefineRounds = 1;
-
-                if (numTweakRounds < 1)
-                    numTweakRounds = 1;
-                else if (numTweakRounds > MaxTweakRounds)
-                    numTweakRounds = MaxTweakRounds;
-
-                float channelWeightsSq[4];
-                for (int ch = 0; ch < 4; ch++)
-                    channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
-
-                for (uint16_t mode = 4; mode <= 5; mode++)
-                {
-                    for (uint16_t rotation = 0; rotation < 4; rotation++)
-                    {
-                        int alphaChannel = (rotation + 3) & 3;
-                        int redChannel = (rotation == 1) ? 3 : 0;
-                        int greenChannel = (rotation == 2) ? 3 : 1;
-                        int blueChannel = (rotation == 3) ? 3 : 2;
-
-                        MUInt15 rotatedRGB[16][3];
-                        MFloat floatRotatedRGB[16][3];
-
-                        for (int px = 0; px < 16; px++)
-                        {
-                            rotatedRGB[px][0] = pixels[px][redChannel];
-                            rotatedRGB[px][1] = pixels[px][greenChannel];
-                            rotatedRGB[px][2] = pixels[px][blueChannel];
-
-                            for (int ch = 0; ch < 3; ch++)
-                                floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]);
-                        }
-
-                        uint16_t maxIndexSelector = (mode == 4) ? 2 : 1;
-
-                        float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] };
-                        float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] };
-                        float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] };
-                        float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] };
-
-                        float uniformWeight[1] = { 1.0f };   // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error
-
-                        MFloat preWeightedRotatedRGB[16][3];
-                        BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights);
-
-                        for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++)
-                        {
-                            EndpointSelector<3, 8> rgbSelector;
-
-                            for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
-                            {
-                                for (int px = 0; px < 16; px++)
-                                    rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f));
-
-                                rgbSelector.FinishPass(epPass);
-                            }
-
-                            MUInt15 alphaRange[2];
-
-                            alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel];
-                            for (int px = 1; px < 16; px++)
-                            {
-                                alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]);
-                                alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]);
-                            }
-
-                            int rgbPrec = 0;
-                            int alphaPrec = 0;
-
-                            if (mode == 4)
-                            {
-                                rgbPrec = indexSelector ? 3 : 2;
-                                alphaPrec = indexSelector ? 2 : 3;
-                            }
-                            else
-                                rgbPrec = alphaPrec = 2;
-
-                            UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights);
-
-                            MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX);
-                            MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX);
-
-                            MUInt15 bestRGBIndexes[16];
-                            MUInt15 bestAlphaIndexes[16];
-                            MUInt15 bestEP[2][4];
-
-                            for (int px = 0; px < 16; px++)
-                                bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0);
-
-                            for (int tweak = 0; tweak < numTweakRounds; tweak++)
-                            {
-                                MUInt15 rgbEP[2][3];
-                                MUInt15 alphaEP[2];
-
-                                unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]);
-
-                                TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP);
-
-                                for (int refine = 0; refine < numRefineRounds; refine++)
-                                {
-                                    if (mode == 4)
-                                        CompressEndpoints4(rgbEP, alphaEP, rtn);
-                                    else
-                                        CompressEndpoints5(rgbEP, alphaEP, rtn);
-
-
-                                    IndexSelector<1> alphaIndexSelector;
-                                    IndexSelector<3> rgbIndexSelector;
-
-                                    {
-                                        MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } };
-                                        alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec);
-                                    }
-                                    rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec);
-
-                                    EndpointRefiner<3> rgbRefiner;
-                                    EndpointRefiner<1> alphaRefiner;
-
-                                    rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights);
-                                    alphaRefiner.Init(1 << alphaPrec, uniformWeight);
-
-                                    MFloat errorRGB = ParallelMath::MakeFloatZero();
-                                    MFloat errorA = ParallelMath::MakeFloatZero();
-
-                                    MUInt15 rgbIndexes[16];
-                                    MUInt15 alphaIndexes[16];
-
-                                    AggregatedError<3> rgbAggError;
-                                    AggregatedError<1> alphaAggError;
-
-                                    for (int px = 0; px < 16; px++)
-                                    {
-                                        MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn);
-                                        MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn);
-
-                                        MUInt15 reconstructedRGB[3];
-                                        MUInt15 reconstructedAlpha[1];
-
-                                        rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB);
-                                        alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha);
-
-                                        if (flags & cvtt::Flags::BC7_FastIndexing)
-                                        {
-                                            BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError);
-                                            BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError);
-                                        }
-                                        else
-                                        {
-                                            AggregatedError<3> baseRGBAggError;
-                                            AggregatedError<1> baseAlphaAggError;
-
-                                            BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError);
-                                            BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError);
-
-                                            MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
-                                            MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
-
-                                            MUInt15 altRGBIndexes[2];
-                                            MUInt15 altAlphaIndexes[2];
-
-                                            altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
-                                            altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1)));
-
-                                            altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
-                                            altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1)));
-
-                                            for (int ii = 0; ii < 2; ii++)
-                                            {
-                                                rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB);
-                                                alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha);
-
-                                                AggregatedError<3> altRGBAggError;
-                                                AggregatedError<1> altAlphaAggError;
-
-                                                BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError);
-                                                BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError);
-
-                                                MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
-                                                MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
-
-                                                ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError));
-                                                ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError));
-
-                                                rgbError = ParallelMath::Min(altRGBError, rgbError);
-                                                alphaError = ParallelMath::Min(altAlphaError, alphaError);
-
-                                                ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]);
-                                                ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]);
-                                            }
-
-                                            errorRGB = errorRGB + rgbError;
-                                            errorA = errorA + alphaError;
-                                        }
-
-                                        if (refine != numRefineRounds - 1)
-                                        {
-                                            rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex);
-                                            alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex);
-                                        }
-
-                                        if (flags & Flags::BC7_FastIndexing)
-                                        {
-                                            errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq);
-                                            errorA = rgbAggError.Finalize(flags, rotatedAlphaWeightSq);
-                                        }
-
-                                        rgbIndexes[px] = rgbIndex;
-                                        alphaIndexes[px] = alphaIndex;
-                                    }
-
-                                    ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError);
-                                    ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError);
-
-                                    ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter);
-                                    ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter);
-
-                                    if (ParallelMath::AnySet(rgbBetterInt16))
-                                    {
-                                        bestRGBError = ParallelMath::Min(errorRGB, bestRGBError);
-
-                                        for (int px = 0; px < 16; px++)
-                                            ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]);
-
-                                        for (int ep = 0; ep < 2; ep++)
-                                        {
-                                            for (int ch = 0; ch < 3; ch++)
-                                                ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]);
-                                        }
-                                    }
-
-                                    if (ParallelMath::AnySet(alphaBetterInt16))
-                                    {
-                                        bestAlphaError = ParallelMath::Min(errorA, bestAlphaError);
-
-                                        for (int px = 0; px < 16; px++)
-                                            ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]);
-
-                                        for (int ep = 0; ep < 2; ep++)
-                                            ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]);
-                                    }
-
-                                    if (refine != numRefineRounds - 1)
-                                    {
-                                        rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn);
-
-                                        MUInt15 alphaEPTemp[2][1];
-                                        alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn);
-
-                                        for (int i = 0; i < 2; i++)
-                                            alphaEP[i] = alphaEPTemp[i][0];
-                                    }
-                                }	// refine
-                            } // tweak
-
-                            MFloat combinedError = bestRGBError + bestAlphaError;
-
-                            ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error);
-                            ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
-
-                            work.m_error = ParallelMath::Min(combinedError, work.m_error);
-
-                            ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
-                            ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation));
-                            ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector));
-
-                            for (int px = 0; px < 16; px++)
-                            {
-                                ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]);
-                                ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]);
-                            }
-
-                            for (int ep = 0; ep < 2; ep++)
-                                for (int ch = 0; ch < 4; ch++)
-                                    ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]);
-                        }
-                    }
-                }
-            }
-
-            template<class T>
-            static void Swap(T& a, T& b)
-            {
-                T temp = a;
-                a = b;
-                b = temp;
-            }
-
-            static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], int numTweakRounds, int numRefineRounds)
-            {
-                MUInt15 pixels[16][4];
-                MFloat floatPixels[16][4];
-
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < 4; ch++)
-                        ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);
-                }
-
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < 4; ch++)
-                        floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]);
-                }
-
-                WorkInfo work;
-                memset(&work, 0, sizeof(work));
-
-                work.m_error = ParallelMath::MakeFloat(FLT_MAX);
-
-                {
-                    ParallelMath::RoundTowardNearestForScope rtn;
-                    TrySinglePlane(flags, pixels, floatPixels, channelWeights, numTweakRounds, numRefineRounds, work, &rtn);
-                    TryDualPlane(flags, pixels, floatPixels, channelWeights, numTweakRounds, numRefineRounds, work, &rtn);
-                }
-
-                for (int block = 0; block < ParallelMath::ParallelSize; block++)
-                {
-                    PackingVector pv;
-                    pv.Init();
-
-                    ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block);
-                    ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block);
-                    ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block);
-
-                    const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode];
-
-                    ParallelMath::ScalarUInt16 indexes[16];
-                    ParallelMath::ScalarUInt16 indexes2[16];
-                    ParallelMath::ScalarUInt16 endPoints[3][2][4];
-
-                    for (int i = 0; i < 16; i++)
-                    {
-                        indexes[i] = ParallelMath::Extract(work.m_indexes[i], block);
-                        if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                            indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block);
-                    }
-
-                    for (int subset = 0; subset < 3; subset++)
-                    {
-                        for (int ep = 0; ep < 2; ep++)
-                        {
-                            for (int ch = 0; ch < 4; ch++)
-                                endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block);
-                        }
-                    }
-
-                    int fixups[3] = { 0, 0, 0 };
-
-                    if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                    {
-                        bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0);
-                        bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0);
-
-                        if (flipRGB)
-                        {
-                            uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
-                            for (int px = 0; px < 16; px++)
-                                indexes[px] = highIndex - indexes[px];
-                        }
-
-                        if (flipAlpha)
-                        {
-                            uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1;
-                            for (int px = 0; px < 16; px++)
-                                indexes2[px] = highIndex - indexes2[px];
-                        }
-
-                        if (indexSelector)
-                            Swap(flipRGB, flipAlpha);
-
-                        if (flipRGB)
-                        {
-                            for (int ch = 0; ch < 3; ch++)
-                                Swap(endPoints[0][0][ch], endPoints[0][1][ch]);
-                        }
-                        if (flipAlpha)
-                            Swap(endPoints[0][0][3], endPoints[0][1][3]);
-
-                    }
-                    else
-                    {
-                        if (modeInfo.m_numSubsets == 2)
-                            fixups[1] = BC7Data::g_fixupIndexes2[partition];
-                        else if (modeInfo.m_numSubsets == 3)
-                        {
-                            fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
-                            fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
-                        }
-
-                        bool flip[3] = { false, false, false };
-                        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                            flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0);
-
-                        if (flip[0] || flip[1] || flip[2])
-                        {
-                            uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
-                            for (int px = 0; px < 16; px++)
-                            {
-                                int subset = 0;
-                                if (modeInfo.m_numSubsets == 2)
-                                    subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
-                                else if (modeInfo.m_numSubsets == 3)
-                                    subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
-
-                                if (flip[subset])
-                                    indexes[px] = highIndex - indexes[px];
-                            }
-
-                            int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3;
-                            for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                            {
-                                if (flip[subset])
-                                    for (int ch = 0; ch < maxCH; ch++)
-                                        Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]);
-                            }
-                        }
-                    }
-
-                    pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1);
-
-                    if (modeInfo.m_partitionBits)
-                        pv.Pack(partition, modeInfo.m_partitionBits);
-
-                    if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                    {
-                        ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block);
-                        pv.Pack(rotation, 2);
-                    }
-
-                    if (modeInfo.m_hasIndexSelector)
-                        pv.Pack(indexSelector, 1);
-
-                    // Encode RGB
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                        {
-                            for (int ep = 0; ep < 2; ep++)
-                            {
-                                ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch];
-                                epPart >>= (8 - modeInfo.m_rgbBits);
-
-                                pv.Pack(epPart, modeInfo.m_rgbBits);
-                            }
-                        }
-                    }
-
-                    // Encode alpha
-                    if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
-                    {
-                        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                        {
-                            for (int ep = 0; ep < 2; ep++)
-                            {
-                                ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3];
-                                epPart >>= (8 - modeInfo.m_alphaBits);
-
-                                pv.Pack(epPart, modeInfo.m_alphaBits);
-                            }
-                        }
-                    }
-
-                    // Encode parity bits
-                    if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
-                    {
-                        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                        {
-                            ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0];
-                            epPart >>= (7 - modeInfo.m_rgbBits);
-                            epPart &= 1;
-
-                            pv.Pack(epPart, 1);
-                        }
-                    }
-                    else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
-                    {
-                        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                        {
-                            for (int ep = 0; ep < 2; ep++)
-                            {
-                                ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0];
-                                epPart >>= (7 - modeInfo.m_rgbBits);
-                                epPart &= 1;
-
-                                pv.Pack(epPart, 1);
-                            }
-                        }
-                    }
-
-                    // Encode indexes
-                    for (int px = 0; px < 16; px++)
-                    {
-                        int bits = modeInfo.m_indexBits;
-                        if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
-                            bits--;
-
-                        pv.Pack(indexes[px], bits);
-                    }
-
-                    // Encode secondary indexes
-                    if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                    {
-                        for (int px = 0; px < 16; px++)
-                        {
-                            int bits = modeInfo.m_alphaIndexBits;
-                            if (px == 0)
-                                bits--;
-
-                            pv.Pack(indexes2[px], bits);
-                        }
-                    }
-
-                    pv.Flush(packedBlocks);
-
-                    packedBlocks += 16;
-                }
-            }
-
-            static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock)
-            {
-                UnpackingVector pv;
-                pv.Init(packedBlock);
-
-                int mode = 8;
-                for (int i = 0; i < 8; i++)
-                {
-                    if (pv.Unpack(1) == 1)
-                    {
-                        mode = i;
-                        break;
-                    }
-                }
-
-                if (mode > 7)
-                {
-                    for (int px = 0; px < 16; px++)
-                        for (int ch = 0; ch < 4; ch++)
-                            output.m_pixels[px][ch] = 0;
-
-                    return;
-                }
-
-                const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode];
-
-                int partition = 0;
-                if (modeInfo.m_partitionBits)
-                    partition = pv.Unpack(modeInfo.m_partitionBits);
-
-                int rotation = 0;
-                if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                    rotation = pv.Unpack(2);
-
-                int indexSelector = 0;
-                if (modeInfo.m_hasIndexSelector)
-                    indexSelector = pv.Unpack(1);
-
-                // Resolve fixups
-                int fixups[3] = { 0, 0, 0 };
-
-                if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate)
-                {
-                    if (modeInfo.m_numSubsets == 2)
-                        fixups[1] = BC7Data::g_fixupIndexes2[partition];
-                    else if (modeInfo.m_numSubsets == 3)
-                    {
-                        fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
-                        fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
-                    }
-                }
-
-                int endPoints[3][2][4];
-
-                // Decode RGB
-                for (int ch = 0; ch < 3; ch++)
-                {
-                    for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                    {
-                        for (int ep = 0; ep < 2; ep++)
-                            endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits));
-                    }
-                }
-
-                // Decode alpha
-                if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
-                {
-                    for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                    {
-                        for (int ep = 0; ep < 2; ep++)
-                            endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits));
-                    }
-                }
-                else
-                {
-                    for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                    {
-                        for (int ep = 0; ep < 2; ep++)
-                            endPoints[subset][ep][3] = 255;
-                    }
-                }
-
-                int parityBits = 0;
-
-                // Decode parity bits
-                if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
-                {
-                    for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                    {
-                        int p = pv.Unpack(1);
-
-                        for (int ep = 0; ep < 2; ep++)
-                        {
-                            for (int ch = 0; ch < 3; ch++)
-                                endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);
-
-                            if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
-                                endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
-                        }
-                    }
-
-                    parityBits = 1;
-                }
-                else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
-                {
-                    for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                    {
-                        for (int ep = 0; ep < 2; ep++)
-                        {
-                            int p = pv.Unpack(1);
-
-                            for (int ch = 0; ch < 3; ch++)
-                                endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);
-
-                            if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
-                                endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
-                        }
-                    }
-
-                    parityBits = 1;
-                }
-
-                // Fill endpoint bits
-                for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
-                {
-                    for (int ep = 0; ep < 2; ep++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                            endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits));
-
-                        if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
-                            endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits));
-                    }
-                }
-
-                int indexes[16];
-                int indexes2[16];
-
-                // Decode indexes
-                for (int px = 0; px < 16; px++)
-                {
-                    int bits = modeInfo.m_indexBits;
-                    if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
-                        bits--;
-
-                    indexes[px] = pv.Unpack(bits);
-                }
-
-                // Decode secondary indexes
-                if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                {
-                    for (int px = 0; px < 16; px++)
-                    {
-                        int bits = modeInfo.m_alphaIndexBits;
-                        if (px == 0)
-                            bits--;
-
-                        indexes2[px] = pv.Unpack(bits);
-                    }
-                }
-                else
-                {
-                    for (int px = 0; px < 16; px++)
-                        indexes2[px] = 0;
-                }
-
-                const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits];
-                const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits];
-
-                // Decode each pixel
-                for (int px = 0; px < 16; px++)
-                {
-                    int rgbWeight = 0;
-                    int alphaWeight = 0;
-
-                    int rgbIndex = indexes[px];
-
-                    rgbWeight = rgbWeights[indexes[px]];
-
-                    if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined)
-                        alphaWeight = rgbWeight;
-                    else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
-                        alphaWeight = alphaWeights[indexes2[px]];
-
-                    if (indexSelector == 1)
-                    {
-                        int temp = rgbWeight;
-                        rgbWeight = alphaWeight;
-                        alphaWeight = temp;
-                    }
-
-                    int pixel[4] = { 0, 0, 0, 255 };
-
-                    int subset = 0;
-
-                    if (modeInfo.m_numSubsets == 2)
-                        subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
-                    else if (modeInfo.m_numSubsets == 3)
-                        subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
-
-                    for (int ch = 0; ch < 3; ch++)
-                        pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6;
-
-                    if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
-                        pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6;
-
-                    if (rotation != 0)
-                    {
-                        int ch = rotation - 1;
-                        int temp = pixel[ch];
-                        pixel[ch] = pixel[3];
-                        pixel[3] = temp;
-                    }
-
-                    for (int ch = 0; ch < 4; ch++)
-                        output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]);
-                }
-            }
-        };
-
-        class BC6HComputer
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::AInt16 MAInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-            typedef ParallelMath::UInt31 MUInt31;
-
-            static const int MaxTweakRounds = 4;
-            static const int MaxRefineRounds = 3;
-
-            static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru)
-            {
-                assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744))));
-                assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL)));
-
-                // Expand to full range
-                ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0));
-                MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL));
-
-                absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision);
-
-                MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem);
-
-                return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16);
-            }
-
-            static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru)
-            {
-                MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru);
-                return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision));
-            }
-
-            static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL)
-            {
-                MSInt16 zero = ParallelMath::MakeSInt16(0);
-
-                ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero);
-                MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp));
-
-                MSInt16 unq;
-                MUInt15 absUnq;
-
-                if (precision >= 16)
-                {
-                    unq = comp;
-                    absUnq = absComp;
-                }
-                else
-                {
-                    MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2));
-                    ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
-                    ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);
-
-                    absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1)));
-                    ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0));
-                    ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff));
-
-                    unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq));
-                }
-
-                outUnquantized = unq;
-
-                MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5));
-
-                outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq));
-            }
-
-            static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished)
-            {
-                MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp);
-                if (precision < 15)
-                {
-                    MUInt15 zero = ParallelMath::MakeUInt15(0);
-                    MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2));
-
-                    ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
-                    ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);
-
-                    unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision));
-
-                    ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0));
-                    ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff));
-                }
-
-                outUnquantized = unq;
-                outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6));
-            }
-
-            static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                MSInt16 unquantizedEP[2][3];
-                MSInt16 finishedUnquantizedEP[2][3];
-
-                {
-                    ParallelMath::RoundUpForScope ru;
-
-                    for (int epi = 0; epi < 2; epi++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                        {
-                            MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru);
-                            UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);
-                            quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);
-                        }
-                    }
-                }
-
-                indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);
-                indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights);
-
-                MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);
-
-                MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
-
-                ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);
-
-                if (ParallelMath::AnySet(invert))
-                {
-                    ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));
-
-                    indexSelector.ConditionalInvert(invert);
-
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        MAInt16 firstEP = quantizedEndPoints[0][ch];
-                        MAInt16 secondEP = quantizedEndPoints[1][ch];
-
-                        quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);
-                        quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);
-                    }
-                }
-
-                indexes[fixupIndex] = index;
-            }
-
-            static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                MUInt16 unquantizedEP[2][3];
-                MUInt16 finishedUnquantizedEP[2][3];
-
-                {
-                    ParallelMath::RoundUpForScope ru;
-
-                    for (int epi = 0; epi < 2; epi++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                        {
-                            MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru);
-                            UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);
-                            quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);
-                        }
-                    }
-                }
-
-                indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);
-                indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights);
-
-                MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);
-
-                MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
-
-                ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);
-
-                if (ParallelMath::AnySet(invert))
-                {
-                    ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));
-
-                    indexSelector.ConditionalInvert(invert);
-
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        MAInt16 firstEP = quantizedEndPoints[0][ch];
-                        MAInt16 secondEP = quantizedEndPoints[1][ch];
-
-                        quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);
-                        quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);
-                    }
-                }
-
-                indexes[fixupIndex] = index;
-            }
-
-            static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal)
-            {
-                ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);
-
-                MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
-
-                for (int ch = 0; ch < 3; ch++)
-                {
-                    outEncodedEPs[0][0][ch] = ep0[0][ch];
-                    outEncodedEPs[0][1][ch] = ep0[1][ch];
-                    outEncodedEPs[1][0][ch] = ep1[0][ch];
-                    outEncodedEPs[1][1][ch] = ep1[1][ch];
-
-                    if (isTransformed)
-                    {
-                        for (int subset = 0; subset < 2; subset++)
-                        {
-                            for (int epi = 0; epi < 2; epi++)
-                            {
-                                if (epi == 0 && subset == 0)
-                                    continue;
-
-                                MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask);
-
-                                MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]);
-
-                                outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
-
-                                MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask);
-                                allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);
-                            }
-                        }
-                    }
-
-                    if (!ParallelMath::AnySet(allLegal))
-                        break;
-                }
-
-                outIsLegal = allLegal;
-            }
-
-            static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal)
-            {
-                ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);
-
-                MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
-
-                for (int ch = 0; ch < 3; ch++)
-                {
-                    outEncodedEPs[0][ch] = ep[0][ch];
-                    outEncodedEPs[1][ch] = ep[1][ch];
-
-                    if (isTransformed)
-                    {
-                        MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask);
-
-                        MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]);
-
-                        outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
-
-                        MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask);
-                        allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);
-                    }
-                }
-
-                outIsLegal = allLegal;
-            }
-
-            static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds)
-            {
-                if (numTweakRounds < 1)
-                    numTweakRounds = 1;
-                else if (numTweakRounds > MaxTweakRounds)
-                    numTweakRounds = MaxTweakRounds;
-
-                if (numRefineRounds < 1)
-                    numRefineRounds = 1;
-                else if (numRefineRounds > MaxRefineRounds)
-                    numRefineRounds = MaxRefineRounds;
-
-                bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0);
-                float channelWeightsSq[3];
-
-                ParallelMath::RoundTowardNearestForScope rtn;
-
-                MSInt16 pixels[16][3];
-                MFloat floatPixels2CL[16][3];
-                MFloat floatPixelsLinearWeighted[16][3];
-
-                MSInt16 low15Bits = ParallelMath::MakeSInt16(32767);
-
-                for (int ch = 0; ch < 3; ch++)
-                    channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
-
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        MSInt16 pixelValue;
-                        ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue);
-
-                        // Convert from sign+magnitude to 2CL
-                        if (isSigned)
-                        {
-                            ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0));
-                            MSInt16 magnitude = (pixelValue & low15Bits);
-                            ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude);
-                            pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743));
-                        }
-                        else
-                            pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0));
-
-                        pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743));
-
-                        pixels[px][ch] = pixelValue;
-                        floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue);
-                        floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch];
-                    }
-                }
-
-                MFloat preWeightedPixels[16][3];
-
-                BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights);
-
-                MAInt16 bestEndPoints[2][2][3];
-                MUInt15 bestIndexes[16];
-                MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
-                MUInt15 bestMode = ParallelMath::MakeUInt15(0);
-                MUInt15 bestPartition = ParallelMath::MakeUInt15(0);
-
-                for (int px = 0; px < 16; px++)
-                    bestIndexes[px] = ParallelMath::MakeUInt15(0);
-
-                for (int subset = 0; subset < 2; subset++)
-                    for (int epi = 0; epi < 2; epi++)
-                        for (int ch = 0; ch < 3; ch++)
-                            bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0);
-
-                UnfinishedEndpoints<3> partitionedUFEP[32][2];
-                UnfinishedEndpoints<3> singleUFEP;
-
-                // Generate UFEP for partitions
-                for (int p = 0; p < 32; p++)
-                {
-                    int partitionMask = BC7Data::g_partitionMap[p];
-
-                    EndpointSelector<3, 8> epSelectors[2];
-
-                    for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
-                    {
-                        for (int px = 0; px < 16; px++)
-                        {
-                            int subset = (partitionMask >> px) & 1;
-                            epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
-                        }
-
-                        for (int subset = 0; subset < 2; subset++)
-                            epSelectors[subset].FinishPass(pass);
-                    }
-
-                    for (int subset = 0; subset < 2; subset++)
-                        partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights);
-                }
-
-                // Generate UFEP for single
-                {
-                    EndpointSelector<3, 8> epSelector;
-
-                    for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
-                    {
-                        for (int px = 0; px < 16; px++)
-                            epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
-
-                        epSelector.FinishPass(pass);
-                    }
-
-                    singleUFEP = epSelector.GetEndpoints(channelWeights);
-                }
-
-                for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++)
-                {
-                    bool partitioned = (partitionedInt == 1);
-
-                    for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--)
-                    {
-                        if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec])
-                            continue;
-
-                        int numPartitions = partitioned ? 32 : 1;
-                        int numSubsets = partitioned ? 2 : 1;
-                        int indexBits = partitioned ? 3 : 4;
-                        int indexRange = (1 << indexBits);
-
-                        for (int p = 0; p < numPartitions; p++)
-                        {
-                            int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0;
-
-                            const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds;
-
-                            MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3];
-                            MUInt15 metaIndexes[MaxMetaRounds][16];
-                            MFloat metaError[MaxMetaRounds][2];
-
-                            bool roundValid[MaxMetaRounds][2];
-
-                            for (int r = 0; r < MaxMetaRounds; r++)
-                                for (int subset = 0; subset < 2; subset++)
-                                    roundValid[r][subset] = true;
-
-                            for (int subset = 0; subset < numSubsets; subset++)
-                            {
-                                for (int tweak = 0; tweak < MaxTweakRounds; tweak++)
-                                {
-                                    EndpointRefiner<3> refiners[2];
-
-                                    bool abortRemainingRefines = false;
-                                    for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++)
-                                    {
-                                        int metaRound = tweak * MaxRefineRounds + refinePass;
-
-                                        if (tweak >= numTweakRounds || refinePass >= numRefineRounds)
-                                            abortRemainingRefines = true;
-
-                                        if (abortRemainingRefines)
-                                        {
-                                            roundValid[metaRound][subset] = false;
-                                            continue;
-                                        }
-
-                                        MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound];
-                                        MUInt15(&mrIndexes)[16] = metaIndexes[metaRound];
-
-                                        MSInt16 endPointsColorSpace[2][3];
-
-                                        if (refinePass == 0)
-                                        {
-                                            UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP;
-
-                                            if (isSigned)
-                                                ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
-                                            else
-                                                ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
-                                        }
-                                        else
-                                            refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn);
-
-                                        refiners[subset].Init(indexRange, channelWeights);
-
-                                        int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p];
-
-                                        IndexSelectorHDR<3> indexSelector;
-                                        if (isSigned)
-                                            QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
-                                        else
-                                            QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
-
-                                        if (metaRound > 0)
-                                        {
-                                            ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false);
-
-                                            for (int prevRound = 0; prevRound < metaRound; prevRound++)
-                                            {
-                                                MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset];
-
-                                                ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true);
-
-                                                for (int epi = 0; epi < 2; epi++)
-                                                    for (int ch = 0; ch < 3; ch++)
-                                                        same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch]));
-
-                                                anySame = (anySame | same);
-                                                if (ParallelMath::AllSet(anySame))
-                                                    break;
-                                            }
-
-                                            if (ParallelMath::AllSet(anySame))
-                                            {
-                                                roundValid[metaRound][subset] = false;
-                                                continue;
-                                            }
-                                        }
-
-                                        MFloat subsetError = ParallelMath::MakeFloatZero();
-
-                                        {
-                                            for (int px = 0; px < 16; px++)
-                                            {
-                                                if (subset != ((partitionMask >> px) & 1))
-                                                    continue;
-
-                                                MUInt15 index;
-                                                if (px == fixupIndex)
-                                                    index = mrIndexes[px];
-                                                else
-                                                {
-                                                    index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn);
-                                                    mrIndexes[px] = index;
-                                                }
-
-                                                MSInt16 reconstructed[3];
-                                                if (isSigned)
-                                                    indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed);
-                                                else
-                                                    indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed);
-
-                                                subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq));
-
-                                                if (refinePass != numRefineRounds - 1)
-                                                    refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index);
-                                            }
-                                        }
-
-                                        metaError[metaRound][subset] = subsetError;
-                                    }
-                                }
-                            }
-
-                            // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme
-                            int numMeta1 = partitioned ? MaxMetaRounds : 1;
-                            for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++)
-                            {
-                                if (!roundValid[meta0][0])
-                                    continue;
-
-                                for (int meta1 = 0; meta1 < numMeta1; meta1++)
-                                {
-                                    MFloat combinedError = metaError[meta0][0];
-                                    if (partitioned)
-                                    {
-                                        if (!roundValid[meta1][1])
-                                            continue;
-
-                                        combinedError = combinedError + metaError[meta1][1];
-                                    }
-
-                                    ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError);
-                                    if (!ParallelMath::AnySet(errorBetter))
-                                        continue;
-
-                                    ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter);
-
-                                    // Figure out if this is encodable
-                                    for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++)
-                                    {
-                                        const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode];
-
-                                        if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec)
-                                            continue;
-
-                                        MAInt16 encodedEPs[2][2][3];
-                                        ParallelMath::Int16CompFlag isLegal;
-                                        if (partitioned)
-                                            EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal);
-                                        else
-                                            EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal);
-
-                                        ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal);
-                                        if (!ParallelMath::AnySet(isLegalAndBetter))
-                                            continue;
-
-                                        ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter);
-
-                                        ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError);
-                                        ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode)));
-                                        ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p)));
-
-                                        for (int subset = 0; subset < numSubsets; subset++)
-                                        {
-                                            for (int epi = 0; epi < 2; epi++)
-                                            {
-                                                for (int ch = 0; ch < 3; ch++)
-                                                    ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]);
-                                            }
-                                        }
-
-                                        for (int px = 0; px < 16; px++)
-                                        {
-                                            int subset = ((partitionMask >> px) & 1);
-                                            if (subset == 0)
-                                                ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]);
-                                            else
-                                                ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]);
-                                        }
-
-                                        needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter);
-                                        if (!ParallelMath::AnySet(needsCommit))
-                                            break;
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-
-                // At this point, everything should be set
-                for (int block = 0; block < ParallelMath::ParallelSize; block++)
-                {
-                    ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block);
-                    ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block);
-                    int32_t eps[2][2][3];
-                    ParallelMath::ScalarUInt16 indexes[16];
-
-                    const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
-
-                    const BC6HData::ModeDescriptor* desc = BC6HData::g_modeDescriptors[mode];
-
-                    const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;
-
-                    for (int subset = 0; subset < 2; subset++)
-                    {
-                        for (int epi = 0; epi < 2; epi++)
-                        {
-                            for (int ch = 0; ch < 3; ch++)
-                                eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block);
-                        }
-                    }
-
-                    for (int px = 0; px < 16; px++)
-                        indexes[px] = ParallelMath::Extract(bestIndexes[px], block);
-
-                    uint16_t modeID = modeInfo.m_modeID;
-
-                    PackingVector pv;
-                    pv.Init();
-
-                    for (size_t i = 0; i < headerBits; i++)
-                    {
-                        int32_t codedValue = 0;
-                        switch (desc[i].m_eField)
-                        {
-                        case BC6HData::M:  codedValue = modeID; break;
-                        case BC6HData::D:  codedValue = partition; break;
-                        case BC6HData::RW: codedValue = eps[0][0][0]; break;
-                        case BC6HData::RX: codedValue = eps[0][1][0]; break;
-                        case BC6HData::RY: codedValue = eps[1][0][0]; break;
-                        case BC6HData::RZ: codedValue = eps[1][1][0]; break;
-                        case BC6HData::GW: codedValue = eps[0][0][1]; break;
-                        case BC6HData::GX: codedValue = eps[0][1][1]; break;
-                        case BC6HData::GY: codedValue = eps[1][0][1]; break;
-                        case BC6HData::GZ: codedValue = eps[1][1][1]; break;
-                        case BC6HData::BW: codedValue = eps[0][0][2]; break;
-                        case BC6HData::BX: codedValue = eps[0][1][2]; break;
-                        case BC6HData::BY: codedValue = eps[1][0][2]; break;
-                        case BC6HData::BZ: codedValue = eps[1][1][2]; break;
-                        default: assert(false); break;
-                        }
-
-                        pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1);
-                    }
-
-                    int fixupIndex1 = 0;
-                    int indexBits = 4;
-                    if (modeInfo.m_partitioned)
-                    {
-                        fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
-                        indexBits = 3;
-                    }
-
-                    for (int px = 0; px < 16; px++)
-                    {
-                        ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block);
-                        if (px == 0 || px == fixupIndex1)
-                            pv.Pack(index, indexBits - 1);
-                        else
-                            pv.Pack(index, indexBits);
-                    }
-
-                    pv.Flush(packedBlocks + 16 * block);
-                }
-            }
-
-            static void SignExtendSingle(int &v, int bits)
-            {
-                if (v & (1 << (bits - 1)))
-                    v |= -(1 << bits);
-            }
-
-            static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned)
-            {
-                UnpackingVector pv;
-                pv.Init(pBC);
-
-                int numModeBits = 2;
-                int modeBits = pv.Unpack(2);
-                if (modeBits != 0 && modeBits != 1)
-                {
-                    modeBits |= pv.Unpack(3) << 2;
-                    numModeBits += 3;
-                }
-
-                int mode = -1;
-                for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++)
-                {
-                    if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits)
-                    {
-                        mode = possibleMode;
-                        break;
-                    }
-                }
-
-                if (mode < 0)
-                {
-                    for (int px = 0; px < 16; px++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                            output.m_pixels[px][ch] = 0;
-                        output.m_pixels[px][3] = 0x3c00;	// 1.0
-                    }
-                    return;
-                }
-
-                const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
-                const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;
-                const BC6HData::ModeDescriptor* desc = BC6HData::g_modeDescriptors[mode];
-
-                int32_t partition = 0;
-                int32_t eps[2][2][3];
-
-                for (int subset = 0; subset < 2; subset++)
-                    for (int epi = 0; epi < 2; epi++)
-                        for (int ch = 0; ch < 3; ch++)
-                            eps[subset][epi][ch] = 0;
-
-                for (size_t i = numModeBits; i < headerBits; i++)
-                {
-                    int32_t *pCodedValue = NULL;
-
-                    switch (desc[i].m_eField)
-                    {
-                    case BC6HData::D:  pCodedValue = &partition; break;
-                    case BC6HData::RW: pCodedValue = &eps[0][0][0]; break;
-                    case BC6HData::RX: pCodedValue = &eps[0][1][0]; break;
-                    case BC6HData::RY: pCodedValue = &eps[1][0][0]; break;
-                    case BC6HData::RZ: pCodedValue = &eps[1][1][0]; break;
-                    case BC6HData::GW: pCodedValue = &eps[0][0][1]; break;
-                    case BC6HData::GX: pCodedValue = &eps[0][1][1]; break;
-                    case BC6HData::GY: pCodedValue = &eps[1][0][1]; break;
-                    case BC6HData::GZ: pCodedValue = &eps[1][1][1]; break;
-                    case BC6HData::BW: pCodedValue = &eps[0][0][2]; break;
-                    case BC6HData::BX: pCodedValue = &eps[0][1][2]; break;
-                    case BC6HData::BY: pCodedValue = &eps[1][0][2]; break;
-                    case BC6HData::BZ: pCodedValue = &eps[1][1][2]; break;
-                    default: assert(false); break;
-                    }
-
-                    (*pCodedValue) |= pv.Unpack(1) << desc[i].m_uBit;
-                }
-
-
-                uint16_t modeID = modeInfo.m_modeID;
-
-                int fixupIndex1 = 0;
-                int indexBits = 4;
-                int numSubsets = 1;
-                if (modeInfo.m_partitioned)
-                {
-                    fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
-                    indexBits = 3;
-                    numSubsets = 2;
-                }
-
-                int indexes[16];
-                for (int px = 0; px < 16; px++)
-                {
-                    if (px == 0 || px == fixupIndex1)
-                        indexes[px] = pv.Unpack(indexBits - 1);
-                    else
-                        indexes[px] = pv.Unpack(indexBits);
-                }
-
-                if (modeInfo.m_partitioned)
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        if (isSigned)
-                            SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);
-                        if (modeInfo.m_transformed || isSigned)
-                        {
-                            SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);
-                            SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]);
-                            SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]);
-                        }
-                    }
-                }
-                else
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        if (isSigned)
-                            SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);
-                        if (modeInfo.m_transformed || isSigned)
-                            SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);
-                    }
-                }
-
-                int aPrec = modeInfo.m_aPrec;
-
-                if (modeInfo.m_transformed)
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        int wrapMask = (1 << aPrec) - 1;
-
-                        eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask);
-                        if (isSigned)
-                            SignExtendSingle(eps[0][1][ch], aPrec);
-
-                        if (modeInfo.m_partitioned)
-                        {
-                            eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask);
-                            eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask);
-
-                            if (isSigned)
-                            {
-                                SignExtendSingle(eps[1][0][ch], aPrec);
-                                SignExtendSingle(eps[1][1][ch], aPrec);
-                            }
-                        }
-                    }
-                }
-
-                // Unquantize endpoints
-                for (int subset = 0; subset < numSubsets; subset++)
-                {
-                    for (int epi = 0; epi < 2; epi++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                        {
-                            int &v = eps[subset][epi][ch];
-
-                            if (isSigned)
-                            {
-                                if (aPrec >= 16)
-                                {
-                                    // Nothing
-                                }
-                                else
-                                {
-                                    bool s = false;
-                                    int comp = v;
-                                    if (v < 0)
-                                    {
-                                        s = true;
-                                        comp = -comp;
-                                    }
-
-                                    int unq = 0;
-                                    if (comp == 0)
-                                        unq = 0;
-                                    else if (comp >= ((1 << (aPrec - 1)) - 1))
-                                        unq = 0x7fff;
-                                    else
-                                        unq = ((comp << 15) + 0x4000) >> (aPrec - 1);
-
-                                    if (s)
-                                        unq = -unq;
-
-                                    v = unq;
-                                }
-                            }
-                            else
-                            {
-                                if (aPrec >= 15)
-                                {
-                                    // Nothing
-                                }
-                                else if (v == 0)
-                                {
-                                    // Nothing
-                                }
-                                else if (v == ((1 << aPrec) - 1))
-                                    v = 0xffff;
-                                else
-                                    v = ((v << 16) + 0x8000) >> aPrec;
-                            }
-                        }
-                    }
-                }
-
-                const int *weights = BC7Data::g_weightTables[indexBits];
-
-                for (int px = 0; px < 16; px++)
-                {
-                    int subset = 0;
-                    if (modeInfo.m_partitioned)
-                        subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
-
-                    int w = weights[indexes[px]];
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6;
-
-                        if (isSigned)
-                        {
-                            if (comp < 0)
-                                comp = -(((-comp) * 31) >> 5);
-                            else
-                                comp = (comp * 31) >> 5;
-
-                            int s = 0;
-                            if (comp < 0)
-                            {
-                                s = 0x8000;
-                                comp = -comp;
-                            }
-
-                            output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp);
-                        }
-                        else
-                        {
-                            comp = (comp * 31) >> 6;
-                            output.m_pixels[px][ch] = static_cast<uint16_t>(comp);
-                        }
-                    }
-                    output.m_pixels[px][3] = 0x3c00;	// 1.0
-                }
-            }
-        };
-
-        namespace S3TCSingleColorTables
-        {
-            struct SingleColorTableEntry
-            {
-                uint8_t m_min;
-                uint8_t m_max;
-                uint8_t m_actualColor;
-                uint8_t m_span;
-            };
-
-            SingleColorTableEntry g_singleColor5_3[256] =
-            {
-                { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 8, 0, 2, 8 }, { 8, 0, 2, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 16, 8, 10, 8 }, { 33, 0, 11, 33 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 24, 16, 18, 8 }, { 41, 8, 19, 33 }, { 16, 24, 21, 8 }, { 16, 24, 21, 8 }, { 0, 33, 22, 33 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 41, 24, 29, 17 }, { 24, 33, 30, 9 }, { 24, 33, 30, 9 },
-                { 16, 41, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 41, 33, 35, 8 }, { 41, 33, 35, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 },
-                { 24, 49, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 49, 41, 43, 8 }, { 66, 33, 44, 33 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 },
-                { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 57, 49, 51, 8 }, { 74, 41, 52, 33 }, { 49, 57, 54, 8 }, { 49, 57, 54, 8 }, { 33, 66, 55, 33 },
-                { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 74, 57, 62, 17 }, { 57, 66, 63, 9 },
-                { 57, 66, 63, 9 }, { 49, 74, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 74, 66, 68, 8 }, { 74, 66, 68, 8 }, { 66, 74, 71, 8 }, { 66, 74, 71, 8 },
-                { 66, 74, 71, 8 }, { 57, 82, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 82, 74, 76, 8 }, { 99, 66, 77, 33 }, { 74, 82, 79, 8 }, { 74, 82, 79, 8 },
-                { 74, 82, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 90, 82, 84, 8 }, { 107, 74, 85, 33 }, { 82, 90, 87, 8 }, { 82, 90, 87, 8 },
-                { 66, 99, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 107, 90, 95, 17 },
-                { 90, 99, 96, 9 }, { 90, 99, 96, 9 }, { 82, 107, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 107, 99, 101, 8 }, { 107, 99, 101, 8 }, { 99, 107, 104, 8 },
-                { 99, 107, 104, 8 }, { 99, 107, 104, 8 }, { 90, 115, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 115, 107, 109, 8 }, { 132, 99, 110, 33 }, { 107, 115, 112, 8 },
-                { 107, 115, 112, 8 }, { 107, 115, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 123, 115, 117, 8 }, { 140, 107, 118, 33 }, { 115, 123, 120, 8 },
-                { 115, 123, 120, 8 }, { 99, 132, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 },
-                { 140, 123, 128, 17 }, { 123, 132, 129, 9 }, { 123, 132, 129, 9 }, { 115, 140, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 140, 132, 134, 8 }, { 140, 132, 134, 8 },
-                { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 123, 148, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 148, 140, 142, 8 }, { 165, 132, 143, 33 },
-                { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 156, 148, 150, 8 }, { 173, 140, 151, 33 },
-                { 148, 156, 153, 8 }, { 148, 156, 153, 8 }, { 132, 165, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 165, 156, 159, 9 }, { 165, 156, 159, 9 },
-                { 165, 156, 159, 9 }, { 173, 156, 161, 17 }, { 156, 165, 162, 9 }, { 156, 165, 162, 9 }, { 148, 173, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 173, 165, 167, 8 },
-                { 173, 165, 167, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 156, 181, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 181, 173, 175, 8 },
-                { 198, 165, 176, 33 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 189, 181, 183, 8 },
-                { 206, 173, 184, 33 }, { 181, 189, 186, 8 }, { 181, 189, 186, 8 }, { 165, 198, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 198, 189, 192, 9 },
-                { 198, 189, 192, 9 }, { 198, 189, 192, 9 }, { 206, 189, 194, 17 }, { 189, 198, 195, 9 }, { 189, 198, 195, 9 }, { 181, 206, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
-                { 206, 198, 200, 8 }, { 206, 198, 200, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 189, 214, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
-                { 214, 206, 208, 8 }, { 231, 198, 209, 33 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
-                { 222, 214, 216, 8 }, { 239, 206, 217, 33 }, { 214, 222, 219, 8 }, { 214, 222, 219, 8 }, { 198, 231, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
-                { 231, 222, 225, 9 }, { 231, 222, 225, 9 }, { 231, 222, 225, 9 }, { 239, 222, 227, 17 }, { 222, 231, 228, 9 }, { 222, 231, 228, 9 }, { 214, 239, 230, 25 }, { 231, 231, 231, 0 },
-                { 231, 231, 231, 0 }, { 239, 231, 233, 8 }, { 239, 231, 233, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 222, 247, 238, 25 }, { 239, 239, 239, 0 },
-                { 239, 239, 239, 0 }, { 247, 239, 241, 8 }, { 247, 239, 241, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 247, 247, 247, 0 }, { 255, 247, 249, 8 }, { 255, 247, 249, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor6_3[256] =
-            {
-                { 0, 0, 0, 0 }, { 4, 0, 1, 4 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 8, 4, 5, 4 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 12, 8, 9, 4 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 16, 12, 13, 4 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 20, 16, 17, 4 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 24, 20, 21, 4 }, { 20, 24, 22, 4 }, { 69, 0, 23, 69 },
-                { 24, 24, 24, 0 }, { 28, 24, 25, 4 }, { 24, 28, 26, 4 }, { 65, 8, 27, 57 }, { 28, 28, 28, 0 }, { 32, 28, 29, 4 }, { 28, 32, 30, 4 }, { 69, 12, 31, 57 },
-                { 32, 32, 32, 0 }, { 36, 32, 33, 4 }, { 32, 36, 34, 4 }, { 65, 20, 35, 45 }, { 36, 36, 36, 0 }, { 40, 36, 37, 4 }, { 36, 40, 38, 4 }, { 69, 24, 39, 45 },
-                { 40, 40, 40, 0 }, { 44, 40, 41, 4 }, { 40, 44, 42, 4 }, { 65, 32, 43, 33 }, { 44, 44, 44, 0 }, { 48, 44, 45, 4 }, { 44, 48, 46, 4 }, { 69, 36, 47, 33 },
-                { 48, 48, 48, 0 }, { 52, 48, 49, 4 }, { 48, 52, 50, 4 }, { 65, 44, 51, 21 }, { 52, 52, 52, 0 }, { 56, 52, 53, 4 }, { 52, 56, 54, 4 }, { 69, 48, 55, 21 },
-                { 56, 56, 56, 0 }, { 60, 56, 57, 4 }, { 56, 60, 58, 4 }, { 65, 56, 59, 9 }, { 60, 60, 60, 0 }, { 65, 60, 61, 5 }, { 56, 65, 62, 9 }, { 60, 65, 63, 5 },
-                { 56, 69, 64, 13 }, { 65, 65, 65, 0 }, { 69, 65, 66, 4 }, { 65, 69, 67, 4 }, { 60, 73, 68, 13 }, { 69, 69, 69, 0 }, { 73, 69, 70, 4 }, { 69, 73, 71, 4 },
-                { 56, 81, 72, 25 }, { 73, 73, 73, 0 }, { 77, 73, 74, 4 }, { 73, 77, 75, 4 }, { 60, 85, 76, 25 }, { 77, 77, 77, 0 }, { 81, 77, 78, 4 }, { 77, 81, 79, 4 },
-                { 56, 93, 80, 37 }, { 81, 81, 81, 0 }, { 85, 81, 82, 4 }, { 81, 85, 83, 4 }, { 60, 97, 84, 37 }, { 85, 85, 85, 0 }, { 89, 85, 86, 4 }, { 85, 89, 87, 4 },
-                { 56, 105, 88, 49 }, { 89, 89, 89, 0 }, { 93, 89, 90, 4 }, { 89, 93, 91, 4 }, { 60, 109, 92, 49 }, { 93, 93, 93, 0 }, { 97, 93, 94, 4 }, { 93, 97, 95, 4 },
-                { 134, 77, 96, 57 }, { 97, 97, 97, 0 }, { 101, 97, 98, 4 }, { 97, 101, 99, 4 }, { 130, 85, 100, 45 }, { 101, 101, 101, 0 }, { 105, 101, 102, 4 }, { 101, 105, 103, 4 },
-                { 134, 89, 104, 45 }, { 105, 105, 105, 0 }, { 109, 105, 106, 4 }, { 105, 109, 107, 4 }, { 130, 97, 108, 33 }, { 109, 109, 109, 0 }, { 113, 109, 110, 4 }, { 109, 113, 111, 4 },
-                { 134, 101, 112, 33 }, { 113, 113, 113, 0 }, { 117, 113, 114, 4 }, { 113, 117, 115, 4 }, { 130, 109, 116, 21 }, { 117, 117, 117, 0 }, { 121, 117, 118, 4 }, { 117, 121, 119, 4 },
-                { 134, 113, 120, 21 }, { 121, 121, 121, 0 }, { 125, 121, 122, 4 }, { 121, 125, 123, 4 }, { 130, 121, 124, 9 }, { 125, 125, 125, 0 }, { 130, 125, 126, 5 }, { 121, 130, 127, 9 },
-                { 125, 130, 128, 5 }, { 121, 134, 129, 13 }, { 130, 130, 130, 0 }, { 134, 130, 131, 4 }, { 130, 134, 132, 4 }, { 125, 138, 133, 13 }, { 134, 134, 134, 0 }, { 138, 134, 135, 4 },
-                { 134, 138, 136, 4 }, { 121, 146, 137, 25 }, { 138, 138, 138, 0 }, { 142, 138, 139, 4 }, { 138, 142, 140, 4 }, { 125, 150, 141, 25 }, { 142, 142, 142, 0 }, { 146, 142, 143, 4 },
-                { 142, 146, 144, 4 }, { 121, 158, 145, 37 }, { 146, 146, 146, 0 }, { 150, 146, 147, 4 }, { 146, 150, 148, 4 }, { 125, 162, 149, 37 }, { 150, 150, 150, 0 }, { 154, 150, 151, 4 },
-                { 150, 154, 152, 4 }, { 121, 170, 153, 49 }, { 154, 154, 154, 0 }, { 158, 154, 155, 4 }, { 154, 158, 156, 4 }, { 125, 174, 157, 49 }, { 158, 158, 158, 0 }, { 162, 158, 159, 4 },
-                { 158, 162, 160, 4 }, { 199, 142, 161, 57 }, { 162, 162, 162, 0 }, { 166, 162, 163, 4 }, { 162, 166, 164, 4 }, { 195, 150, 165, 45 }, { 166, 166, 166, 0 }, { 170, 166, 167, 4 },
-                { 166, 170, 168, 4 }, { 199, 154, 169, 45 }, { 170, 170, 170, 0 }, { 174, 170, 171, 4 }, { 170, 174, 172, 4 }, { 195, 162, 173, 33 }, { 174, 174, 174, 0 }, { 178, 174, 175, 4 },
-                { 174, 178, 176, 4 }, { 199, 166, 177, 33 }, { 178, 178, 178, 0 }, { 182, 178, 179, 4 }, { 178, 182, 180, 4 }, { 195, 174, 181, 21 }, { 182, 182, 182, 0 }, { 186, 182, 183, 4 },
-                { 182, 186, 184, 4 }, { 199, 178, 185, 21 }, { 186, 186, 186, 0 }, { 190, 186, 187, 4 }, { 186, 190, 188, 4 }, { 195, 186, 189, 9 }, { 190, 190, 190, 0 }, { 195, 190, 191, 5 },
-                { 186, 195, 192, 9 }, { 190, 195, 193, 5 }, { 186, 199, 194, 13 }, { 195, 195, 195, 0 }, { 199, 195, 196, 4 }, { 195, 199, 197, 4 }, { 190, 203, 198, 13 }, { 199, 199, 199, 0 },
-                { 203, 199, 200, 4 }, { 199, 203, 201, 4 }, { 186, 211, 202, 25 }, { 203, 203, 203, 0 }, { 207, 203, 204, 4 }, { 203, 207, 205, 4 }, { 190, 215, 206, 25 }, { 207, 207, 207, 0 },
-                { 211, 207, 208, 4 }, { 207, 211, 209, 4 }, { 186, 223, 210, 37 }, { 211, 211, 211, 0 }, { 215, 211, 212, 4 }, { 211, 215, 213, 4 }, { 190, 227, 214, 37 }, { 215, 215, 215, 0 },
-                { 219, 215, 216, 4 }, { 215, 219, 217, 4 }, { 186, 235, 218, 49 }, { 219, 219, 219, 0 }, { 223, 219, 220, 4 }, { 219, 223, 221, 4 }, { 190, 239, 222, 49 }, { 223, 223, 223, 0 },
-                { 227, 223, 224, 4 }, { 223, 227, 225, 4 }, { 186, 247, 226, 61 }, { 227, 227, 227, 0 }, { 231, 227, 228, 4 }, { 227, 231, 229, 4 }, { 190, 251, 230, 61 }, { 231, 231, 231, 0 },
-                { 235, 231, 232, 4 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 239, 235, 236, 4 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
-                { 243, 239, 240, 4 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 247, 243, 244, 4 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 251, 247, 248, 4 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 255, 251, 252, 4 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor5_2[256] =
-            {
-                { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 },
-                { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 },
-                { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 },
-                { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 },
-                { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 },
-                { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 },
-                { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 },
-                { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 },
-                { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 },
-                { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 },
-                { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 },
-                { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 },
-                { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 },
-                { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 },
-                { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 },
-                { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 },
-                { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 },
-                { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 },
-                { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 },
-                { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 },
-                { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 },
-                { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
-                { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
-                { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
-                { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
-                { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 },
-                { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 },
-                { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor6_2[256] =
-            {
-                { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
-                { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
-                { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 },
-                { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 },
-                { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 },
-                { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 },
-                { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 60, 97, 78, 37 }, { 77, 81, 79, 4 },
-                { 60, 101, 80, 41 }, { 81, 81, 81, 0 }, { 60, 105, 82, 45 }, { 81, 85, 83, 4 }, { 60, 109, 84, 49 }, { 85, 85, 85, 0 }, { 60, 113, 86, 53 }, { 85, 89, 87, 4 },
-                { 60, 117, 88, 57 }, { 89, 89, 89, 0 }, { 60, 121, 90, 61 }, { 89, 93, 91, 4 }, { 60, 125, 92, 65 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 },
-                { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 },
-                { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 },
-                { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 },
-                { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 },
-                { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 },
-                { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 125, 162, 143, 37 },
-                { 142, 146, 144, 4 }, { 125, 166, 145, 41 }, { 146, 146, 146, 0 }, { 125, 170, 147, 45 }, { 146, 150, 148, 4 }, { 125, 174, 149, 49 }, { 150, 150, 150, 0 }, { 125, 178, 151, 53 },
-                { 150, 154, 152, 4 }, { 125, 182, 153, 57 }, { 154, 154, 154, 0 }, { 125, 186, 155, 61 }, { 154, 158, 156, 4 }, { 125, 190, 157, 65 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 },
-                { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 },
-                { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 },
-                { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 },
-                { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 },
-                { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 },
-                { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 },
-                { 190, 227, 208, 37 }, { 207, 211, 209, 4 }, { 190, 231, 210, 41 }, { 211, 211, 211, 0 }, { 190, 235, 212, 45 }, { 211, 215, 213, 4 }, { 190, 239, 214, 49 }, { 215, 215, 215, 0 },
-                { 190, 243, 216, 53 }, { 215, 219, 217, 4 }, { 190, 247, 218, 57 }, { 219, 219, 219, 0 }, { 190, 251, 220, 61 }, { 219, 223, 221, 4 }, { 190, 255, 222, 65 }, { 223, 223, 223, 0 },
-                { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
-                { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
-                { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor5_3_p[256] =
-            {
-                { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 8, 0, 2, 8 }, { 8, 0, 2, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 16, 8, 10, 8 }, { 33, 0, 11, 33 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 24, 16, 18, 8 }, { 41, 8, 19, 33 }, { 16, 24, 21, 8 }, { 16, 24, 21, 8 }, { 0, 33, 22, 33 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 41, 24, 29, 17 }, { 24, 33, 30, 9 }, { 24, 33, 30, 9 },
-                { 16, 41, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 41, 33, 35, 8 }, { 41, 33, 35, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 },
-                { 24, 49, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 49, 41, 43, 8 }, { 66, 33, 44, 33 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 },
-                { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 57, 49, 51, 8 }, { 74, 41, 52, 33 }, { 49, 57, 54, 8 }, { 49, 57, 54, 8 }, { 33, 66, 55, 33 },
-                { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 74, 57, 62, 17 }, { 57, 66, 63, 9 },
-                { 57, 66, 63, 9 }, { 49, 74, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 74, 66, 68, 8 }, { 74, 66, 68, 8 }, { 66, 74, 71, 8 }, { 66, 74, 71, 8 },
-                { 66, 74, 71, 8 }, { 57, 82, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 82, 74, 76, 8 }, { 99, 66, 77, 33 }, { 74, 82, 79, 8 }, { 74, 82, 79, 8 },
-                { 74, 82, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 90, 82, 84, 8 }, { 107, 74, 85, 33 }, { 82, 90, 87, 8 }, { 82, 90, 87, 8 },
-                { 66, 99, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 107, 90, 95, 17 },
-                { 90, 99, 96, 9 }, { 90, 99, 96, 9 }, { 82, 107, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 107, 99, 101, 8 }, { 107, 99, 101, 8 }, { 99, 107, 104, 8 },
-                { 99, 107, 104, 8 }, { 99, 107, 104, 8 }, { 90, 115, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 115, 107, 109, 8 }, { 132, 99, 110, 33 }, { 107, 115, 112, 8 },
-                { 107, 115, 112, 8 }, { 107, 115, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 123, 115, 117, 8 }, { 140, 107, 118, 33 }, { 115, 123, 120, 8 },
-                { 115, 123, 120, 8 }, { 99, 132, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 },
-                { 140, 123, 128, 17 }, { 123, 132, 129, 9 }, { 123, 132, 129, 9 }, { 115, 140, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 140, 132, 134, 8 }, { 140, 132, 134, 8 },
-                { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 123, 148, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 148, 140, 142, 8 }, { 165, 132, 143, 33 },
-                { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 156, 148, 150, 8 }, { 173, 140, 151, 33 },
-                { 148, 156, 153, 8 }, { 148, 156, 153, 8 }, { 132, 165, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 165, 156, 159, 9 }, { 165, 156, 159, 9 },
-                { 165, 156, 159, 9 }, { 173, 156, 161, 17 }, { 156, 165, 162, 9 }, { 156, 165, 162, 9 }, { 148, 173, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 173, 165, 167, 8 },
-                { 173, 165, 167, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 156, 181, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 181, 173, 175, 8 },
-                { 198, 165, 176, 33 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 189, 181, 183, 8 },
-                { 206, 173, 184, 33 }, { 181, 189, 186, 8 }, { 181, 189, 186, 8 }, { 165, 198, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 198, 189, 192, 9 },
-                { 198, 189, 192, 9 }, { 198, 189, 192, 9 }, { 206, 189, 194, 17 }, { 189, 198, 195, 9 }, { 189, 198, 195, 9 }, { 181, 206, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
-                { 206, 198, 200, 8 }, { 206, 198, 200, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 189, 214, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
-                { 214, 206, 208, 8 }, { 231, 198, 209, 33 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
-                { 222, 214, 216, 8 }, { 239, 206, 217, 33 }, { 214, 222, 219, 8 }, { 214, 222, 219, 8 }, { 198, 231, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
-                { 231, 222, 225, 9 }, { 231, 222, 225, 9 }, { 231, 222, 225, 9 }, { 239, 222, 227, 17 }, { 222, 231, 228, 9 }, { 222, 231, 228, 9 }, { 214, 239, 230, 25 }, { 231, 231, 231, 0 },
-                { 231, 231, 231, 0 }, { 239, 231, 233, 8 }, { 239, 231, 233, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 222, 247, 238, 25 }, { 239, 239, 239, 0 },
-                { 239, 239, 239, 0 }, { 247, 239, 241, 8 }, { 247, 239, 241, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 247, 247, 247, 0 }, { 255, 247, 249, 8 }, { 255, 247, 249, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor6_3_p[256] =
-            {
-                { 0, 0, 0, 0 }, { 4, 0, 1, 4 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 8, 4, 5, 4 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 12, 8, 9, 4 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 16, 12, 13, 4 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 20, 16, 17, 4 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 24, 20, 21, 4 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 28, 24, 25, 4 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 32, 28, 29, 4 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
-                { 32, 32, 32, 0 }, { 36, 32, 33, 4 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 40, 36, 37, 4 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
-                { 40, 40, 40, 0 }, { 44, 40, 41, 4 }, { 40, 44, 42, 4 }, { 65, 32, 43, 33 }, { 44, 44, 44, 0 }, { 48, 44, 45, 4 }, { 44, 48, 46, 4 }, { 69, 36, 47, 33 },
-                { 48, 48, 48, 0 }, { 52, 48, 49, 4 }, { 48, 52, 50, 4 }, { 65, 44, 51, 21 }, { 52, 52, 52, 0 }, { 56, 52, 53, 4 }, { 52, 56, 54, 4 }, { 69, 48, 55, 21 },
-                { 56, 56, 56, 0 }, { 60, 56, 57, 4 }, { 56, 60, 58, 4 }, { 65, 56, 59, 9 }, { 60, 60, 60, 0 }, { 65, 60, 61, 5 }, { 56, 65, 62, 9 }, { 60, 65, 63, 5 },
-                { 56, 69, 64, 13 }, { 65, 65, 65, 0 }, { 69, 65, 66, 4 }, { 65, 69, 67, 4 }, { 60, 73, 68, 13 }, { 69, 69, 69, 0 }, { 73, 69, 70, 4 }, { 69, 73, 71, 4 },
-                { 56, 81, 72, 25 }, { 73, 73, 73, 0 }, { 77, 73, 74, 4 }, { 73, 77, 75, 4 }, { 60, 85, 76, 25 }, { 77, 77, 77, 0 }, { 81, 77, 78, 4 }, { 77, 81, 79, 4 },
-                { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 85, 81, 82, 4 }, { 81, 85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 89, 85, 86, 4 }, { 85, 89, 87, 4 },
-                { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 93, 89, 90, 4 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 97, 93, 94, 4 }, { 93, 97, 95, 4 },
-                { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 101, 97, 98, 4 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 105, 101, 102, 4 }, { 101, 105, 103, 4 },
-                { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 109, 105, 106, 4 }, { 105, 109, 107, 4 }, { 130, 97, 108, 33 }, { 109, 109, 109, 0 }, { 113, 109, 110, 4 }, { 109, 113, 111, 4 },
-                { 134, 101, 112, 33 }, { 113, 113, 113, 0 }, { 117, 113, 114, 4 }, { 113, 117, 115, 4 }, { 130, 109, 116, 21 }, { 117, 117, 117, 0 }, { 121, 117, 118, 4 }, { 117, 121, 119, 4 },
-                { 134, 113, 120, 21 }, { 121, 121, 121, 0 }, { 125, 121, 122, 4 }, { 121, 125, 123, 4 }, { 130, 121, 124, 9 }, { 125, 125, 125, 0 }, { 130, 125, 126, 5 }, { 121, 130, 127, 9 },
-                { 125, 130, 128, 5 }, { 121, 134, 129, 13 }, { 130, 130, 130, 0 }, { 134, 130, 131, 4 }, { 130, 134, 132, 4 }, { 125, 138, 133, 13 }, { 134, 134, 134, 0 }, { 138, 134, 135, 4 },
-                { 134, 138, 136, 4 }, { 121, 146, 137, 25 }, { 138, 138, 138, 0 }, { 142, 138, 139, 4 }, { 138, 142, 140, 4 }, { 125, 150, 141, 25 }, { 142, 142, 142, 0 }, { 146, 142, 143, 4 },
-                { 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 150, 146, 147, 4 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 154, 150, 151, 4 },
-                { 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 158, 154, 155, 4 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 162, 158, 159, 4 },
-                { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 166, 162, 163, 4 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 170, 166, 167, 4 },
-                { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 174, 170, 171, 4 }, { 170, 174, 172, 4 }, { 195, 162, 173, 33 }, { 174, 174, 174, 0 }, { 178, 174, 175, 4 },
-                { 174, 178, 176, 4 }, { 199, 166, 177, 33 }, { 178, 178, 178, 0 }, { 182, 178, 179, 4 }, { 178, 182, 180, 4 }, { 195, 174, 181, 21 }, { 182, 182, 182, 0 }, { 186, 182, 183, 4 },
-                { 182, 186, 184, 4 }, { 199, 178, 185, 21 }, { 186, 186, 186, 0 }, { 190, 186, 187, 4 }, { 186, 190, 188, 4 }, { 195, 186, 189, 9 }, { 190, 190, 190, 0 }, { 195, 190, 191, 5 },
-                { 186, 195, 192, 9 }, { 190, 195, 193, 5 }, { 186, 199, 194, 13 }, { 195, 195, 195, 0 }, { 199, 195, 196, 4 }, { 195, 199, 197, 4 }, { 190, 203, 198, 13 }, { 199, 199, 199, 0 },
-                { 203, 199, 200, 4 }, { 199, 203, 201, 4 }, { 186, 211, 202, 25 }, { 203, 203, 203, 0 }, { 207, 203, 204, 4 }, { 203, 207, 205, 4 }, { 190, 215, 206, 25 }, { 207, 207, 207, 0 },
-                { 211, 207, 208, 4 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 215, 211, 212, 4 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 },
-                { 219, 215, 216, 4 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 223, 219, 220, 4 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 },
-                { 227, 223, 224, 4 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 231, 227, 228, 4 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
-                { 235, 231, 232, 4 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 239, 235, 236, 4 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
-                { 243, 239, 240, 4 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 247, 243, 244, 4 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 251, 247, 248, 4 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 255, 251, 252, 4 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor5_2_p[256] =
-            {
-                { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 },
-                { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 },
-                { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 },
-                { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 },
-                { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 },
-                { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 },
-                { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 },
-                { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 },
-                { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 },
-                { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 },
-                { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 },
-                { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 },
-                { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 },
-                { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 },
-                { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 },
-                { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 },
-                { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 },
-                { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 },
-                { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 },
-                { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 },
-                { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 },
-                { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
-                { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
-                { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
-                { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
-                { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 },
-                { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 },
-                { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-
-            SingleColorTableEntry g_singleColor6_2_p[256] =
-            {
-                { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
-                { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
-                { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
-                { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
-                { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
-                { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 },
-                { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 },
-                { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 },
-                { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 },
-                { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 77, 77, 77, 0 }, { 77, 81, 79, 4 },
-                { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 87, 4 },
-                { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 },
-                { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 },
-                { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 },
-                { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 },
-                { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 },
-                { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 },
-                { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 142, 142, 142, 0 },
-                { 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 },
-                { 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 },
-                { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 },
-                { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 },
-                { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 },
-                { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 },
-                { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 },
-                { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 },
-                { 207, 207, 207, 0 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 },
-                { 215, 215, 215, 0 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 },
-                { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
-                { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
-                { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
-                { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
-            };
-        }
-
-        class S3TCComputer
-        {
-        public:
-            typedef ParallelMath::Float MFloat;
-            typedef ParallelMath::SInt16 MSInt16;
-            typedef ParallelMath::UInt15 MUInt15;
-            typedef ParallelMath::UInt16 MUInt16;
-            typedef ParallelMath::SInt32 MSInt32;
-
-            static void Init(MFloat& error)
-            {
-                error = ParallelMath::MakeFloat(FLT_MAX);
-            }
-
-            static void QuantizeTo6Bits(MUInt15& v)
-            {
-                MUInt15 reduced = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(253)) + ParallelMath::MakeUInt16(512), 10));
-                v = (reduced << 2) | ParallelMath::RightShift(reduced, 4);
-            }
-
-            static void QuantizeTo5Bits(MUInt15& v)
-            {
-                MUInt15 reduced = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(249)) + ParallelMath::MakeUInt16(1024), 11));
-                v = (reduced << 3) | ParallelMath::RightShift(reduced, 2);
-            }
-
-            static void QuantizeTo565(MUInt15 endPoint[3])
-            {
-                QuantizeTo5Bits(endPoint[0]);
-                QuantizeTo6Bits(endPoint[1]);
-                QuantizeTo5Bits(endPoint[2]);
-            }
-
-            static MFloat ParanoidFactorForSpan(const MSInt16& span)
-            {
-                return ParallelMath::Abs(ParallelMath::ToFloat(span)) * 0.03f;
-            }
-
-            static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d)
-            {
-                MFloat absDiff = ParallelMath::Abs(ParallelMath::ToFloat(ParallelMath::LosslessCast<MSInt16>::Cast(a) - ParallelMath::LosslessCast<MSInt16>::Cast(b)));
-                absDiff = absDiff + d;
-                return absDiff * absDiff;
-            }
-
-            static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights,
-                MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                float channelWeightsSq[3];
-
-                for (int ch = 0; ch < 3; ch++)
-                    channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
-
-                MUInt15 totals[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) };
-
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                        totals[ch] = totals[ch] + pixels[px][ch];
-                }
-
-                MUInt15 average[3];
-                for (int ch = 0; ch < 3; ch++)
-                    average[ch] = ParallelMath::RightShift(totals[ch] + ParallelMath::MakeUInt15(8), 4);
-
-                const S3TCSingleColorTables::SingleColorTableEntry* rbTable = NULL;
-                const S3TCSingleColorTables::SingleColorTableEntry* gTable = NULL;
-                if (flags & cvtt::Flags::S3TC_Paranoid)
-                {
-                    if (range == 4)
-                    {
-                        rbTable = S3TCSingleColorTables::g_singleColor5_3_p;
-                        gTable = S3TCSingleColorTables::g_singleColor6_3_p;
-                    }
-                    else
-                    {
-                        assert(range == 3);
-                        rbTable = S3TCSingleColorTables::g_singleColor5_2_p;
-                        gTable = S3TCSingleColorTables::g_singleColor6_2_p;
-                    }
-                }
-                else
-                {
-                    if (range == 4)
-                    {
-                        rbTable = S3TCSingleColorTables::g_singleColor5_3;
-                        gTable = S3TCSingleColorTables::g_singleColor6_3;
-                    }
-                    else
-                    {
-                        assert(range == 3);
-                        rbTable = S3TCSingleColorTables::g_singleColor5_2;
-                        gTable = S3TCSingleColorTables::g_singleColor6_2;
-                    }
-                }
-
-                MUInt15 interpolated[3];
-                MUInt15 eps[2][3];
-                MSInt16 spans[3];
-                for (int i = 0; i < ParallelMath::ParallelSize; i++)
-                {
-                    for (int ch = 0; ch < 3; ch++)
-                    {
-                        uint16_t avg = ParallelMath::Extract(average[ch], i);
-                        const S3TCSingleColorTables::SingleColorTableEntry& tableEntry = ((ch == 1) ? gTable[avg] : rbTable[avg]);
-                        ParallelMath::PutUInt15(eps[0][ch], i, tableEntry.m_min);
-                        ParallelMath::PutUInt15(eps[1][ch], i, tableEntry.m_max);
-                        ParallelMath::PutUInt15(interpolated[ch], i, tableEntry.m_actualColor);
-                        ParallelMath::PutSInt16(spans[ch], i, tableEntry.m_span);
-                    }
-                }
-
-                MFloat error = ParallelMath::MakeFloatZero();
-                if (flags & cvtt::Flags::S3TC_Paranoid)
-                {
-                    MFloat spanParanoidFactors[3];
-                    for (int ch = 0; ch < 3; ch++)
-                        spanParanoidFactors[ch] = ParanoidFactorForSpan(spans[ch]);
-
-                    for (int px = 0; px < 16; px++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                            error = error + ParanoidDiff(interpolated[ch], pixels[px][ch], spanParanoidFactors[ch]) * channelWeightsSq[ch];
-                    }
-                }
-                else
-                {
-                    for (int px = 0; px < 16; px++)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                            error = error + ParallelMath::ToFloat(ParallelMath::SqDiffUInt8(interpolated[ch], pixels[px][ch])) * channelWeightsSq[ch];
-                    }
-                }
-
-                ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError);
-                ParallelMath::Int16CompFlag better16 = ParallelMath::FloatFlagToInt16(better);
-
-                if (ParallelMath::AnySet(better16))
-                {
-                    bestError = ParallelMath::Min(bestError, error);
-                    for (int epi = 0; epi < 2; epi++)
-                        for (int ch = 0; ch < 3; ch++)
-                            ParallelMath::ConditionalSet(bestEndpoints[epi][ch], better16, eps[epi][ch]);
-
-                    MUInt15 vindexes = ParallelMath::MakeUInt15(1);
-                    for (int px = 0; px < 16; px++)
-                        ParallelMath::ConditionalSet(bestIndexes[px], better16, vindexes);
-
-                    ParallelMath::ConditionalSet(bestRange, better16, ParallelMath::MakeUInt15(range));
-                }
-            }
-
-            static void TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights,
-                MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn)
-            {
-                float channelWeightsSq[3];
-
-                for (int ch = 0; ch < 3; ch++)
-                    channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
-
-                MUInt15 endPoints[2][3];
-
-                for (int ep = 0; ep < 2; ep++)
-                    for (int ch = 0; ch < 3; ch++)
-                        endPoints[ep][ch] = unquantizedEndPoints[ep][ch];
-
-                QuantizeTo565(endPoints[0]);
-                QuantizeTo565(endPoints[1]);
-
-                IndexSelector<3> selector;
-                selector.Init<false>(channelWeights, endPoints, range);
-
-                MUInt15 indexes[16];
-
-                MFloat paranoidFactors[3];
-                for (int ch = 0; ch < 3; ch++)
-                    paranoidFactors[ch] = ParanoidFactorForSpan(ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[0][ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[1][ch]));
-
-                MFloat error = ParallelMath::MakeFloatZero();
-                AggregatedError<3> aggError;
-                for (int px = 0; px < 16; px++)
-                {
-                    MUInt15 index = selector.SelectIndexLDR(floatPixels[px], rtn);
-                    indexes[px] = index;
-
-                    if (refiner)
-                        refiner->ContributeUnweightedPW(preWeightedPixels[px], index);
-
-                    MUInt15 reconstructed[3];
-                    selector.ReconstructLDRPrecise(index, reconstructed);
-
-                    if (flags & Flags::S3TC_Paranoid)
-                    {
-                        for (int ch = 0; ch < 3; ch++)
-                            error = error + ParanoidDiff(reconstructed[ch], pixels[px][ch], paranoidFactors[ch]) * channelWeightsSq[ch];
-                    }
-                    else
-                        BCCommon::ComputeErrorLDR<3>(flags, reconstructed, pixels[px], aggError);
-                }
-
-                if (!(flags & Flags::S3TC_Paranoid))
-                    error = aggError.Finalize(flags, channelWeightsSq);
-
-                ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError);
-
-                if (ParallelMath::AnySet(better))
-                {
-                    ParallelMath::Int16CompFlag betterInt16 = ParallelMath::FloatFlagToInt16(better);
-
-                    ParallelMath::ConditionalSet(bestError, better, error);
-
-                    for (int ep = 0; ep < 2; ep++)
-                        for (int ch = 0; ch < 3; ch++)
-                            ParallelMath::ConditionalSet(bestEndpoints[ep][ch], betterInt16, endPoints[ep][ch]);
-
-                    for (int px = 0; px < 16; px++)
-                        ParallelMath::ConditionalSet(bestIndexes[px], betterInt16, indexes[px]);
-
-                    ParallelMath::ConditionalSet(bestRange, betterInt16, ParallelMath::MakeUInt15(static_cast<uint16_t>(range)));
-                }
-            }
-
-            static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest,
-                const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange,
-                const ParallelMath::RoundTowardNearestForScope* rtn)
-            {
-                UNREFERENCED_PARAMETER(alphaTest);
-                UNREFERENCED_PARAMETER(flags);
-
-                EndpointRefiner<3> refiner;
-
-                refiner.Init(nCounts, channelWeights);
-
-                bool escape = false;
-                int e = 0;
-                for (int i = 0; i < nCounts; i++)
-                {
-                    for (int n = 0; n < counts[i]; n++)
-                    {
-                        ParallelMath::Int16CompFlag valid = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>(n)), numElements);
-                        if (!ParallelMath::AnySet(valid))
-                        {
-                            escape = true;
-                            break;
-                        }
-
-                        if (ParallelMath::AllSet(valid))
-                            refiner.ContributeUnweightedPW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i)));
-                        else
-                        {
-                            MFloat weight = ParallelMath::Select(ParallelMath::Int16FlagToFloat(valid), ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloat(0.0f));
-                            refiner.ContributePW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), weight);
-                        }
-                    }
-
-                    if (escape)
-                        break;
-                }
-
-                MUInt15 endPoints[2][3];
-                refiner.GetRefinedEndpointsLDR(endPoints, rtn);
-
-                TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, nCounts, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, NULL, rtn);
-            }
-
-            static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride)
-            {
-                UNREFERENCED_PARAMETER(flags);
-                ParallelMath::RoundTowardNearestForScope rtn;
-
-                float weights[1] = { 1.0f };
-
-                MUInt15 pixels[16];
-                MFloat floatPixels[16];
-
-                for (int px = 0; px < 16; px++)
-                {
-                    ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]);
-                    floatPixels[px] = ParallelMath::ToFloat(pixels[px]);
-                }
-
-                MUInt15 ep[2][1] = { { ParallelMath::MakeUInt15(0) },{ ParallelMath::MakeUInt15(255) } };
-
-                IndexSelector<1> selector;
-                selector.Init<false>(weights, ep, 16);
-
-                MUInt15 indexes[16];
-
-                for (int px = 0; px < 16; px++)
-                    indexes[px] = selector.SelectIndexLDR(&floatPixels[px], &rtn);
-
-                for (int block = 0; block < ParallelMath::ParallelSize; block++)
-                {
-                    for (int px = 0; px < 16; px += 8)
-                    {
-                        int index0 = ParallelMath::Extract(indexes[px], block);
-                        int index1 = ParallelMath::Extract(indexes[px], block);
-
-                        packedBlocks[px / 2] = static_cast<uint8_t>(index0 | (index1 << 4));
-                    }
-
-                    packedBlocks += packedBlockStride;
-                }
-            }
-
-            static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds)
-            {
-                if (maxTweakRounds < 1)
-                    maxTweakRounds = 1;
-
-                if (numRefineRounds < 1)
-                    numRefineRounds = 1;
-
-                ParallelMath::RoundTowardNearestForScope rtn;
-
-                float oneWeight[1] = { 1.0f };
-
-                MUInt15 pixels[16];
-                MFloat floatPixels[16];
-
-                MUInt15 highTerminal = isSigned ? ParallelMath::MakeUInt15(254) : ParallelMath::MakeUInt15(255);
-                MUInt15 highTerminalMinusOne = highTerminal - ParallelMath::MakeUInt15(1);
-
-                for (int px = 0; px < 16; px++)
-                {
-                    ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]);
-
-                    if (isSigned)
-                        pixels[px] = ParallelMath::Min(pixels[px], highTerminal);
-
-                    floatPixels[px] = ParallelMath::ToFloat(pixels[px]);
-                }
-
-                MUInt15 sortedPixels[16];
-                for (int px = 0; px < 16; px++)
-                    sortedPixels[px] = pixels[px];
-
-                for (int sortEnd = 15; sortEnd > 0; sortEnd--)
-                {
-                    for (int sortOffset = 0; sortOffset < sortEnd; sortOffset++)
-                    {
-                        MUInt15 a = sortedPixels[sortOffset];
-                        MUInt15 b = sortedPixels[sortOffset + 1];
-
-                        sortedPixels[sortOffset] = ParallelMath::Min(a, b);
-                        sortedPixels[sortOffset + 1] = ParallelMath::Max(a, b);
-                    }
-                }
-
-                MUInt15 zero = ParallelMath::MakeUInt15(0);
-                MUInt15 one = ParallelMath::MakeUInt15(1);
-
-                MUInt15 bestIsFullRange = zero;
-                MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
-                MUInt15 bestEP[2] = { zero, zero };
-                MUInt15 bestIndexes[16] = {
-                    zero, zero, zero, zero,
-                    zero, zero, zero, zero,
-                    zero, zero, zero, zero,
-                    zero, zero, zero, zero
-                };
-
-                // Full-precision
-                {
-                    MUInt15 minEP = sortedPixels[0];
-                    MUInt15 maxEP = sortedPixels[15];
-
-                    MFloat base[1] = { ParallelMath::ToFloat(minEP) };
-                    MFloat offset[1] = { ParallelMath::ToFloat(maxEP - minEP) };
-
-                    UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset);
-
-                    int numTweakRounds = BCCommon::TweakRoundsForRange(8);
-                    if (numTweakRounds > maxTweakRounds)
-                        numTweakRounds = maxTweakRounds;
-
-                    for (int tweak = 0; tweak < numTweakRounds; tweak++)
-                    {
-                        MUInt15 ep[2][1];
-
-                        ufep.FinishLDR(tweak, 8, ep[0], ep[1]);
-
-                        for (int refinePass = 0; refinePass < numRefineRounds; refinePass++)
-                        {
-                            EndpointRefiner<1> refiner;
-                            refiner.Init(8, oneWeight);
-
-                            if (isSigned)
-                                for (int epi = 0; epi < 2; epi++)
-                                    ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal);
-
-                            IndexSelector<1> indexSelector;
-                            indexSelector.Init<false>(oneWeight, ep, 8);
-
-                            MUInt15 indexes[16];
-
-                            AggregatedError<1> aggError;
-                            for (int px = 0; px < 16; px++)
-                            {
-                                MUInt15 index = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn);
-
-                                MUInt15 reconstructedPixel;
-
-                                indexSelector.ReconstructLDRPrecise(index, &reconstructedPixel);
-                                BCCommon::ComputeErrorLDR<1>(flags, &reconstructedPixel, &pixels[px], aggError);
-
-                                if (refinePass != numRefineRounds - 1)
-                                    refiner.ContributeUnweightedPW(&floatPixels[px], index);
-
-                                indexes[px] = index;
-                            }
-                            MFloat error = aggError.Finalize(flags | Flags::Uniform, oneWeight);
-
-                            ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
-                            ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
-
-                            if (ParallelMath::AnySet(errorBetter16))
-                            {
-                                bestError = ParallelMath::Min(error, bestError);
-                                ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, one);
-                                for (int px = 0; px < 16; px++)
-                                    ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]);
-
-                                for (int epi = 0; epi < 2; epi++)
-                                    ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]);
-                            }
-
-                            if (refinePass != numRefineRounds - 1)
-                                refiner.GetRefinedEndpointsLDR(ep, &rtn);
-                        }
-                    }
-                }
-
-                // Reduced precision with special endpoints
-                {
-                    MUInt15 bestHeuristicMin = sortedPixels[0];
-                    MUInt15 bestHeuristicMax = sortedPixels[15];
-
-                    ParallelMath::Int16CompFlag canTryClipping;
-
-                    // In reduced precision, we want try putting endpoints at the reserved indexes at the ends.
-                    // The heuristic we use is to assign indexes to the end as long as they aren't off by more than half of the index range.
-                    // This will usually not find anything, but it's cheap to check.
-
-                    {
-                        MUInt15 largestPossibleRange = bestHeuristicMax - bestHeuristicMin; // Max: 255
-                        MUInt15 lowestPossibleClearance = ParallelMath::Min(bestHeuristicMin, static_cast<MUInt15>(highTerminal - bestHeuristicMax));
-
-                        MUInt15 lowestPossibleClearanceTimes10 = (lowestPossibleClearance << 2) + (lowestPossibleClearance << 4);
-                        canTryClipping = ParallelMath::LessOrEqual(lowestPossibleClearanceTimes10, largestPossibleRange);
-                    }
-
-                    if (ParallelMath::AnySet(canTryClipping))
-                    {
-                        MUInt15 lowClearances[16];
-                        MUInt15 highClearances[16];
-                        MUInt15 bestSkipCount = ParallelMath::MakeUInt15(0);
-
-                        lowClearances[0] = highClearances[0] = ParallelMath::MakeUInt15(0);
-
-                        for (int px = 1; px < 16; px++)
-                        {
-                            lowClearances[px] = sortedPixels[px - 1];
-                            highClearances[px] = highTerminal - sortedPixels[16 - px];
-                        }
-
-                        for (uint16_t firstIndex = 0; firstIndex < 16; firstIndex++)
-                        {
-                            uint16_t numSkippedLow = firstIndex;
-
-                            MUInt15 lowClearance = lowClearances[firstIndex];
-
-                            for (uint16_t lastIndex = firstIndex; lastIndex < 16; lastIndex++)
-                            {
-                                uint16_t numSkippedHigh = 15 - lastIndex;
-                                uint16_t numSkipped = numSkippedLow + numSkippedHigh;
-
-                                MUInt15 numSkippedV = ParallelMath::MakeUInt15(numSkipped);
-
-                                ParallelMath::Int16CompFlag areMoreSkipped = ParallelMath::Less(bestSkipCount, numSkippedV);
-
-                                if (!ParallelMath::AnySet(areMoreSkipped))
-                                    continue;
-
-                                MUInt15 clearance = ParallelMath::Max(highClearances[numSkippedHigh], lowClearance);
-                                MUInt15 clearanceTimes10 = (clearance << 2) + (clearance << 4);
-
-                                MUInt15 range = sortedPixels[lastIndex] - sortedPixels[firstIndex];
-
-                                ParallelMath::Int16CompFlag isBetter = (areMoreSkipped & ParallelMath::LessOrEqual(clearanceTimes10, range));
-                                ParallelMath::ConditionalSet(bestHeuristicMin, isBetter, sortedPixels[firstIndex]);
-                                ParallelMath::ConditionalSet(bestHeuristicMax, isBetter, sortedPixels[lastIndex]);
-                            }
-                        }
-                    }
-
-                    MUInt15 bestSimpleMin = one;
-                    MUInt15 bestSimpleMax = highTerminalMinusOne;
-
-                    for (int px = 0; px < 16; px++)
-                    {
-                        ParallelMath::ConditionalSet(bestSimpleMin, ParallelMath::Less(zero, sortedPixels[15 - px]), sortedPixels[15 - px]);
-                        ParallelMath::ConditionalSet(bestSimpleMax, ParallelMath::Less(sortedPixels[px], highTerminal), sortedPixels[px]);
-                    }
-
-                    MUInt15 minEPs[2] = { bestSimpleMin, bestHeuristicMin };
-                    MUInt15 maxEPs[2] = { bestSimpleMax, bestHeuristicMax };
-
-                    int minEPRange = 2;
-                    if (ParallelMath::AllSet(ParallelMath::Equal(minEPs[0], minEPs[1])))
-                        minEPRange = 1;
-
-                    int maxEPRange = 2;
-                    if (ParallelMath::AllSet(ParallelMath::Equal(maxEPs[0], maxEPs[1])))
-                        maxEPRange = 1;
-
-                    for (int minEPIndex = 0; minEPIndex < minEPRange; minEPIndex++)
-                    {
-                        for (int maxEPIndex = 0; maxEPIndex < maxEPRange; maxEPIndex++)
-                        {
-                            MFloat base[1] = { ParallelMath::ToFloat(minEPs[minEPIndex]) };
-                            MFloat offset[1] = { ParallelMath::ToFloat(maxEPs[maxEPIndex] - minEPs[minEPIndex]) };
-
-                            UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset);
-
-                            int numTweakRounds = BCCommon::TweakRoundsForRange(6);
-                            if (numTweakRounds > maxTweakRounds)
-                                numTweakRounds = maxTweakRounds;
-
-                            for (int tweak = 0; tweak < numTweakRounds; tweak++)
-                            {
-                                MUInt15 ep[2][1];
-
-                                ufep.FinishLDR(tweak, 8, ep[0], ep[1]);
-
-                                for (int refinePass = 0; refinePass < numRefineRounds; refinePass++)
-                                {
-                                    EndpointRefiner<1> refiner;
-                                    refiner.Init(6, oneWeight);
-
-                                    if (isSigned)
-                                        for (int epi = 0; epi < 2; epi++)
-                                            ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal);
-
-                                    IndexSelector<1> indexSelector;
-                                    indexSelector.Init<false>(oneWeight, ep, 6);
-
-                                    MUInt15 indexes[16];
-                                    MFloat error = ParallelMath::MakeFloatZero();
-
-                                    for (int px = 0; px < 16; px++)
-                                    {
-                                        MUInt15 selectedIndex = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn);
-
-                                        MUInt15 reconstructedPixel;
-
-                                        indexSelector.ReconstructLDRPrecise(selectedIndex, &reconstructedPixel);
-
-                                        MFloat zeroError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &zero, &pixels[px], 1, oneWeight);
-                                        MFloat highTerminalError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &highTerminal, &pixels[px], 1, oneWeight);
-                                        MFloat selectedIndexError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &reconstructedPixel, &pixels[px], 1, oneWeight);
-
-                                        MFloat bestPixelError = zeroError;
-                                        MUInt15 index = ParallelMath::MakeUInt15(6);
-
-                                        ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(ParallelMath::Less(highTerminalError, bestPixelError)), ParallelMath::MakeUInt15(7));
-                                        bestPixelError = ParallelMath::Min(bestPixelError, highTerminalError);
-
-                                        ParallelMath::FloatCompFlag selectedIndexBetter = ParallelMath::Less(selectedIndexError, bestPixelError);
-
-                                        if (ParallelMath::AllSet(selectedIndexBetter))
-                                        {
-                                            if (refinePass != numRefineRounds - 1)
-                                                refiner.ContributeUnweightedPW(&floatPixels[px], selectedIndex);
-                                        }
-                                        else
-                                        {
-                                            MFloat refineWeight = ParallelMath::Select(selectedIndexBetter, ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloatZero());
-
-                                            if (refinePass != numRefineRounds - 1)
-                                                refiner.ContributePW(&floatPixels[px], selectedIndex, refineWeight);
-                                        }
-
-                                        ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(selectedIndexBetter), selectedIndex);
-                                        bestPixelError = ParallelMath::Min(bestPixelError, selectedIndexError);
-
-                                        error = error + bestPixelError;
-
-                                        indexes[px] = index;
-                                    }
-
-                                    ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
-                                    ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
-
-                                    if (ParallelMath::AnySet(errorBetter16))
-                                    {
-                                        bestError = ParallelMath::Min(error, bestError);
-                                        ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, zero);
-                                        for (int px = 0; px < 16; px++)
-                                            ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]);
-
-                                        for (int epi = 0; epi < 2; epi++)
-                                            ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]);
-                                    }
-
-                                    if (refinePass != numRefineRounds - 1)
-                                        refiner.GetRefinedEndpointsLDR(ep, &rtn);
-                                }
-                            }
-                        }
-                    }
-                }
-
-                for (int block = 0; block < ParallelMath::ParallelSize; block++)
-                {
-                    int ep0 = ParallelMath::Extract(bestEP[0], block);
-                    int ep1 = ParallelMath::Extract(bestEP[1], block);
-                    int isFullRange = ParallelMath::Extract(bestIsFullRange, block);
-
-                    if (isSigned)
-                    {
-                        ep0 -= 127;
-                        ep1 -= 127;
-
-                        assert(ep0 >= -127 && ep0 <= 127);
-                        assert(ep1 >= -127 && ep1 <= 127);
-                    }
-
-
-                    bool swapEndpoints = (isFullRange != 0) != (ep0 > ep1);
-
-                    if (swapEndpoints)
-                        std::swap(ep0, ep1);
-
-                    uint16_t dumpBits = 0;
-                    int dumpBitsOffset = 0;
-                    int dumpByteOffset = 2;
-                    packedBlocks[0] = static_cast<uint8_t>(ep0 & 0xff);
-                    packedBlocks[1] = static_cast<uint8_t>(ep1 & 0xff);
-
-                    int maxValue = (isFullRange != 0) ? 7 : 5;
-
-                    for (int px = 0; px < 16; px++)
-                    {
-                        int index = ParallelMath::Extract(bestIndexes[px], block);
-
-                        if (swapEndpoints && index <= maxValue)
-                            index = maxValue - index;
-
-                        if (index != 0)
-                        {
-                            if (index == maxValue)
-                                index = 1;
-                            else if (index < maxValue)
-                                index++;
-                        }
-
-                        assert(index >= 0 && index < 8);
-
-                        dumpBits |= static_cast<uint16_t>(index << dumpBitsOffset);
-                        dumpBitsOffset += 3;
-
-                        if (dumpBitsOffset >= 8)
-                        {
-                            assert(dumpByteOffset < 8);
-                            packedBlocks[dumpByteOffset] = static_cast<uint8_t>(dumpBits & 0xff);
-                            dumpBits >>= 8;
-                            dumpBitsOffset -= 8;
-                            dumpByteOffset++;
-                        }
-                    }
-
-                    assert(dumpBitsOffset == 0);
-                    assert(dumpByteOffset == 8);
-
-                    packedBlocks += packedBlockStride;
-                }
-            }
-
-            static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds)
-            {
-                ParallelMath::RoundTowardNearestForScope rtn;
-
-                if (numRefineRounds < 1)
-                    numRefineRounds = 1;
-
-                if (maxTweakRounds < 1)
-                    maxTweakRounds = 1;
-
-                EndpointSelector<3, 8> endpointSelector;
-
-                MUInt15 pixels[16][4];
-                MFloat floatPixels[16][4];
-
-                MFloat preWeightedPixels[16][4];
-
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < 4; ch++)
-                        ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);
-                }
-
-                for (int px = 0; px < 16; px++)
-                {
-                    for (int ch = 0; ch < 4; ch++)
-                        floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]);
-                }
-
-                if (alphaTest)
-                {
-                    MUInt15 threshold = ParallelMath::MakeUInt15(static_cast<uint16_t>(floor(alphaThreshold * 255.0f + 0.5f)));
-
-                    for (int px = 0; px < 16; px++)
-                    {
-                        ParallelMath::Int16CompFlag belowThreshold = ParallelMath::Less(pixels[px][3], threshold);
-                        pixels[px][3] = ParallelMath::Select(belowThreshold, ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(255));
-                    }
-                }
-
-                BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights);
-
-                MUInt15 minAlpha = ParallelMath::MakeUInt15(255);
-
-                for (int px = 0; px < 16; px++)
-                    minAlpha = ParallelMath::Min(minAlpha, pixels[px][3]);
-
-                MFloat pixelWeights[16];
-                for (int px = 0; px < 16; px++)
-                {
-                    pixelWeights[px] = ParallelMath::MakeFloat(1.0f);
-                    if (alphaTest)
-                    {
-                        ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255));
-
-                        ParallelMath::ConditionalSet(pixelWeights[px], ParallelMath::Int16FlagToFloat(isTransparent), ParallelMath::MakeFloatZero());
-                    }
-                }
-
-                for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
-                {
-                    for (int px = 0; px < 16; px++)
-                        endpointSelector.ContributePass(preWeightedPixels[px], pass, pixelWeights[px]);
-
-                    endpointSelector.FinishPass(pass);
-                }
-
-                UnfinishedEndpoints<3> ufep = endpointSelector.GetEndpoints(channelWeights);
-
-                MUInt15 bestEndpoints[2][3];
-                MUInt15 bestIndexes[16];
-                MUInt15 bestRange = ParallelMath::MakeUInt15(0);
-                MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
-
-                for (int px = 0; px < 16; px++)
-                    bestIndexes[px] = ParallelMath::MakeUInt15(0);
-
-                for (int ep = 0; ep < 2; ep++)
-                    for (int ch = 0; ch < 3; ch++)
-                        bestEndpoints[ep][ch] = ParallelMath::MakeUInt15(0);
-
-                if (exhaustive)
-                {
-                    MSInt16 sortBins[16];
-
-                    {
-                        // Compute an 11-bit index, change it to signed, stuff it in the high bits of the sort bins,
-                        // and pack the original indexes into the low bits.
-
-                        MUInt15 sortEP[2][3];
-                        ufep.FinishLDR(0, 11, sortEP[0], sortEP[1]);
-
-                        IndexSelector<3> sortSelector;
-                        sortSelector.Init<false>(channelWeights, sortEP, 1 << 11);
-
-                        for (int16_t px = 0; px < 16; px++)
-                        {
-                            MSInt16 sortBin = ParallelMath::LosslessCast<MSInt16>::Cast(sortSelector.SelectIndexLDR(floatPixels[px], &rtn) << 4);
-
-                            if (alphaTest)
-                            {
-                                ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255));
-
-                                ParallelMath::ConditionalSet(sortBin, isTransparent, ParallelMath::MakeSInt16(-16)); // 0xfff0
-                            }
-
-                            sortBin = sortBin + ParallelMath::MakeSInt16(px);
-
-                            sortBins[px] = sortBin;
-                        }
-                    }
-
-                    // Sort bins
-                    for (int sortEnd = 1; sortEnd < 16; sortEnd++)
-                    {
-                        for (int sortLoc = sortEnd; sortLoc > 0; sortLoc--)
-                        {
-                            MSInt16 a = sortBins[sortLoc];
-                            MSInt16 b = sortBins[sortLoc - 1];
-
-                            sortBins[sortLoc] = ParallelMath::Max(a, b);
-                            sortBins[sortLoc - 1] = ParallelMath::Min(a, b);
-                        }
-                    }
-
-                    MUInt15 firstElement = ParallelMath::MakeUInt15(0);
-                    for (uint16_t e = 0; e < 16; e++)
-                    {
-                        ParallelMath::Int16CompFlag isInvalid = ParallelMath::Less(sortBins[e], ParallelMath::MakeSInt16(0));
-                        ParallelMath::ConditionalSet(firstElement, isInvalid, ParallelMath::MakeUInt15(e + 1));
-                        if (!ParallelMath::AnySet(isInvalid))
-                            break;
-                    }
-
-                    MUInt15 numElements = ParallelMath::MakeUInt15(16) - firstElement;
-
-                    MUInt15 sortedInputs[16][4];
-                    MFloat floatSortedInputs[16][4];
-                    MFloat pwFloatSortedInputs[16][4];
-
-                    for (int e = 0; e < 16; e++)
-                    {
-                        for (int ch = 0; ch < 4; ch++)
-                            sortedInputs[e][ch] = ParallelMath::MakeUInt15(0);
-                    }
-
-                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
-                    {
-                        for (int e = ParallelMath::Extract(firstElement, block); e < 16; e++)
-                        {
-                            ParallelMath::ScalarUInt16 sortBin = ParallelMath::Extract(sortBins[e], block);
-                            int originalIndex = (sortBin & 15);
-
-                            for (int ch = 0; ch < 4; ch++)
-                                ParallelMath::PutUInt15(sortedInputs[15 - e][ch], block, ParallelMath::Extract(pixels[originalIndex][ch], block));
-                        }
-                    }
-
-                    for (int e = 0; e < 16; e++)
-                    {
-                        for (int ch = 0; ch < 4; ch++)
-                        {
-                            MFloat f = ParallelMath::ToFloat(sortedInputs[e][ch]);
-                            floatSortedInputs[e][ch] = f;
-                            pwFloatSortedInputs[e][ch] = f * channelWeights[ch];
-                        }
-                    }
-
-                    for (int n0 = 0; n0 <= 15; n0++)
-                    {
-                        int remainingFor1 = 16 - n0;
-                        if (remainingFor1 == 16)
-                            remainingFor1 = 15;
-
-                        for (int n1 = 0; n1 <= remainingFor1; n1++)
-                        {
-                            int remainingFor2 = 16 - n1 - n0;
-                            if (remainingFor2 == 16)
-                                remainingFor2 = 15;
-
-                            for (int n2 = 0; n2 <= remainingFor2; n2++)
-                            {
-                                int n3 = 16 - n2 - n1 - n0;
-
-                                if (n3 == 16)
-                                    continue;
-
-                                int counts[4] = { n0, n1, n2, n3 };
-
-                                TestCounts(flags, counts, 4, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
-                            }
-                        }
-                    }
-
-                    TestSingleColor(flags, pixels, floatPixels, 4, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
-
-                    if (alphaTest)
-                    {
-                        for (int n0 = 0; n0 <= 15; n0++)
-                        {
-                            int remainingFor1 = 16 - n0;
-                            if (remainingFor1 == 16)
-                                remainingFor1 = 15;
-
-                            for (int n1 = 0; n1 <= remainingFor1; n1++)
-                            {
-                                int n2 = 16 - n1 - n0;
-
-                                if (n2 == 16)
-                                    continue;
-
-                                int counts[3] = { n0, n1, n2 };
-
-                                TestCounts(flags, counts, 3, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
-                            }
-                        }
-
-                        TestSingleColor(flags, pixels, floatPixels, 3, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
-                    }
-                }
-                else
-                {
-                    int minRange = alphaTest ? 3 : 4;
-
-                    for (int range = minRange; range <= 4; range++)
-                    {
-                        int tweakRounds = BCCommon::TweakRoundsForRange(range);
-                        if (tweakRounds > maxTweakRounds)
-                            tweakRounds = maxTweakRounds;
-
-                        for (int tweak = 0; tweak < tweakRounds; tweak++)
-                        {
-                            MUInt15 endPoints[2][3];
-
-                            ufep.FinishLDR(tweak, range, endPoints[0], endPoints[1]);
-
-                            for (int refine = 0; refine < numRefineRounds; refine++)
-                            {
-                                EndpointRefiner<3> refiner;
-                                refiner.Init(range, channelWeights);
-
-                                TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, range, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &refiner, &rtn);
-
-                                if (refine != numRefineRounds - 1)
-                                    refiner.GetRefinedEndpointsLDR(endPoints, &rtn);
-                            }
-                        }
-                    }
-                }
-
-                for (int block = 0; block < ParallelMath::ParallelSize; block++)
-                {
-                    ParallelMath::ScalarUInt16 range = ParallelMath::Extract(bestRange, block);
-                    assert(range == 3 || range == 4);
-
-                    ParallelMath::ScalarUInt16 compressedEP[2];
-                    for (int ep = 0; ep < 2; ep++)
-                    {
-                        ParallelMath::ScalarUInt16 endPoint[3];
-                        for (int ch = 0; ch < 3; ch++)
-                            endPoint[ch] = ParallelMath::Extract(bestEndpoints[ep][ch], block);
-
-                        int compressed = (endPoint[0] & 0xf8) << 8;
-                        compressed |= (endPoint[1] & 0xfc) << 3;
-                        compressed |= (endPoint[2] & 0xf8) >> 3;
-
-                        compressedEP[ep] = static_cast<ParallelMath::ScalarUInt16>(compressed);
-                    }
-
-                    int indexOrder[4];
-
-                    if (range == 4)
-                    {
-                        if (compressedEP[0] == compressedEP[1])
-                        {
-                            indexOrder[0] = 0;
-                            indexOrder[1] = 0;
-                            indexOrder[2] = 0;
-                            indexOrder[3] = 0;
-                        }
-                        else if (compressedEP[0] < compressedEP[1])
-                        {
-                            std::swap(compressedEP[0], compressedEP[1]);
-                            indexOrder[0] = 1;
-                            indexOrder[1] = 3;
-                            indexOrder[2] = 2;
-                            indexOrder[3] = 0;
-                        }
-                        else
-                        {
-                            indexOrder[0] = 0;
-                            indexOrder[1] = 2;
-                            indexOrder[2] = 3;
-                            indexOrder[3] = 1;
-                        }
-                    }
-                    else
-                    {
-                        assert(range == 3);
-
-                        if (compressedEP[0] > compressedEP[1])
-                        {
-                            std::swap(compressedEP[0], compressedEP[1]);
-                            indexOrder[0] = 1;
-                            indexOrder[1] = 2;
-                            indexOrder[2] = 0;
-                        }
-                        else
-                        {
-                            indexOrder[0] = 0;
-                            indexOrder[1] = 2;
-                            indexOrder[2] = 1;
-                        }
-                        indexOrder[3] = 3;
-                    }
-
-                    packedBlocks[0] = static_cast<uint8_t>(compressedEP[0] & 0xff);
-                    packedBlocks[1] = static_cast<uint8_t>((compressedEP[0] >> 8) & 0xff);
-                    packedBlocks[2] = static_cast<uint8_t>(compressedEP[1] & 0xff);
-                    packedBlocks[3] = static_cast<uint8_t>((compressedEP[1] >> 8) & 0xff);
-
-                    for (int i = 0; i < 16; i += 4)
-                    {
-                        int packedIndexes = 0;
-                        for (int subi = 0; subi < 4; subi++)
-                        {
-                            ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[i + subi], block);
-                            packedIndexes |= (indexOrder[index] << (subi * 2));
-                        }
-
-                        packedBlocks[4 + i / 4] = static_cast<uint8_t>(packedIndexes);
-                    }
-
-                    packedBlocks += packedBlockStride;
-                }
-            }
-        };
-
-        // Signed input blocks are converted into unsigned space, with the maximum value being 254
-        void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize])
-        {
-            for (size_t block = 0; block < ParallelMath::ParallelSize; block++)
-            {
-                const PixelBlockS8& inputSignedBlock = inputSigned[block];
-                PixelBlockU8& inputNormalizedBlock = inputNormalized[block];
-
-                for (size_t px = 0; px < 16; px++)
-                {
-                    for (size_t ch = 0; ch < 4; ch++)
-                        inputNormalizedBlock.m_pixels[px][ch] = static_cast<uint8_t>(std::max<int>(inputSignedBlock.m_pixels[px][ch], -127) + 127);
-                }
-            }
-        }
-
-        void FillWeights(const Options &options, float channelWeights[4])
-        {
-            if (options.flags & Flags::Uniform)
-                channelWeights[0] = channelWeights[1] = channelWeights[2] = channelWeights[3] = 1.0f;
-            else
-            {
-                channelWeights[0] = options.redWeight;
-                channelWeights[1] = options.greenWeight;
-                channelWeights[2] = options.blueWeight;
-                channelWeights[3] = options.alphaWeight;
-            }
-        }
-    }
-
-    namespace Kernels
-    {
-        void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, options.seedPoints, options.refineRoundsBC7);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, true, options.seedPoints, options.refineRoundsBC6H);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
-                pBC += ParallelMath::ParallelSize * 8;
-            }
-        }
-
-        void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
-                Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC);
-                pBC += ParallelMath::ParallelSize * 8;
-            }
-        }
-
-        void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
-                Internal::BiasSignedInput(inputBlocks, pBlocks + blockBase);
-
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC);
-                pBC += ParallelMath::ParallelSize * 8;
-            }
-        }
-
-        void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            float channelWeights[4];
-            Internal::FillWeights(options, channelWeights);
-
-            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
-            {
-                PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
-                Internal::BiasSignedInput(inputBlocks, pBlocks + blockBase);
-
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC);
-                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC);
-                pBC += ParallelMath::ParallelSize * 16;
-            }
-        }
-
-        void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
-            {
-                Internal::BC7Computer::UnpackOne(pBlocks[blockBase], pBC);
-                pBC += 16;
-            }
-        }
-
-        void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
-            {
-                Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, false);
-                pBC += 16;
-            }
-        }
-
-        void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC)
-        {
-            assert(pBlocks);
-            assert(pBC);
-
-            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
-            {
-                Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, true);
-                pBC += 16;
-            }
-        }
-    }
-}
diff --git a/thirdparty/cvtt/ConvectionKernels.h b/thirdparty/cvtt/ConvectionKernels.h
index fb5ca130f9..3da48405ff 100644
--- a/thirdparty/cvtt/ConvectionKernels.h
+++ b/thirdparty/cvtt/ConvectionKernels.h
@@ -25,21 +25,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #ifndef __CVTT_CONVECTION_KERNELS__
 #define __CVTT_CONVECTION_KERNELS__
 
+#include <stddef.h>
 #include <stdint.h>
 
 namespace cvtt
 {
     namespace Flags
     {
-        // Enable partitioned modes in BC7 encoding (slower, better quality)
-        const uint32_t BC7_EnablePartitioning   = 0x001;
-
-        // Enable 3-partition modes in BC7 encoding (slower, better quality, requires BC7_EnablePartitioning)
-        const uint32_t BC7_Enable3Subsets       = 0x002;
-
-        // Enable dual-plane modes in BC7 encoding (slower, better quality)
-        const uint32_t BC7_EnableDualPlane      = 0x004;
-
         // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
         const uint32_t BC7_FastIndexing         = 0x008;
 
@@ -61,13 +53,19 @@ namespace cvtt
         // Uniform color channel importance
         const uint32_t Uniform                  = 0x200;
 
+        // Use fake BT.709 color space for etc2comp compatibility (slower)
+        const uint32_t ETC_UseFakeBT709         = 0x400;
+
+        // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
+        const uint32_t ETC_FakeBT709Accurate    = 0x800;
+
         // Misc useful default flag combinations
-        const uint32_t Fastest = (BC6H_FastIndexing | S3TC_Paranoid);
-        const uint32_t Faster = (BC7_EnableDualPlane | BC6H_FastIndexing | S3TC_Paranoid);
-        const uint32_t Fast = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_FastIndexing | S3TC_Paranoid);
-        const uint32_t Default = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | BC7_FastIndexing | S3TC_Paranoid);
-        const uint32_t Better = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | S3TC_Paranoid | S3TC_Exhaustive);
-        const uint32_t Ultra = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive);
+        const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
+        const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
+        const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
+        const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid);
+        const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
+        const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
     }
 
     const unsigned int NumParallelBlocks = 8;
@@ -81,7 +79,7 @@ namespace cvtt
         float blueWeight;       // Blue channel importance
         float alphaWeight;      // Alpha channel importance
 
-        int refineRoundsBC7;    // Number of refine rounds for BC7
+        int refineRoundsBC7;   // Number of refine rounds for BC7
         int refineRoundsBC6H;   // Number of refine rounds for BC6H (max 3)
         int refineRoundsIIC;    // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
         int refineRoundsS3TC;   // Number of refine rounds for S3TC RGB
@@ -104,6 +102,102 @@ namespace cvtt
         }
     };
 
+    struct BC7FineTuningParams
+    {
+        // Seed point counts for each mode+configuration combination
+        uint8_t mode0SP[16];
+        uint8_t mode1SP[64];
+        uint8_t mode2SP[64];
+        uint8_t mode3SP[64];
+        uint8_t mode4SP[4][2];
+        uint8_t mode5SP[4];
+        uint8_t mode6SP;
+        uint8_t mode7SP[64];
+
+        BC7FineTuningParams()
+        {
+            for (int i = 0; i < 16; i++)
+                this->mode0SP[i] = 4;
+
+            for (int i = 0; i < 64; i++)
+            {
+                this->mode1SP[i] = 4;
+                this->mode2SP[i] = 4;
+                this->mode3SP[i] = 4;
+                this->mode7SP[i] = 4;
+            }
+
+            for (int i = 0; i < 4; i++)
+            {
+                for (int j = 0; j < 2; j++)
+                    this->mode4SP[i][j] = 4;
+
+                this->mode5SP[i] = 4;
+            }
+
+            this->mode6SP = 4;
+        }
+    };
+
+    struct BC7EncodingPlan
+    {
+        static const int kNumRGBAShapes = 129;
+        static const int kNumRGBShapes = 243;
+
+        uint64_t mode1PartitionEnabled;
+        uint64_t mode2PartitionEnabled;
+        uint64_t mode3PartitionEnabled;
+        uint16_t mode0PartitionEnabled;
+        uint64_t mode7RGBAPartitionEnabled;
+        uint64_t mode7RGBPartitionEnabled;
+        uint8_t mode4SP[4][2];
+        uint8_t mode5SP[4];
+        bool mode6Enabled;
+
+        uint8_t seedPointsForShapeRGB[kNumRGBShapes];
+        uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];
+
+        uint8_t rgbaShapeList[kNumRGBAShapes];
+        uint8_t rgbaNumShapesToEvaluate;
+
+        uint8_t rgbShapeList[kNumRGBShapes];
+        uint8_t rgbNumShapesToEvaluate;
+
+        BC7EncodingPlan()
+        {
+            for (int i = 0; i < kNumRGBShapes; i++)
+            {
+                this->rgbShapeList[i] = i;
+                this->seedPointsForShapeRGB[i] = 4;
+            }
+            this->rgbNumShapesToEvaluate = kNumRGBShapes;
+
+            for (int i = 0; i < kNumRGBAShapes; i++)
+            {
+                this->rgbaShapeList[i] = i;
+                this->seedPointsForShapeRGBA[i] = 4;
+            }
+            this->rgbaNumShapesToEvaluate = kNumRGBAShapes;
+
+
+            this->mode0PartitionEnabled = 0xffff;
+            this->mode1PartitionEnabled = 0xffffffffffffffffULL;
+            this->mode2PartitionEnabled = 0xffffffffffffffffULL;
+            this->mode3PartitionEnabled = 0xffffffffffffffffULL;
+            this->mode6Enabled = true;
+            this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL;
+            this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL;
+
+            for (int i = 0; i < 4; i++)
+            {
+                for (int j = 0; j < 2; j++)
+                    this->mode4SP[i][j] = 4;
+
+                this->mode5SP[i] = 4;
+            }
+        }
+    };
+
     // RGBA input block for unsigned 8-bit formats
     struct PixelBlockU8
     {
@@ -116,14 +210,34 @@ namespace cvtt
         int8_t m_pixels[16][4];
     };
 
+    struct PixelBlockScalarS16
+    {
+        int16_t m_pixels[16];
+    };
+
     // RGBA input block for half-precision float formats (bit-cast to int16_t)
     struct PixelBlockF16
     {
         int16_t m_pixels[16][4];
     };
 
+    class ETC2CompressionData
+    {
+    protected:
+        ETC2CompressionData() {}
+    };
+
+    class ETC1CompressionData
+    {
+    protected:
+        ETC1CompressionData() {}
+    };
+
     namespace Kernels
     {
+        typedef void* allocFunc_t(void *context, size_t size);
+        typedef void freeFunc_t(void *context, void* ptr, size_t size);
+
         // NOTE: All functions accept and output NumParallelBlocks blocks at once
         void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
         void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
@@ -134,7 +248,28 @@ namespace cvtt
         void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
         void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
         void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
-        void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
+        void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan);
+        void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData);
+        void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData);
+        void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
+        void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
+
+        void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options);
+        void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options);
+
+        // Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
+        void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);
+
+        // Generates a BC7 encoding plan from fine-tuning parameters.
+        bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params);
+
+        // ETC compression requires temporary storage that normally consumes a large amount of stack space.
+        // To allocate and release it, use one of these functions.
+        ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options);
+        void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc);
+
+        ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context);
+        void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);
 
         void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC);
         void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC);
diff --git a/thirdparty/cvtt/ConvectionKernels_API.cpp b/thirdparty/cvtt/ConvectionKernels_API.cpp
new file mode 100644
index 0000000000..707e71d474
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_API.cpp
@@ -0,0 +1,346 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include <stdint.h>
+#include "ConvectionKernels.h"
+#include "ConvectionKernels_Util.h"
+#include "ConvectionKernels_BC67.h"
+#include "ConvectionKernels_ETC.h"
+#include "ConvectionKernels_S3TC.h"
+
+#include <assert.h>
+
+namespace cvtt
+{
+    namespace Kernels
+    {
+        void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, const BC7EncodingPlan &encodingPlan)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, encodingPlan, options.refineRoundsBC7);
+                pBC += ParallelMath::ParallelSize * 16;
+            }
+        }
+
+        void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H);
+                pBC += ParallelMath::ParallelSize * 16;
+            }
+        }
+
+        void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, true, options.seedPoints, options.refineRoundsBC6H);
+                pBC += ParallelMath::ParallelSize * 16;
+            }
+        }
+
+        void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
+                pBC += ParallelMath::ParallelSize * 8;
+            }
+        }
+
+        void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
+                Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16);
+                pBC += ParallelMath::ParallelSize * 16;
+            }
+        }
+
+        void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
+                pBC += ParallelMath::ParallelSize * 16;
+            }
+        }
+
+        void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC);
+                pBC += ParallelMath::ParallelSize * 8;
+            }
+        }
+
+        void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
+                Util::BiasSignedInput(inputBlocks, pBlocks + blockBase);
+
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC);
+                pBC += ParallelMath::ParallelSize * 8;
+            }
+        }
+
+        void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
+        {
+            assert(pBlocks);
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights);
+
+            for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
+            {
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC);
+                pBC += ParallelMath::ParallelSize * 16;
+            }
+        }
+
+        void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) // Signed counterpart of EncodeBC5U: biases each group via Util::BiasSignedInput, then packs two passes per group.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights); // NOTE(review): channelWeights is not read again in this function
+
+            for (size_t groupStart = 0; groupStart < NumParallelBlocks; groupStart += ParallelMath::ParallelSize)
+            {
+                PixelBlockU8 biasedBlocks[ParallelMath::ParallelSize];
+                Util::BiasSignedInput(biasedBlocks, pBlocks + groupStart); // fills the unsigned scratch blocks from the signed input
+
+                uint8_t *groupOut = pBC + groupStart * 16; // output advances 16 bytes per block
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, biasedBlocks, 0, groupOut, 16, true, options.seedPoints, options.refineRoundsIIC);     // third argument 0 -> bytes 0-7 of each block
+                Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, biasedBlocks, 1, groupOut + 8, 16, true, options.seedPoints, options.refineRoundsIIC); // third argument 1 -> bytes 8-15 of each block
+            }
+        }
+
+        void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC1CompressionData *compressionData) // Runs CompressETC1Block on each group of ParallelSize blocks; output advances 8 bytes per block.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights); // NOTE(review): channelWeights is not read again in this function
+
+            for (size_t groupStart = 0; groupStart < cvtt::NumParallelBlocks; groupStart += ParallelMath::ParallelSize)
+            {
+                uint8_t *groupOut = pBC + groupStart * 8; // same address the running pointer would have reached
+                Internal::ETCComputer::CompressETC1Block(groupOut, pBlocks + groupStart, compressionData, options);
+            }
+        }
+
+        void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) // Runs CompressETC2Block (last argument false) on each group; output advances 8 bytes per block.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights); // NOTE(review): channelWeights is not read again in this function
+
+            for (size_t groupStart = 0; groupStart < cvtt::NumParallelBlocks; groupStart += ParallelMath::ParallelSize)
+            {
+                uint8_t *groupOut = pBC + groupStart * 8; // same address the running pointer would have reached
+                Internal::ETCComputer::CompressETC2Block(groupOut, pBlocks + groupStart, compressionData, options, false);
+            }
+        }
+
+        void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) // Same loop as EncodeETC2 but passes true as the last CompressETC2Block argument.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            float channelWeights[4];
+            Util::FillWeights(options, channelWeights); // NOTE(review): channelWeights is not read again in this function
+
+            for (size_t groupStart = 0; groupStart < cvtt::NumParallelBlocks; groupStart += ParallelMath::ParallelSize)
+            {
+                uint8_t *groupOut = pBC + groupStart * 8; // output advances 8 bytes per block
+                Internal::ETCComputer::CompressETC2Block(groupOut, pBlocks + groupStart, compressionData, options, true);
+            }
+        }
+
+        void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) // Runs CompressETC2AlphaBlock on each group of ParallelSize blocks; output advances 8 bytes per block.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            for (size_t groupStart = 0; groupStart < cvtt::NumParallelBlocks; groupStart += ParallelMath::ParallelSize)
+            {
+                uint8_t *groupOut = pBC + groupStart * 8; // same address the running pointer would have reached
+                Internal::ETCComputer::CompressETC2AlphaBlock(groupOut, pBlocks + groupStart, options);
+            }
+        }
+
+        void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options) // Runs CompressEACBlock on each group, forwarding isSigned; output advances 8 bytes per block.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            for (size_t groupStart = 0; groupStart < cvtt::NumParallelBlocks; groupStart += ParallelMath::ParallelSize)
+            {
+                uint8_t *groupOut = pBC + groupStart * 8; // same address the running pointer would have reached
+                Internal::ETCComputer::CompressEACBlock(groupOut, pBlocks + groupStart, isSigned, options);
+            }
+        }
+
+        void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) // Encodes color and alpha separately, then interleaves them: 8 alpha bytes followed by 8 color bytes per block.
+        {
+            assert(pBlocks); // mirror the sibling encoders' checks; the nested calls only receive the local buffers, so pBC must be checked here
+            assert(pBC);
+
+            uint8_t alphaBlockData[cvtt::NumParallelBlocks * 8]; // scratch output of EncodeETC2Alpha
+            uint8_t colorBlockData[cvtt::NumParallelBlocks * 8]; // scratch output of EncodeETC2
+            EncodeETC2(colorBlockData, pBlocks, options, compressionData);
+            EncodeETC2Alpha(alphaBlockData, pBlocks, options);
+
+            for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
+                for (size_t blockData = 0; blockData < 8; blockData++)
+                {
+                    pBC[blockBase * 16 + blockData] = alphaBlockData[blockBase * 8 + blockData];     // alpha half: output bytes 0-7
+                    pBC[blockBase * 16 + 8 + blockData] = colorBlockData[blockBase * 8 + blockData]; // color half: output bytes 8-15
+                }
+        }
+
+        void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC) // Unpacks NumParallelBlocks consecutive 16-byte encoded blocks from pBC into pBlocks.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            for (size_t blockIndex = 0; blockIndex < cvtt::NumParallelBlocks; ++blockIndex)
+            {
+                const uint8_t *encodedBlock = pBC + blockIndex * 16; // input stride is 16 bytes per block
+                Internal::BC7Computer::UnpackOne(pBlocks[blockIndex], encodedBlock);
+            }
+        }
+
+        void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC) // Unpacks NumParallelBlocks 16-byte blocks, passing false as the last UnpackOne argument.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            for (size_t blockIndex = 0; blockIndex < cvtt::NumParallelBlocks; ++blockIndex)
+            {
+                const uint8_t *encodedBlock = pBC + blockIndex * 16; // input stride is 16 bytes per block
+                Internal::BC6HComputer::UnpackOne(pBlocks[blockIndex], encodedBlock, false);
+            }
+        }
+
+        void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC) // Unpacks NumParallelBlocks 16-byte blocks, passing true as the last UnpackOne argument.
+        {
+            assert(pBlocks); // both pointers are required
+            assert(pBC);
+
+            for (size_t blockIndex = 0; blockIndex < cvtt::NumParallelBlocks; ++blockIndex)
+            {
+                const uint8_t *encodedBlock = pBC + blockIndex * 16; // input stride is 16 bytes per block
+                Internal::BC6HComputer::UnpackOne(pBlocks[blockIndex], encodedBlock, true);
+            }
+        }
+
+        ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context) // Forwards to Internal::ETCComputer::AllocETC1Data and returns its result.
+        {
+            return Internal::ETCComputer::AllocETC1Data(allocFunc, context);
+        }
+
+        void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc) // Forwards to Internal::ETCComputer::ReleaseETC1Data.
+        {
+            Internal::ETCComputer::ReleaseETC1Data(compressionData, freeFunc);
+        }
+
+        ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options) // Forwards to Internal::ETCComputer::AllocETC2Data and returns its result.
+        {
+            return Internal::ETCComputer::AllocETC2Data(allocFunc, context, options);
+        }
+
+        void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc) // Forwards to Internal::ETCComputer::ReleaseETC2Data.
+        {
+            Internal::ETCComputer::ReleaseETC2Data(compressionData, freeFunc);
+        }
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_AggregatedError.h b/thirdparty/cvtt/ConvectionKernels_AggregatedError.h
new file mode 100644
index 0000000000..9f9356a345
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_AggregatedError.h
@@ -0,0 +1,55 @@
+#pragma once
+#ifndef __CVTT_AGGREGATEDERROR_H__
+#define __CVTT_AGGREGATEDERROR_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        template<int TVectorSize>
+        class AggregatedError // Keeps one unweighted error accumulator per channel (TVectorSize channels) and combines them on demand.
+        {
+        public:
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt31 MUInt31;
+            typedef ParallelMath::Float MFloat;
+
+            AggregatedError() // Starts every channel accumulator at zero.
+            {
+                for (int channel = 0; channel < TVectorSize; ++channel)
+                    m_errorUnweighted[channel] = ParallelMath::MakeUInt31(0);
+            }
+
+            void Add(const MUInt16 &channelErrorUnweighted, int ch) // Adds one error term to the accumulator of channel ch (ch is not range-checked).
+            {
+                const MUInt31 widenedError = ParallelMath::ToUInt31(channelErrorUnweighted);
+                m_errorUnweighted[ch] = m_errorUnweighted[ch] + widenedError;
+            }
+
+            MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const // Returns the plain channel sum when Flags::Uniform is set, otherwise the sum weighted by channelWeightsSq.
+            {
+                const bool useWeights = (flags & cvtt::Flags::Uniform) == 0;
+                if (useWeights)
+                {
+                    MFloat weightedSum = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0];
+                    for (int channel = 1; channel < TVectorSize; ++channel)
+                        weightedSum = weightedSum + ParallelMath::ToFloat(m_errorUnweighted[channel]) * channelWeightsSq[channel];
+                    return weightedSum;
+                }
+
+                MUInt31 plainSum = m_errorUnweighted[0]; // uniform path: summed as integers, converted to float once at the end
+                for (int channel = 1; channel < TVectorSize; ++channel)
+                    plainSum = plainSum + m_errorUnweighted[channel];
+                return ParallelMath::ToFloat(plainSum);
+            }
+
+        private:
+            MUInt31 m_errorUnweighted[TVectorSize]; // running per-channel totals
+        };
+    }
+}
+
+#endif
+
diff --git a/thirdparty/cvtt/ConvectionKernels_BC67.cpp b/thirdparty/cvtt/ConvectionKernels_BC67.cpp
new file mode 100644
index 0000000000..791859b232
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BC67.cpp
@@ -0,0 +1,3485 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels_BC67.h"
+
+#include "ConvectionKernels_AggregatedError.h"
+#include "ConvectionKernels_BCCommon.h"
+#include "ConvectionKernels_BC7_Prio.h"
+#include "ConvectionKernels_BC7_SingleColor.h"
+#include "ConvectionKernels_BC6H_IO.h"
+#include "ConvectionKernels_EndpointRefiner.h"
+#include "ConvectionKernels_EndpointSelector.h"
+#include "ConvectionKernels_IndexSelectorHDR.h"
+#include "ConvectionKernels_ParallelMath.h"
+#include "ConvectionKernels_UnfinishedEndpoints.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        namespace BC67 // NOTE(review): presumably types shared by the BC6H and BC7 code paths — confirm against users of WorkInfo
+        {
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::UInt15 MUInt15;
+
+            struct WorkInfo // Bundles a mode, an error value, endpoints, two index sets and mode-specific data; all fields are ParallelMath vector types
+            {
+                MUInt15 m_mode;
+                MFloat m_error;
+                MUInt15 m_ep[3][2][4];    // NOTE(review): dimensions look like [subset][endpoint][channel] — confirm
+                MUInt15 m_indexes[16];    // 16 entries — presumably one per pixel of a 4x4 block
+                MUInt15 m_indexes2[16];   // second index set, same size as m_indexes
+
+                union // m_partition and m_isr share storage; only one of them is meaningful at a time
+                {
+                    MUInt15 m_partition;
+                    struct IndexSelectorAndRotation
+                    {
+                        MUInt15 m_indexSelector;
+                        MUInt15 m_rotation;
+                    } m_isr;
+                } m_u;
+            };
+        }
+
+        namespace BC7Data
+        {
+            enum AlphaMode // Alpha handling of a BC7 mode; stored in BC7ModeInfo::m_alphaMode
+            {
+                AlphaMode_Combined, // used by g_modes entries 6 and 7
+                AlphaMode_Separate, // used by g_modes entries 4 and 5
+                AlphaMode_None,     // used by g_modes entries 0-3
+            };
+
+            enum PBitMode // P-bit arrangement of a BC7 mode; stored in BC7ModeInfo::m_pBitMode
+            {
+                PBitMode_PerEndpoint, // used by g_modes entries 0, 3, 6 and 7
+                PBitMode_PerSubset,   // used by g_modes entry 1
+                PBitMode_None         // used by g_modes entries 2, 4 and 5
+            };
+
+            struct BC7ModeInfo // One row of g_modes; field order must match the brace initializers there
+            {
+                PBitMode m_pBitMode;
+                AlphaMode m_alphaMode;
+                int m_rgbBits;           // NOTE(review): presumably endpoint bits per color component — confirm
+                int m_alphaBits;         // 0 in every AlphaMode_None row of g_modes
+                int m_partitionBits;     // 0 in every single-subset row of g_modes
+                int m_numSubsets;        // 1, 2 or 3 in g_modes
+                int m_indexBits;
+                int m_alphaIndexBits;    // non-zero only in the AlphaMode_Separate rows of g_modes
+                bool m_hasIndexSelector; // true only for g_modes entry 4
+            };
+
+            BC7ModeInfo g_modes[] = // Eight rows, index == BC7 mode number (see trailing comments); columns follow BC7ModeInfo field order
+            {
+                { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false },     // 0
+                { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false },       // 1
+                { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false },            // 2
+                { PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false },     // 3 (Mode reference has an error, P-bit is really per-endpoint)
+
+                { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true },         // 4
+                { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false },        // 5
+                { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6
+                { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false }  // 7
+            };
+
+            const int g_weight2[] = { 0, 21, 43, 64 }; // 4 weights spanning 0..64
+            const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; // 8 weights spanning 0..64
+            const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // 16 weights spanning 0..64
+
+            const int *g_weightTables[] = // Entry n points at the 2^n-entry weight table; entries 0 and 1 are NULL
+            {
+                NULL,
+                NULL,
+                g_weight2,
+                g_weight3,
+                g_weight4
+            };
+
+            struct BC6HModeInfo // One row of g_hdrModes; field order must match the brace initializers there
+            {
+                uint16_t m_modeID;  // values in g_hdrModes range from 0x00 to 0x1e
+                bool m_partitioned; // selects the row of g_hdrModesExistForPrecision
+                bool m_transformed;
+                int m_aPrec;        // selects the column of g_hdrModesExistForPrecision
+                int m_bPrec[3];     // NOTE(review): presumably one precision per color channel — confirm
+            };
+
+            // [partitioned][precision]
+            bool g_hdrModesExistForPrecision[2][17] = // true exactly where g_hdrModes has a row with that m_partitioned / m_aPrec pair
+            {
+                //0      1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16
+                { false, false, false, false, false, false, false, false, false, false, true,  true,  true,  false, false, false, true },
+                { false, false, false, false, false, false, true,  true,  true,  true,  true,  true,  false, false, false, false, false },
+            };
+
+            BC6HModeInfo g_hdrModes[] = // Columns: m_modeID, m_partitioned, m_transformed, m_aPrec, m_bPrec[3]; the ten partitioned rows come first
+            {
+                { 0x00, true,  true,  10,{ 5, 5, 5 } },
+                { 0x01, true,  true,  7,{ 6, 6, 6 } },
+                { 0x02, true,  true,  11,{ 5, 4, 4 } },
+                { 0x06, true,  true,  11,{ 4, 5, 4 } },
+                { 0x0a, true,  true,  11,{ 4, 4, 5 } },
+                { 0x0e, true,  true,  9,{ 5, 5, 5 } },
+                { 0x12, true,  true,  8,{ 6, 5, 5 } },
+                { 0x16, true,  true,  8,{ 5, 6, 5 } },
+                { 0x1a, true,  true,  8,{ 5, 5, 6 } },
+                { 0x1e, true,  false, 6,{ 6, 6, 6 } },
+                { 0x03, false, false, 10,{ 10, 10, 10 } },
+                { 0x07, false, true,  11,{ 9, 9, 9 } },
+                { 0x0b, false, true,  12,{ 8, 8, 8 } },
+                { 0x0f, false, true,  16,{ 4, 4, 4 } },
+            };
+
+            const int g_maxHDRPrecision = 16; // equals the largest m_aPrec in g_hdrModes and the last column of g_hdrModesExistForPrecision
+
+            static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]); // element count of g_hdrModes
+
+            static uint16_t g_partitionMap[64] = // 64 16-bit masks; NOTE(review): presumably one bit per pixel for the two-subset partitions — confirm
+            {
+                0xCCCC, 0x8888, 0xEEEE, 0xECC8,
+                0xC880, 0xFEEC, 0xFEC8, 0xEC80,
+                0xC800, 0xFFEC, 0xFE80, 0xE800,
+                0xFFE8, 0xFF00, 0xFFF0, 0xF000,
+                0xF710, 0x008E, 0x7100, 0x08CE,
+                0x008C, 0x7310, 0x3100, 0x8CCE,
+                0x088C, 0x3110, 0x6666, 0x366C,
+                0x17E8, 0x0FF0, 0x718E, 0x399C,
+                0xaaaa, 0xf0f0, 0x5a5a, 0x33cc,
+                0x3c3c, 0x55aa, 0x9696, 0xa55a,
+                0x73ce, 0x13c8, 0x324c, 0x3bdc,
+                0x6996, 0xc33c, 0x9966, 0x660,
+                0x272, 0x4e4, 0x4e40, 0x2720,
+                0xc936, 0x936c, 0x39c6, 0x639c,
+                0x9336, 0x9cc6, 0x817e, 0xe718,
+                0xccf0, 0xfcc, 0x7744, 0xee22,
+            };
+
+            static uint32_t g_partitionMap2[64] = // 64 32-bit masks; NOTE(review): presumably two bits per pixel for the three-subset partitions — confirm
+            {
+                0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
+                0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
+                0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
+                0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
+                0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
+                0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
+                0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
+                0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
+                0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
+                0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
+                0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
+                0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
+                0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
+                0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
+                0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
+                0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
+            };
+
+            static int g_fixupIndexes2[64] = // 64 entries, each a value in 0..15; NOTE(review): presumably the second-subset fix-up pixel per two-subset partition — confirm
+            {
+                15,15,15,15,
+                15,15,15,15,
+                15,15,15,15,
+                15,15,15,15,
+                15, 2, 8, 2,
+                2, 8, 8,15,
+                2, 8, 2, 2,
+                8, 8, 2, 2,
+
+                15,15, 6, 8,
+                2, 8,15,15,
+                2, 8, 2, 2,
+                2,15,15, 6,
+                6, 2, 6, 8,
+                15,15, 2, 2,
+                15,15,15,15,
+                15, 2, 2,15,
+            };
+
+            static int g_fixupIndexes3[64][2] = // 64 pairs of values in 0..15; NOTE(review): presumably the two extra fix-up pixels per three-subset partition — confirm
+            {
+                { 3,15 },{ 3, 8 },{ 15, 8 },{ 15, 3 },
+                { 8,15 },{ 3,15 },{ 15, 3 },{ 15, 8 },
+                { 8,15 },{ 8,15 },{ 6,15 },{ 6,15 },
+                { 6,15 },{ 5,15 },{ 3,15 },{ 3, 8 },
+                { 3,15 },{ 3, 8 },{ 8,15 },{ 15, 3 },
+                { 3,15 },{ 3, 8 },{ 6,15 },{ 10, 8 },
+                { 5, 3 },{ 8,15 },{ 8, 6 },{ 6,10 },
+                { 8,15 },{ 5,15 },{ 15,10 },{ 15, 8 },
+
+                { 8,15 },{ 15, 3 },{ 3,15 },{ 5,10 },
+                { 6,10 },{ 10, 8 },{ 8, 9 },{ 15,10 },
+                { 15, 6 },{ 3,15 },{ 15, 8 },{ 5,15 },
+                { 15, 3 },{ 15, 6 },{ 15, 6 },{ 15, 8 },
+                { 3,15 },{ 15, 3 },{ 5,15 },{ 5,15 },
+                { 5,15 },{ 8,15 },{ 5,15 },{ 10,15 },
+                { 5,15 },{ 10,15 },{ 8,15 },{ 13,15 },
+                { 15, 3 },{ 12,15 },{ 3,15 },{ 3, 8 },
+            };
+
+            static const unsigned char g_fragments[] = // Flattened lists of values in 0..15; each row's trailing comment is "start offset, length", matching the { offset, count } pairs in g_shapeRanges
+            {
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 0, 16
+                0, 1, 2, 3,  // 16, 4
+                0, 1, 4,  // 20, 3
+                0, 1, 2, 4,  // 23, 4
+                2, 3, 7,  // 27, 3
+                1, 2, 3, 7,  // 30, 4
+                0, 1, 2, 3, 4, 5, 6, 7,  // 34, 8
+                0, 1, 4, 8,  // 42, 4
+                0, 1, 2, 4, 5, 8,  // 46, 6
+                0, 1, 2, 3, 4, 5, 6, 8,  // 52, 8
+                1, 4, 5, 6, 9,  // 60, 5
+                2, 5, 6, 7, 10,  // 65, 5
+                5, 6, 9, 10,  // 70, 4
+                2, 3, 7, 11,  // 74, 4
+                1, 2, 3, 6, 7, 11,  // 78, 6
+                0, 1, 2, 3, 5, 6, 7, 11,  // 84, 8
+                0, 1, 2, 3, 8, 9, 10, 11,  // 92, 8
+                2, 3, 6, 7, 8, 9, 10, 11,  // 100, 8
+                4, 5, 6, 7, 8, 9, 10, 11,  // 108, 8
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,  // 116, 12
+                0, 4, 8, 12,  // 128, 4
+                0, 2, 3, 4, 6, 7, 8, 12,  // 132, 8
+                0, 1, 2, 4, 5, 8, 9, 12,  // 140, 8
+                0, 1, 2, 3, 4, 5, 6, 8, 9, 12,  // 148, 10
+                3, 6, 7, 8, 9, 12,  // 158, 6
+                3, 5, 6, 7, 8, 9, 10, 12,  // 164, 8
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12,  // 172, 12
+                0, 1, 2, 5, 6, 7, 11, 12,  // 184, 8
+                5, 8, 9, 10, 13,  // 192, 5
+                8, 12, 13,  // 197, 3
+                4, 8, 12, 13,  // 200, 4
+                2, 3, 6, 9, 12, 13,  // 204, 6
+                0, 1, 2, 3, 8, 9, 12, 13,  // 210, 8
+                0, 1, 4, 5, 8, 9, 12, 13,  // 218, 8
+                2, 3, 6, 7, 8, 9, 12, 13,  // 226, 8
+                2, 3, 5, 6, 9, 10, 12, 13,  // 234, 8
+                0, 3, 6, 7, 9, 10, 12, 13,  // 242, 8
+                0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13,  // 250, 12
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13,  // 262, 13
+                2, 3, 4, 7, 8, 11, 12, 13,  // 275, 8
+                1, 2, 6, 7, 8, 11, 12, 13,  // 283, 8
+                2, 3, 4, 6, 7, 8, 9, 11, 12, 13,  // 291, 10
+                2, 3, 4, 5, 10, 11, 12, 13,  // 301, 8
+                0, 1, 6, 7, 10, 11, 12, 13,  // 309, 8
+                6, 9, 10, 11, 14,  // 317, 5
+                0, 2, 4, 6, 8, 10, 12, 14,  // 322, 8
+                1, 3, 5, 7, 8, 10, 12, 14,  // 330, 8
+                1, 3, 4, 6, 9, 11, 12, 14,  // 338, 8
+                0, 2, 5, 7, 9, 11, 12, 14,  // 346, 8
+                0, 3, 4, 5, 8, 9, 13, 14,  // 354, 8
+                2, 3, 4, 7, 8, 9, 13, 14,  // 362, 8
+                1, 2, 5, 6, 9, 10, 13, 14,  // 370, 8
+                0, 3, 4, 7, 9, 10, 13, 14,  // 378, 8
+                0, 3, 5, 6, 8, 11, 13, 14,  // 386, 8
+                1, 2, 4, 7, 8, 11, 13, 14,  // 394, 8
+                0, 1, 4, 7, 10, 11, 13, 14,  // 402, 8
+                0, 3, 6, 7, 10, 11, 13, 14,  // 410, 8
+                8, 12, 13, 14,  // 418, 4
+                1, 2, 3, 7, 8, 12, 13, 14,  // 422, 8
+                4, 8, 9, 12, 13, 14,  // 430, 6
+                0, 4, 5, 8, 9, 12, 13, 14,  // 436, 8
+                1, 2, 3, 6, 7, 8, 9, 12, 13, 14,  // 444, 10
+                2, 6, 8, 9, 10, 12, 13, 14,  // 454, 8
+                0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14,  // 462, 12
+                0, 7, 9, 10, 11, 12, 13, 14,  // 474, 8
+                1, 2, 3, 4, 5, 6, 8, 15,  // 482, 8
+                3, 7, 11, 15,  // 490, 4
+                0, 1, 3, 4, 5, 7, 11, 15,  // 494, 8
+                0, 4, 5, 10, 11, 15,  // 502, 6
+                1, 2, 3, 6, 7, 10, 11, 15,  // 508, 8
+                0, 1, 2, 3, 5, 6, 7, 10, 11, 15,  // 516, 10
+                0, 4, 5, 6, 9, 10, 11, 15,  // 526, 8
+                0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15,  // 534, 12
+                1, 2, 4, 5, 8, 9, 12, 15,  // 546, 8
+                2, 3, 5, 6, 8, 9, 12, 15,  // 554, 8
+                0, 3, 5, 6, 9, 10, 12, 15,  // 562, 8
+                1, 2, 4, 7, 9, 10, 12, 15,  // 570, 8
+                1, 2, 5, 6, 8, 11, 12, 15,  // 578, 8
+                0, 3, 4, 7, 8, 11, 12, 15,  // 586, 8
+                0, 1, 5, 6, 10, 11, 12, 15,  // 594, 8
+                1, 2, 6, 7, 10, 11, 12, 15,  // 602, 8
+                1, 3, 4, 6, 8, 10, 13, 15,  // 610, 8
+                0, 2, 5, 7, 8, 10, 13, 15,  // 618, 8
+                0, 2, 4, 6, 9, 11, 13, 15,  // 626, 8
+                1, 3, 5, 7, 9, 11, 13, 15,  // 634, 8
+                0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15,  // 642, 11
+                2, 3, 4, 5, 8, 9, 14, 15,  // 653, 8
+                0, 1, 6, 7, 8, 9, 14, 15,  // 661, 8
+                0, 1, 5, 10, 14, 15,  // 669, 6
+                0, 3, 4, 5, 9, 10, 14, 15,  // 675, 8
+                0, 1, 5, 6, 9, 10, 14, 15,  // 683, 8
+                11, 14, 15,  // 691, 3
+                7, 11, 14, 15,  // 694, 4
+                1, 2, 4, 5, 8, 11, 14, 15,  // 698, 8
+                0, 1, 4, 7, 8, 11, 14, 15,  // 706, 8
+                0, 1, 4, 5, 10, 11, 14, 15,  // 714, 8
+                2, 3, 6, 7, 10, 11, 14, 15,  // 722, 8
+                4, 5, 6, 7, 10, 11, 14, 15,  // 730, 8
+                0, 1, 4, 5, 7, 8, 10, 11, 14, 15,  // 738, 10
+                0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15,  // 748, 12
+                0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15,  // 760, 13
+                0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15,  // 773, 11
+                3, 4, 8, 9, 10, 13, 14, 15,  // 784, 8
+                11, 13, 14, 15,  // 792, 4
+                0, 1, 2, 4, 11, 13, 14, 15,  // 796, 8
+                0, 1, 2, 4, 5, 10, 11, 13, 14, 15,  // 804, 10
+                7, 10, 11, 13, 14, 15,  // 814, 6
+                3, 6, 7, 10, 11, 13, 14, 15,  // 820, 8
+                1, 5, 9, 10, 11, 13, 14, 15,  // 828, 8
+                1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15,  // 836, 12
+                12, 13, 14, 15,  // 848, 4
+                0, 1, 2, 3, 12, 13, 14, 15,  // 852, 8
+                0, 1, 4, 5, 12, 13, 14, 15,  // 860, 8
+                4, 5, 6, 7, 12, 13, 14, 15,  // 868, 8
+                4, 8, 9, 10, 12, 13, 14, 15,  // 876, 8
+                0, 4, 5, 8, 9, 10, 12, 13, 14, 15,  // 884, 10
+                0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15,  // 894, 12
+                0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15,  // 906, 12
+                0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15,  // 918, 11
+                0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15,  // 929, 11
+                7, 9, 10, 11, 12, 13, 14, 15,  // 940, 8
+                3, 6, 7, 9, 10, 11, 12, 13, 14, 15,  // 948, 10
+                2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15,  // 958, 12
+                8, 9, 10, 11, 12, 13, 14, 15,  // 970, 8
+                0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15,  // 978, 12
+                0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15,  // 990, 13
+                3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 1003, 12
+                2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 1015, 13
+                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,  // 1028, 12
+                0, 2,  // 1040, 2
+                1, 3,  // 1042, 2
+                0, 1, 4, 5,  // 1044, 4
+                0, 1, 2, 4, 5,  // 1048, 5
+                2, 3, 6,  // 1053, 3
+                0, 2, 4, 6,  // 1056, 4
+                1, 2, 5, 6,  // 1060, 4
+                0, 1, 2, 3, 5, 6,  // 1064, 6
+                0, 1, 2, 4, 5, 6,  // 1070, 6
+                0, 1, 2, 3, 4, 5, 6,  // 1076, 7
+                0, 3, 4, 7,  // 1083, 4
+                0, 1, 2, 3, 4, 7,  // 1087, 6
+                1, 3, 5, 7,  // 1093, 4
+                2, 3, 6, 7,  // 1097, 4
+                1, 2, 3, 6, 7,  // 1101, 5
+                1, 2, 3, 5, 6, 7,  // 1106, 6
+                0, 1, 2, 3, 5, 6, 7,  // 1112, 7
+                4, 5, 6, 7,  // 1119, 4
+                0, 8,  // 1123, 2
+                0, 1, 4, 5, 8,  // 1125, 5
+                0, 1, 8, 9,  // 1130, 4
+                4, 5, 8, 9,  // 1134, 4
+                0, 1, 4, 5, 8, 9,  // 1138, 6
+                2, 6, 8, 9,  // 1144, 4
+                6, 7, 8, 9,  // 1148, 4
+                0, 2, 4, 6, 8, 10,  // 1152, 6
+                1, 2, 5, 6, 9, 10,  // 1158, 6
+                0, 3, 4, 7, 9, 10,  // 1164, 6
+                0, 1, 2, 8, 9, 10,  // 1170, 6
+                4, 5, 6, 8, 9, 10,  // 1176, 6
+                3, 11,  // 1182, 2
+                2, 3, 6, 7, 11,  // 1184, 5
+                0, 3, 8, 11,  // 1189, 4
+                0, 3, 4, 7, 8, 11,  // 1193, 6
+                1, 3, 5, 7, 9, 11,  // 1199, 6
+                2, 3, 10, 11,  // 1205, 4
+                1, 5, 10, 11,  // 1209, 4
+                4, 5, 10, 11,  // 1213, 4
+                6, 7, 10, 11,  // 1217, 4
+                2, 3, 6, 7, 10, 11,  // 1221, 6
+                1, 2, 3, 9, 10, 11,  // 1227, 6
+                5, 6, 7, 9, 10, 11,  // 1233, 6
+                8, 9, 10, 11,  // 1239, 4
+                4, 12,  // 1243, 2
+                0, 1, 2, 3, 4, 5, 8, 12,  // 1245, 8
+                8, 9, 12,  // 1253, 3
+                0, 4, 5, 8, 9, 12,  // 1256, 6
+                0, 1, 4, 5, 8, 9, 12,  // 1262, 7
+                2, 3, 5, 6, 8, 9, 12,  // 1269, 7
+                1, 5, 9, 13,  // 1276, 4
+                6, 7, 9, 13,  // 1280, 4
+                1, 4, 7, 10, 13,  // 1284, 5
+                1, 6, 8, 11, 13,  // 1289, 5
+                0, 1, 12, 13,  // 1294, 4
+                4, 5, 12, 13,  // 1298, 4
+                0, 1, 6, 7, 12, 13,  // 1302, 6
+                0, 1, 4, 8, 12, 13,  // 1308, 6
+                8, 9, 12, 13,  // 1314, 4
+                4, 8, 9, 12, 13,  // 1318, 5
+                4, 5, 8, 9, 12, 13,  // 1323, 6
+                0, 4, 5, 8, 9, 12, 13,  // 1329, 7
+                0, 1, 6, 10, 12, 13,  // 1336, 6
+                3, 6, 7, 9, 10, 12, 13,  // 1342, 7
+                0, 1, 10, 11, 12, 13,  // 1349, 6
+                2, 4, 7, 9, 14,  // 1355, 5
+                4, 5, 10, 14,  // 1360, 4
+                2, 6, 10, 14,  // 1364, 4
+                2, 5, 8, 11, 14,  // 1368, 5
+                0, 2, 12, 14,  // 1373, 4
+                8, 10, 12, 14,  // 1377, 4
+                4, 6, 8, 10, 12, 14,  // 1381, 6
+                13, 14,  // 1387, 2
+                9, 10, 13, 14,  // 1389, 4
+                5, 6, 9, 10, 13, 14,  // 1393, 6
+                0, 1, 2, 12, 13, 14,  // 1399, 6
+                4, 5, 6, 12, 13, 14,  // 1405, 6
+                8, 9, 12, 13, 14,  // 1411, 5
+                8, 9, 10, 12, 13, 14,  // 1416, 6
+                7, 15,  // 1422, 2
+                0, 5, 10, 15,  // 1424, 4
+                0, 1, 2, 3, 6, 7, 11, 15,  // 1428, 8
+                10, 11, 15,  // 1436, 3
+                0, 1, 5, 6, 10, 11, 15,  // 1439, 7
+                3, 6, 7, 10, 11, 15,  // 1446, 6
+                12, 15,  // 1452, 2
+                0, 3, 12, 15,  // 1454, 4
+                4, 7, 12, 15,  // 1458, 4
+                0, 3, 6, 9, 12, 15,  // 1462, 6
+                0, 3, 5, 10, 12, 15,  // 1468, 6
+                8, 11, 12, 15,  // 1474, 4
+                5, 6, 8, 11, 12, 15,  // 1478, 6
+                4, 7, 8, 11, 12, 15,  // 1484, 6
+                1, 3, 13, 15,  // 1490, 4
+                9, 11, 13, 15,  // 1494, 4
+                5, 7, 9, 11, 13, 15,  // 1498, 6
+                2, 3, 14, 15,  // 1504, 4
+                2, 3, 4, 5, 14, 15,  // 1508, 6
+                6, 7, 14, 15,  // 1514, 4
+                2, 3, 5, 9, 14, 15,  // 1518, 6
+                2, 3, 8, 9, 14, 15,  // 1524, 6
+                10, 14, 15,  // 1530, 3
+                0, 4, 5, 9, 10, 14, 15,  // 1533, 7
+                2, 3, 7, 11, 14, 15,  // 1540, 6
+                10, 11, 14, 15,  // 1546, 4
+                7, 10, 11, 14, 15,  // 1550, 5
+                6, 7, 10, 11, 14, 15,  // 1555, 6
+                1, 2, 3, 13, 14, 15,  // 1561, 6
+                5, 6, 7, 13, 14, 15,  // 1567, 6
+                10, 11, 13, 14, 15,  // 1573, 5
+                9, 10, 11, 13, 14, 15,  // 1578, 6
+                0, 4, 8, 9, 12, 13, 14, 15,  // 1584, 8
+                9, 10, 12, 13, 14, 15,  // 1592, 6
+                8, 11, 12, 13, 14, 15,  // 1598, 6
+                3, 7, 10, 11, 12, 13, 14, 15,  // 1604, 8
+            };
+            static const int g_shapeRanges[][2] =  // Indexed by shape number: { first index into g_fragments, number of pixels }. Users read g_fragments[start + i] for i < count.
+            {
+                { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },
+                { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },
+                { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },
+                { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },
+                { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },
+                { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },
+                { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },
+                { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },
+                { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },
+                { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },
+                { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },
+                { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },
+                { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },
+                { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },
+                { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },
+                { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },
+                { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },
+                { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },
+                { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },
+                { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },
+                { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },
+                { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },
+                { 1604, 8 },
+            };
+            static const int g_shapes1[][2] =  // NOTE(review): presumably the single-subset counterpart of g_shapes2/g_shapes3; not referenced in this part of the file - confirm against its users.
+            {
+                { 0, 16 }
+            };
+            static const int g_shapes2[64][2] =  // 64 rows of two shape numbers each. NOTE(review): presumably the per-subset shapes of each two-subset partition - confirm against its users.
+            {
+                { 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 },
+                { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 },
+                { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 },
+                { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 },
+                { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 },
+                { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 },
+                { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 },
+                { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 },
+            };
+            static const int g_shapes3[64][3] =  // 64 rows of three shape numbers each. NOTE(review): presumably the per-subset shapes of each three-subset partition - confirm against its users.
+            {
+                { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 },
+                { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 },
+                { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 },
+                { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 },
+                { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 },
+                { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 },
+                { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 },
+                { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 },
+            };
+
+            static const int g_shapeList1[] =  // Shape numbers tried by TrySinglePlane for modes with one subset.
+            {
+                0,
+            };
+
+            static const int g_shapeList2[] =  // Shape numbers tried by TrySinglePlane for modes with two subsets (1..128).
+            {
+                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+                34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+                45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
+                56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
+                67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+                78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
+                89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
+                100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+                122, 123, 124, 125, 126, 127, 128,
+            };
+
+            static const int g_shapeList12[] =  // Shape numbers 0..128: the entries of g_shapeList1 followed by those of g_shapeList2. Its length sizes SinglePlaneTemporaries::unfinishedRGBA.
+            {
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+                22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
+                55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
+                66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
+                77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
+                88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
+                99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+                110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                121, 122, 123, 124, 125, 126, 127, 128,
+            };
+
+            static const int g_shapeList3[] =  // Shape numbers tried by TrySinglePlane for three-subset modes that have 64 partitions.
+            {
+                1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29,
+                33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109,
+                110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135,
+                136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
+                147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
+                158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
+                169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+                180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
+                191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
+                202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,
+                213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+                224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
+                235, 236, 237, 238, 239, 240, 241, 242,
+            };
+
+            static const int g_shapeList3Short[] =  // Shape numbers tried by TrySinglePlane for three-subset modes that have only 16 partitions.
+            {
+                1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96,
+                106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160,
+                171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232,
+                233, 237, 240,
+            };
+
+            static const int g_shapeListAll[] =  // Every shape number, 0..242. Its length sizes the per-shape arrays in SinglePlaneTemporaries.
+            {
+                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+                22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
+                55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
+                66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
+                77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
+                88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
+                99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+                110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+                132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+                143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
+                154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+                165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+                176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
+                187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,
+                198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
+                209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
+                220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
+                231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
+                242,
+            };
+
+            static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]);  // Element counts of the tables above, computed from the array sizes.
+            static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]);
+            static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]);  // Also the length of SinglePlaneTemporaries::unfinishedRGBA
+            static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]);
+            static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]);
+            static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]);  // Also the length of the per-shape arrays in SinglePlaneTemporaries
+            static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]);  // Total number of pixel indices stored in g_fragments
+        }
+
+        struct PackingVector
+        {
+            uint32_t m_vector[4];  // 128 bits of output, lowest-order word first
+            int m_offset;          // Number of bits written so far
+
+            // Clears all four words and rewinds the write position to bit 0.
+            void Init()
+            {
+                m_vector[0] = m_vector[1] = m_vector[2] = m_vector[3] = 0;
+                m_offset = 0;
+            }
+
+            // Copies every whole word of v that overlaps the first `bits` bits, then continues writing at bit `bits`.
+            void InitPacked(const uint32_t *v, int bits)
+            {
+                for (int word = 0; word * 32 < bits; word++)
+                    m_vector[word] = v[word];
+
+                m_offset = bits;
+            }
+
+            // ORs value in at the current bit offset and advances by `bits`. The value is not masked to `bits` bits.
+            inline void Pack(ParallelMath::ScalarUInt16 value, int bits)
+            {
+                const uint32_t wideValue = static_cast<uint32_t>(value);
+                const int word = m_offset >> 5;
+                const int shift = m_offset & 0x1f;
+
+                m_vector[word] |= wideValue << shift;
+
+                // Whatever does not fit in the current word continues at bit 0 of the next one.
+                const int spilled = shift + bits - 32;
+                if (spilled > 0)
+                    m_vector[word + 1] |= wideValue >> (32 - shift);
+
+                m_offset += bits;
+            }
+
+            // Writes the 16 packed bytes, least-significant byte of each word first. Asserts that exactly 128 bits were written.
+            inline void Flush(uint8_t* output)
+            {
+                assert(m_offset == 128);
+
+                for (int i = 0; i < 16; i++)
+                    output[i] = static_cast<uint8_t>(m_vector[i / 4] >> ((i % 4) * 8));
+            }
+        };
+
+
+        struct UnpackingVector
+        {
+            uint32_t m_vector[4];  // Bits still to be read; bit 0 of m_vector[0] is the next one returned
+
+            // Loads a 16-byte block, treating each group of four bytes as one little-endian 32-bit word.
+            void Init(const uint8_t *bytes)
+            {
+                for (int i = 0; i < 4; i++)
+                    m_vector[i] = 0;
+                // Widen to uint32_t before shifting: a uint8_t promotes to signed int, and values >= 0x80 shifted by 24 do not fit in it.
+                for (int b = 0; b < 16; b++)
+                    m_vector[b / 4] |= static_cast<uint32_t>(bytes[b]) << ((b % 4) * 8);
+            }
+
+            // Copies the whole words covering the first `bits` bits into v, then shifts those `bits` bits out of the vector.
+            inline void UnpackStart(uint32_t *v, int bits)
+            {
+                for (int b = 0; b < bits; b += 32)
+                    v[b / 32] = m_vector[b / 32];
+
+                int entriesShifted = bits / 32;
+                int carry = bits % 32;
+
+                for (int i = entriesShifted; i < 4; i++)
+                    m_vector[i - entriesShifted] = m_vector[i];  // words above the moved range keep their old contents
+
+                if (carry)
+                {
+                    uint32_t bitMask = (static_cast<uint32_t>(1) << carry) - 1;  // unsigned, so carry == 31 cannot overflow
+                    for (int i = 0; i < 4; i++)
+                    {
+                        m_vector[i] >>= carry;
+                        if (i != 3)
+                            m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry);
+                    }
+                }
+            }
+
+            // Removes and returns the next `bits` bits. `bits` must be at least 1, because the refill below shifts by (32 - bits).
+            inline ParallelMath::ScalarUInt16 Unpack(int bits)
+            {
+                uint32_t bitMask = (static_cast<uint32_t>(1) << bits) - 1;
+                ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask);
+
+                for (int i = 0; i < 4; i++)
+                {
+                    m_vector[i] >>= bits;
+                    if (i != 3)
+                        m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits);
+                }
+                return result;
+            }
+        };
+
+        ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned)
+        {
+            // Unsigned input: (v * 64 + 30) / 31.
+            if (!isSigned)
+                return (v * 64.0f + 30.0f) / 31.0f;
+
+            // Signed input: (v * 32 + bias) / 31, where the bias is -30 for negative lanes and +30 otherwise.
+            ParallelMath::Float bias = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f));
+            return (v * 32.0f + bias) / 31.0f;
+        }
+
+        ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v)
+        {
+#ifdef CVTT_ENABLE_ASSERTS
+            for (int i = 0; i < ParallelMath::ParallelSize; i++)
+                assert(ParallelMath::Extract(v, i) != -32768);  // -32768 has no positive counterpart for the negation below
+#endif
+            // Take the magnitude, scale it by 31/32, then put the sign back as the top bit (the result is sign + magnitude).
+            ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0));
+            ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v));
+
+            ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31));
+            ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5);
+            ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted);
+            ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768));
+
+            return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits;
+        }
+
+        ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v)  // Returns (v * 31) >> 6 per lane, narrowed to UInt15.
+        {
+            return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6));
+        }
+
+        void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned)
+        {
+            // Visits the six components (2 endpoints x 3 channels), endpoint-major, applying the signed or unsigned unscale to each.
+            for (int comp = 0; comp < 6; comp++)
+            {
+                const ParallelMath::AInt16 &src = inEP[comp / 3][comp % 3];
+                ParallelMath::AInt16 &dst = outEP[comp / 3][comp % 3];
+                if (isSigned)
+                    dst = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(src)));
+                else
+                    dst = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(src)));
+            }
+        }
+
+        struct SinglePlaneTemporaries  // Scratch arrays for BC7Computer::TrySinglePlane, which declares one as a local.
+        {
+            UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];  // Initial RGB endpoints, indexed by shape number
+            UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];  // Initial RGBA endpoints, indexed by shape number; covers shapes 0..128 only
+
+            ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments];  // One slot per g_fragments entry; presumably the best index found for that pixel of that shape - confirm
+            ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4];  // Per shape; NOTE(review): [2][4] presumably [endpoint][channel], matching the ep[2][4] arrays used elsewhere
+            ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll];  // Per shape; TrySinglePlane resets every entry to FLT_MAX for each mode
+        };
+    }
+}
+
+void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2])
+{
+    // Each output is start + (end - start) * factor, clamped to [0, 255] and converted under the scoped rounding mode.
+    ParallelMath::RoundTowardNearestForScope rtn;
+    float factors[2];
+    Util::ComputeTweakFactors(tweak, range, factors);
+
+    // Both inputs are read before any output is written.
+    MFloat start = ParallelMath::ToFloat(original[0]);
+    MFloat span = ParallelMath::ToFloat(original[1]) - start;
+    for (int i = 0; i < 2; i++)
+        result[i] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(start + span * factors[i], 0.0f, 255.0f), &rtn);
+}
+
+void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels)  // In place, for ch < channels: color[ch] = (color[ch] * (2^bits - 1) + 127 + 2^(7 - bits)) >> 8.
+{
+    for (int ch = 0; ch < channels; ch++)
+        color[ch] = ParallelMath::RightShift(((color[ch] << bits) - color[ch]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))), 8);  // (x << bits) - x equals x * (2^bits - 1); presumably rounds an 8-bit value to `bits` bits - confirm
+}
+
+void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels)
+{
+    // Quantizes the first `channels` values in place and appends p as the lowest bit of each result.
+    // The rounding addend depends on p: 2^(8 - bits) - 1 when p is set, 255 when it is clear.
+    int16_t addend = p ? ((1 << (8 - bits)) - 1) : 255;
+
+    for (int i = 0; i < channels; i++)
+    {
+        // Work in 16 bits: the (bits + 1)-bit left shift needs more headroom than the 15-bit type documents.
+        MUInt16 wide = ParallelMath::LosslessCast<MUInt16>::Cast(color[i]);
+        wide = ParallelMath::RightShift((wide << (bits + 1)) - wide + addend, 9);
+        wide = (wide << 1) | ParallelMath::MakeUInt16(p);
+        color[i] = ParallelMath::LosslessCast<MUInt15>::Cast(wide);
+    }
+}
+
+
+void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels)  // In place, for ch < channels: shifts the value left by (8 - bits) and ORs in that shifted value moved right by `bits`.
+{
+    for (int ch = 0; ch < channels; ch++)
+    {
+        MUInt15 clr = color[ch];
+        clr = clr << (8 - bits);  // place the `bits`-wide value at the top of an 8-bit range
+        color[ch] = clr | ParallelMath::RightShift(clr, bits);  // fill the vacated low bits with a copy of the high bits
+    }
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2])  // Per endpoint: RGB goes through QuantizeP(4 bits, own P-bit p[j]) then Unquantize(5); alpha is set to 255. Presumably the BC7 mode 0 endpoint format - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        QuantizeP(ep[j], 4, p[j], 3);
+        Unquantize(ep[j], 5, 3);
+        ep[j][3] = ParallelMath::MakeUInt15(255);
+    }
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p)  // Per endpoint: RGB goes through QuantizeP(6 bits, one P-bit shared by both endpoints) then Unquantize(7); alpha is set to 255. Presumably BC7 mode 1 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        QuantizeP(ep[j], 6, p, 3);
+        Unquantize(ep[j], 7, 3);
+        ep[j][3] = ParallelMath::MakeUInt15(255);
+    }
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4])  // Per endpoint: RGB goes through Quantize(5) then Unquantize(5), with no P-bit; alpha is set to 255. Presumably BC7 mode 2 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        Quantize(ep[j], 5, 3);
+        Unquantize(ep[j], 5, 3);
+        ep[j][3] = ParallelMath::MakeUInt15(255);
+    }
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2])  // Per endpoint: RGB goes through QuantizeP(7 bits, own P-bit p[j]) with no Unquantize step; alpha is set to 255. Presumably BC7 mode 3 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        QuantizeP(ep[j], 7, p[j], 3);
+        ep[j][3] = ParallelMath::MakeUInt15(255);
+    }
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2])  // Per endpoint: RGB round-trips through 5 bits and alpha through 6 bits. Presumably BC7 mode 4 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        Quantize(epRGB[j], 5, 3);
+        Unquantize(epRGB[j], 5, 3);
+
+        Quantize(epA + j, 6, 1);  // epA + j addresses the single alpha value of endpoint j
+        Unquantize(epA + j, 6, 1);
+    }
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2])  // Per endpoint: RGB round-trips through 7 bits; epA is left untouched. Presumably BC7 mode 5 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        Quantize(epRGB[j], 7, 3);
+        Unquantize(epRGB[j], 7, 3);
+    }
+
+    // Alpha is full precision
+    (void)epA;
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2])  // Per endpoint: all four channels go through QuantizeP(7 bits, own P-bit p[j]); no Unquantize step. Presumably BC7 mode 6 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+        QuantizeP(ep[j], 7, p[j], 4);
+}
+
+void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2])  // Per endpoint: all four channels go through QuantizeP(5 bits, own P-bit p[j]) then Unquantize(6). Presumably BC7 mode 7 - confirm.
+{
+    for (int j = 0; j < 2; j++)
+    {
+        QuantizeP(ep[j], 5, p[j], 4);
+        Unquantize(ep[j], 6, 4);
+    }
+}
+
+// Tries a solid-color encoding of one shape: per lane, picks the table whose reconstruction of the rounded average is closest (weighted squared error), skipping tables flagged in punchThroughInvalid, then updates shapeBestError / shapeBestEP / fragmentBestIndexes where the measured error is lower.
+void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX);
+
+    MUInt15 intAverage[4];
+    for (int ch = 0; ch < 4; ch++)
+        intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn);
+
+    MUInt15 eps[2][4];
+    MUInt15 reconstructed[4];
+    MUInt15 index = ParallelMath::MakeUInt15(0);
+
+    // Defaults used by lanes for which no table is accepted: RGB 0, alpha 255.
+    for (int epi = 0; epi < 2; epi++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+            eps[epi][ch] = ParallelMath::MakeUInt15(0);
+        eps[epi][3] = ParallelMath::MakeUInt15(255);
+    }
+
+    for (int ch = 0; ch < 3; ch++)
+        reconstructed[ch] = ParallelMath::MakeUInt15(0);
+    reconstructed[3] = ParallelMath::MakeUInt15(255);
+
+    // Depending on the target index and parity bits, there are multiple valid solid colors.
+    // We want to find the one closest to the actual average.
+    for (int t = 0; t < numTables; t++)
+    {
+        const cvtt::Tables::BC7SC::Table& table = *(tables[t]);
+
+        ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits];
+
+        MUInt15 candidateReconstructed[4];
+        MUInt15 candidateEPs[2][4];
+
+        // Table lookups are scalar, so each lane is handled separately.
+        for (int i = 0; i < ParallelMath::ParallelSize; i++)
+        {
+            for (int ch = 0; ch < numRealChannels; ch++)
+            {
+                ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i);
+                assert(avgValue >= 0 && avgValue <= 255);
+
+                const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue];
+
+                ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min);
+                ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max);
+                ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor);
+            }
+        }
+
+        MFloat avgError = ParallelMath::MakeFloatZero();
+        for (int ch = 0; ch < numRealChannels; ch++)
+        {
+            MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch];
+            avgError = avgError + delta * delta * channelWeightsSq[ch];
+        }
+
+        ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError));
+        better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations
+
+        if (ParallelMath::AnySet(better))
+        {
+            ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError);
+
+            MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index);
+
+            ParallelMath::ConditionalSet(index, better, candidateIndex);
+
+            for (int ch = 0; ch < numRealChannels; ch++)
+                ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]);
+
+            for (int epi = 0; epi < 2; epi++)
+                for (int ch = 0; ch < numRealChannels; ch++)
+                    ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]);
+        }
+    }
+
+    // Measure the chosen solid color against every pixel of the shape.
+    AggregatedError<4> aggError;
+    for (int pxi = 0; pxi < shapeLength; pxi++)
+    {
+        int px = fragmentStart[pxi];
+
+        BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
+    }
+
+    MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError;
+
+    ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError));
+    if (ParallelMath::AnySet(better))
+    {
+        shapeBestError = ParallelMath::Min(shapeBestError, error);
+        for (int epi = 0; epi < 2; epi++)
+        {
+            for (int ch = 0; ch < numRealChannels; ch++)
+                ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]);
+        }
+
+        for (int pxi = 0; pxi < shapeLength; pxi++)
+            ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index);  // every pixel of the shape shares the same index
+    }
+}
+
+void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    if (numRefineRounds < 1)
+        numRefineRounds = 1;
+
+    float channelWeightsSq[4];
+
+    for (int ch = 0; ch < 4; ch++)
+        channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
+
+    SinglePlaneTemporaries temps;
+
+    MUInt15 maxAlpha = ParallelMath::MakeUInt15(0);
+    MUInt15 minAlpha = ParallelMath::MakeUInt15(255);
+    ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true);
+    for (int px = 0; px < 16; px++)
+    {
+        MUInt15 a = pixels[px][3];
+        maxAlpha = ParallelMath::Max(maxAlpha, a);
+        minAlpha = ParallelMath::Min(minAlpha, a);
+
+        isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255))));
+    }
+
+    ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255));
+    ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha);
+
+    bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha);
+
+    // Try RGB modes if any block has a min alpha 251 or higher
+    bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha));
+
+    // Try mode 7 if any block has alpha.
+    // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints
+    // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific
+    // situations, and only by at most 1 unit of error per pixel.
+    bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0);
+
+    MFloat preWeightedPixels[16][4];
+
+    BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights);
+
+    // Get initial RGB endpoints
+    if (allowRGBModes)
+    {
+        const uint8_t *shapeList = encodingPlan.rgbShapeList;
+        int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate;
+
+        for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
+        {
+            int shape = shapeList[shapeIter];
+
+            int shapeStart = BC7Data::g_shapeRanges[shape][0];
+            int shapeSize = BC7Data::g_shapeRanges[shape][1];
+
+            EndpointSelector<3, 8> epSelector;
+
+            for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
+            {
+                for (int spx = 0; spx < shapeSize; spx++)
+                {
+                    int px = BC7Data::g_fragments[shapeStart + spx];
+                    epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
+                }
+                epSelector.FinishPass(epPass);
+            }
+            temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights);
+        }
+    }
+
+    // Get initial RGBA endpoints
+    {
+        const uint8_t *shapeList = encodingPlan.rgbaShapeList;
+        int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate;
+
+        for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
+        {
+            int shape = shapeList[shapeIter];
+
+            if (anyBlockHasAlpha || !allowRGBModes)
+            {
+                int shapeStart = BC7Data::g_shapeRanges[shape][0];
+                int shapeSize = BC7Data::g_shapeRanges[shape][1];
+
+                EndpointSelector<4, 8> epSelector;
+
+                for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
+                {
+                    for (int spx = 0; spx < shapeSize; spx++)
+                    {
+                        int px = BC7Data::g_fragments[shapeStart + spx];
+                        epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
+                    }
+                    epSelector.FinishPass(epPass);
+                }
+                temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights);
+            }
+            else
+            {
+                temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255);
+            }
+        }
+    }
+
+    for (uint16_t mode = 0; mode <= 7; mode++)
+    {
+        if (mode == 4 || mode == 5)
+            continue;
+
+        if (mode < 4 && !allowRGBModes)
+            continue;
+
+        if (mode == 7 && !allowMode7)
+            continue;
+
+        uint64_t partitionEnabledBits = 0;
+        switch (mode)
+        {
+        case 0:
+            partitionEnabledBits = encodingPlan.mode0PartitionEnabled;
+            break;
+        case 1:
+            partitionEnabledBits = encodingPlan.mode1PartitionEnabled;
+            break;
+        case 2:
+            partitionEnabledBits = encodingPlan.mode2PartitionEnabled;
+            break;
+        case 3:
+            partitionEnabledBits = encodingPlan.mode3PartitionEnabled;
+            break;
+        case 6:
+            partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;
+            break;
+        case 7:
+            if (anyBlockHasAlpha)
+                partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;
+            else
+                partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;
+            break;
+        default:
+            break;
+        }
+
+        bool isRGB = (mode < 4);
+
+        unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits;
+        int numSubsets = BC7Data::g_modes[mode].m_numSubsets;
+        int indexPrec = BC7Data::g_modes[mode].m_indexBits;
+
+        int parityBitMax = 1;
+        if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint)
+            parityBitMax = 4;
+        else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset)
+            parityBitMax = 2;
+
+        int numRealChannels = isRGB ? 3 : 4;
+
+        int numShapes;
+        const int *shapeList;
+
+        if (numSubsets == 1)
+        {
+            numShapes = BC7Data::g_numShapes1;
+            shapeList = BC7Data::g_shapeList1;
+        }
+        else if (numSubsets == 2)
+        {
+            numShapes = BC7Data::g_numShapes2;
+            shapeList = BC7Data::g_shapeList2;
+        }
+        else
+        {
+            assert(numSubsets == 3);
+            if (numPartitions == 16)
+            {
+                numShapes = BC7Data::g_numShapes3Short;
+                shapeList = BC7Data::g_shapeList3Short;
+            }
+            else
+            {
+                assert(numPartitions == 64);
+                numShapes = BC7Data::g_numShapes3;
+                shapeList = BC7Data::g_shapeList3;
+            }
+        }
+
+        for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++)
+            temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX);
+
+        for (int shapeIter = 0; shapeIter < numShapes; shapeIter++)
+        {
+            int shape = shapeList[shapeIter];
+
+            int numTweakRounds = 0;
+            if (isRGB)
+                numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape];
+            else
+                numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape];
+
+            if (numTweakRounds == 0)
+                continue;
+
+            if (numTweakRounds > MaxTweakRounds)
+                numTweakRounds = MaxTweakRounds;
+
+            int shapeStart = BC7Data::g_shapeRanges[shape][0];
+            int shapeLength = BC7Data::g_shapeRanges[shape][1];
+
+            AggregatedError<1> alphaAggError;
+            if (isRGB && anyBlockHasAlpha)
+            {
+                MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) };
+
+                for (int pxi = 0; pxi < shapeLength; pxi++)
+                {
+                    int px = BC7Data::g_fragments[shapeStart + pxi];
+                    MUInt15 original[1] = { pixels[px][3] };
+                    BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError);
+                }
+            }
+
+            float alphaWeightsSq[1] = { channelWeightsSq[3] };
+            MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq);
+
+            MUInt15 tweakBaseEP[MaxTweakRounds][2][4];
+
+            for (int tweak = 0; tweak < numTweakRounds; tweak++)
+            {
+                if (isRGB)
+                {
+                    temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
+                    tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255);
+                }
+                else
+                {
+                    temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
+                }
+            }
+
+            ParallelMath::Int16CompFlag punchThroughInvalid[4];
+            for (int pIter = 0; pIter < parityBitMax; pIter++)
+            {
+                punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false);
+
+                if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7))
+                {
+                    // Modes 6 and 7 have parity bits that affect alpha
+                    if (pIter == 0)
+                        punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha);
+                    else if (pIter == parityBitMax - 1)
+                        punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha);
+                    else
+                        punchThroughInvalid[pIter] = isPunchThrough;
+                }
+            }
+
+            for (int pIter = 0; pIter < parityBitMax; pIter++)
+            {
+                if (ParallelMath::AllSet(punchThroughInvalid[pIter]))
+                    continue;
+
+                bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]);
+
+                for (int tweak = 0; tweak < numTweakRounds; tweak++)
+                {
+                    uint16_t p[2];
+                    p[0] = (pIter & 1);
+                    p[1] = ((pIter >> 1) & 1);
+
+                    MUInt15 ep[2][4];
+
+                    for (int epi = 0; epi < 2; epi++)
+                        for (int ch = 0; ch < 4; ch++)
+                            ep[epi][ch] = tweakBaseEP[tweak][epi][ch];
+
+                    for (int refine = 0; refine < numRefineRounds; refine++)
+                    {
+                        switch (mode)
+                        {
+                        case 0:
+                            CompressEndpoints0(ep, p);
+                            break;
+                        case 1:
+                            CompressEndpoints1(ep, p[0]);
+                            break;
+                        case 2:
+                            CompressEndpoints2(ep);
+                            break;
+                        case 3:
+                            CompressEndpoints3(ep, p);
+                            break;
+                        case 6:
+                            CompressEndpoints6(ep, p);
+                            break;
+                        case 7:
+                            CompressEndpoints7(ep, p);
+                            break;
+                        default:
+                            assert(false);
+                            break;
+                        };
+
+                        MFloat shapeError = ParallelMath::MakeFloatZero();
+
+                        IndexSelector<4> indexSelector;
+                        indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec);
+
+                        EndpointRefiner<4> epRefiner;
+                        epRefiner.Init(1 << indexPrec, channelWeights);
+
+                        MUInt15 indexes[16];
+
+                        AggregatedError<4> aggError;
+                        for (int pxi = 0; pxi < shapeLength; pxi++)
+                        {
+                            int px = BC7Data::g_fragments[shapeStart + pxi];
+
+                            MUInt15 index;
+                            MUInt15 reconstructed[4];
+
+                            index = indexSelector.SelectIndexLDR(floatPixels[px], rtn);
+                            indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels);
+
+                            if (flags & cvtt::Flags::BC7_FastIndexing)
+                                BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
+                            else
+                            {
+                                MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
+
+                                MUInt15 altIndexes[2];
+                                altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
+                                altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1)));
+
+                                for (int ii = 0; ii < 2; ii++)
+                                {
+                                    indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels);
+
+                                    MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
+                                    ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error));
+                                    error = ParallelMath::Min(error, altError);
+                                    ParallelMath::ConditionalSet(index, better, altIndexes[ii]);
+                                }
+
+                                shapeError = shapeError + error;
+                            }
+
+                            if (refine != numRefineRounds - 1)
+                                epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels);
+
+                            indexes[pxi] = index;
+                        }
+
+                        if (flags & cvtt::Flags::BC7_FastIndexing)
+                            shapeError = aggError.Finalize(flags, channelWeightsSq);
+
+                        if (isRGB)
+                            shapeError = shapeError + staticAlphaError;
+
+                        ParallelMath::FloatCompFlag shapeErrorBetter;
+                        ParallelMath::Int16CompFlag shapeErrorBetter16;
+
+                        shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]);
+                        shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter);
+
+                        if (ParallelMath::AnySet(shapeErrorBetter16))
+                        {
+                            bool punchThroughOK = true;
+                            if (needPunchThroughCheck)
+                            {
+                                shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16);
+                                shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16);
+
+                                if (!ParallelMath::AnySet(shapeErrorBetter16))
+                                    punchThroughOK = false;
+                            }
+
+                            if (punchThroughOK)
+                            {
+                                ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError);
+                                for (int epi = 0; epi < 2; epi++)
+                                    for (int ch = 0; ch < numRealChannels; ch++)
+                                        ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]);
+
+                                for (int pxi = 0; pxi < shapeLength; pxi++)
+                                    ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]);
+                            }
+                        }
+
+                        if (refine != numRefineRounds - 1)
+                            epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn);
+                    } // refine
+                } // tweak
+            } // p
+
+            if (flags & cvtt::Flags::BC7_TrySingleColor)
+            {
+                MUInt15 total[4];
+                for (int ch = 0; ch < 4; ch++)
+                    total[ch] = ParallelMath::MakeUInt15(0);
+
+                for (int pxi = 0; pxi < shapeLength; pxi++)
+                {
+                    int px = BC7Data::g_fragments[shapeStart + pxi]; // map the shape-local slot to its pixel index
+                    for (int ch = 0; ch < 4; ch++)
+                        total[ch] = total[ch] + pixels[px][ch]; // accumulate the shape's own pixels (was pixels[pxi], which ignored the fragment lookup)
+                }
+
+                MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength));
+                MFloat average[4];
+                for (int ch = 0; ch < 4; ch++)
+                    average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength;
+
+                const uint8_t *fragment = BC7Data::g_fragments + shapeStart;
+                MFloat &shapeBestError = temps.shapeBestError[shape];
+                MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape];
+                MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart;
+
+                const cvtt::Tables::BC7SC::Table **scTables = NULL;
+                int numSCTables = 0;
+
+                const cvtt::Tables::BC7SC::Table *tables0[] =
+                {
+                    &cvtt::Tables::BC7SC::g_mode0_p00_i1,
+                    &cvtt::Tables::BC7SC::g_mode0_p00_i2,
+                    &cvtt::Tables::BC7SC::g_mode0_p00_i3,
+                    &cvtt::Tables::BC7SC::g_mode0_p01_i1,
+                    &cvtt::Tables::BC7SC::g_mode0_p01_i2,
+                    &cvtt::Tables::BC7SC::g_mode0_p01_i3,
+                    &cvtt::Tables::BC7SC::g_mode0_p10_i1,
+                    &cvtt::Tables::BC7SC::g_mode0_p10_i2,
+                    &cvtt::Tables::BC7SC::g_mode0_p10_i3,
+                    &cvtt::Tables::BC7SC::g_mode0_p11_i1,
+                    &cvtt::Tables::BC7SC::g_mode0_p11_i2,
+                    &cvtt::Tables::BC7SC::g_mode0_p11_i3,
+                };
+
+                const cvtt::Tables::BC7SC::Table *tables1[] =
+                {
+                    &cvtt::Tables::BC7SC::g_mode1_p0_i1,
+                    &cvtt::Tables::BC7SC::g_mode1_p0_i2,
+                    &cvtt::Tables::BC7SC::g_mode1_p0_i3,
+                    &cvtt::Tables::BC7SC::g_mode1_p1_i1,
+                    &cvtt::Tables::BC7SC::g_mode1_p1_i2,
+                    &cvtt::Tables::BC7SC::g_mode1_p1_i3,
+                };
+
+                const cvtt::Tables::BC7SC::Table *tables2[] =
+                {
+                    &cvtt::Tables::BC7SC::g_mode2,
+                };
+
+                const cvtt::Tables::BC7SC::Table *tables3[] =
+                {
+                    &cvtt::Tables::BC7SC::g_mode3_p0,
+                    &cvtt::Tables::BC7SC::g_mode3_p1,
+                };
+
+                const cvtt::Tables::BC7SC::Table *tables6[] =
+                {
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i1,
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i2,
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i3,
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i4,
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i5,
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i6,
+                    &cvtt::Tables::BC7SC::g_mode6_p0_i7,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i1,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i2,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i3,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i4,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i5,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i6,
+                    &cvtt::Tables::BC7SC::g_mode6_p1_i7,
+                };
+
+                const cvtt::Tables::BC7SC::Table *tables7[] =
+                {
+                    &cvtt::Tables::BC7SC::g_mode7_p00,
+                    &cvtt::Tables::BC7SC::g_mode7_p01,
+                    &cvtt::Tables::BC7SC::g_mode7_p10,
+                    &cvtt::Tables::BC7SC::g_mode7_p11,
+                };
+
+                switch (mode)
+                {
+                case 0:
+                {
+                    scTables = tables0;
+                    numSCTables = sizeof(tables0) / sizeof(tables0[0]);
+                }
+                break;
+                case 1:
+                {
+                    scTables = tables1;
+                    numSCTables = sizeof(tables1) / sizeof(tables1[0]);
+                }
+                break;
+                case 2:
+                {
+
+                    scTables = tables2;
+                    numSCTables = sizeof(tables2) / sizeof(tables2[0]);
+                }
+                break;
+                case 3:
+                {
+                    scTables = tables3;
+                    numSCTables = sizeof(tables3) / sizeof(tables3[0]);
+                }
+                break;
+                case 6:
+                {
+                    scTables = tables6;
+                    numSCTables = sizeof(tables6) / sizeof(tables6[0]);
+                }
+                break;
+                case 7:
+                {
+                    scTables = tables7;
+                    numSCTables = sizeof(tables7) / sizeof(tables7[0]);
+                }
+                break;
+                default:
+                    assert(false);
+                    break;
+                }
+
+                TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn);
+            }
+        } // shapeIter
+
+        uint64_t partitionsEnabledBits = 0xffffffffffffffffULL; // bit N clear => partition N is skipped by the partition loop; everything enabled unless the plan says otherwise
+
+        switch (mode)
+        {
+        case 0:
+            partitionsEnabledBits = encodingPlan.mode0PartitionEnabled;
+            break;
+        case 1:
+            partitionsEnabledBits = encodingPlan.mode1PartitionEnabled;
+            break;
+        case 2:
+            partitionsEnabledBits = encodingPlan.mode2PartitionEnabled;
+            break;
+        case 3:
+            partitionsEnabledBits = encodingPlan.mode3PartitionEnabled;
+            break;
+        case 6:
+            partitionsEnabledBits = encodingPlan.mode6Enabled ? 1 : 0; // single on/off flag mapped onto bit 0
+            break;
+        case 7:
+            if (anyBlockHasAlpha) // pick the RGBA or RGB partition mask for mode 7
+                partitionsEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; // fixed: was assigned to the misspelled "partitionEnabledBits"
+            else
+                partitionsEnabledBits = encodingPlan.mode7RGBPartitionEnabled; // fixed: same misspelling as above
+            break;
+        default:
+            break;
+        };
+
+        for (uint16_t partition = 0; partition < numPartitions; partition++)
+        {
+            if (((partitionsEnabledBits >> partition) & 1) == 0)
+                continue;
+
+            const int *partitionShapes;
+            if (numSubsets == 1)
+                partitionShapes = BC7Data::g_shapes1[partition];
+            else if (numSubsets == 2)
+                partitionShapes = BC7Data::g_shapes2[partition];
+            else
+            {
+                assert(numSubsets == 3);
+                partitionShapes = BC7Data::g_shapes3[partition];
+            }
+
+            MFloat totalError = ParallelMath::MakeFloatZero();
+            for (int subset = 0; subset < numSubsets; subset++)
+                totalError = totalError + temps.shapeBestError[partitionShapes[subset]];
+
+            ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error);
+            ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
+
+            if (mode == 7 && anyBlockHasAlpha)
+            {
+                // Some lanes could be better, but we filter them out to ensure consistency with scalar
+                bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0);
+
+                if (!isRGBAllowedForThisPartition)
+                {
+                    errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha);
+                    errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16);
+                }
+            }
+
+            if (ParallelMath::AnySet(errorBetter16))
+            {
+                for (int subset = 0; subset < numSubsets; subset++)
+                {
+                    int shape = partitionShapes[subset];
+                    int shapeStart = BC7Data::g_shapeRanges[shape][0];
+                    int shapeLength = BC7Data::g_shapeRanges[shape][1];
+
+                    for (int epi = 0; epi < 2; epi++)
+                        for (int ch = 0; ch < 4; ch++)
+                            ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]);
+
+                    for (int pxi = 0; pxi < shapeLength; pxi++)
+                    {
+                        int px = BC7Data::g_fragments[shapeStart + pxi];
+                        ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]);
+                    }
+                }
+
+                ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError);
+                ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
+                ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition));
+            }
+        }
+    }
+}
+
+void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that.
+    // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to
+    // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases:
+    // - Separate alpha channel, then weighted RGB
+    // - Alpha+2 other channels, then the independent channel
+    if (numRefineRounds < 1)
+        numRefineRounds = 1;
+
+    float channelWeightsSq[4];
+    for (int ch = 0; ch < 4; ch++)
+        channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
+
+    for (uint16_t mode = 4; mode <= 5; mode++)
+    {
+        int numSP[2] = { 0, 0 };
+
+        for (uint16_t rotation = 0; rotation < 4; rotation++)
+        {
+            if (mode == 4)
+            {
+                numSP[0] = encodingPlan.mode4SP[rotation][0];
+                numSP[1] = encodingPlan.mode4SP[rotation][1];
+            }
+            else
+                numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation];
+
+            if (numSP[0] == 0 && numSP[1] == 0)
+                continue;
+
+            int alphaChannel = (rotation + 3) & 3;
+            int redChannel = (rotation == 1) ? 3 : 0;
+            int greenChannel = (rotation == 2) ? 3 : 1;
+            int blueChannel = (rotation == 3) ? 3 : 2;
+
+            MUInt15 rotatedRGB[16][3];
+            MFloat floatRotatedRGB[16][3];
+
+            for (int px = 0; px < 16; px++)
+            {
+                rotatedRGB[px][0] = pixels[px][redChannel];
+                rotatedRGB[px][1] = pixels[px][greenChannel];
+                rotatedRGB[px][2] = pixels[px][blueChannel];
+
+                for (int ch = 0; ch < 3; ch++)
+                    floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]);
+            }
+
+            uint16_t maxIndexSelector = (mode == 4) ? 2 : 1;
+
+            float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] };
+            float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] };
+            float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] };
+            float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] };
+
+            float uniformWeight[1] = { 1.0f };   // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error
+
+            MFloat preWeightedRotatedRGB[16][3];
+            BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights);
+
+            for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++)
+            {
+                int numTweakRounds = numSP[indexSelector];
+
+                if (numTweakRounds <= 0)
+                    continue;
+
+                if (numTweakRounds > MaxTweakRounds)
+                    numTweakRounds = MaxTweakRounds;
+
+                EndpointSelector<3, 8> rgbSelector;
+
+                for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
+                {
+                    for (int px = 0; px < 16; px++)
+                        rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f));
+
+                    rgbSelector.FinishPass(epPass);
+                }
+
+                MUInt15 alphaRange[2];
+
+                alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel];
+                for (int px = 1; px < 16; px++)
+                {
+                    alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]);
+                    alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]);
+                }
+
+                int rgbPrec = 0;
+                int alphaPrec = 0;
+
+                if (mode == 4)
+                {
+                    rgbPrec = indexSelector ? 3 : 2;
+                    alphaPrec = indexSelector ? 2 : 3;
+                }
+                else
+                    rgbPrec = alphaPrec = 2;
+
+                UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights);
+
+                MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX);
+                MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX);
+
+                MUInt15 bestRGBIndexes[16];
+                MUInt15 bestAlphaIndexes[16];
+                MUInt15 bestEP[2][4];
+
+                for (int px = 0; px < 16; px++)
+                    bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0);
+
+                for (int tweak = 0; tweak < numTweakRounds; tweak++)
+                {
+                    MUInt15 rgbEP[2][3];
+                    MUInt15 alphaEP[2];
+
+                    unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]);
+
+                    TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP);
+
+                    for (int refine = 0; refine < numRefineRounds; refine++)
+                    {
+                        if (mode == 4)
+                            CompressEndpoints4(rgbEP, alphaEP);
+                        else
+                            CompressEndpoints5(rgbEP, alphaEP);
+
+
+                        IndexSelector<1> alphaIndexSelector;
+                        IndexSelector<3> rgbIndexSelector;
+
+                        {
+                            MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } };
+                            alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec);
+                        }
+                        rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec);
+
+                        EndpointRefiner<3> rgbRefiner;
+                        EndpointRefiner<1> alphaRefiner;
+
+                        rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights);
+                        alphaRefiner.Init(1 << alphaPrec, uniformWeight);
+
+                        MFloat errorRGB = ParallelMath::MakeFloatZero();
+                        MFloat errorA = ParallelMath::MakeFloatZero();
+
+                        MUInt15 rgbIndexes[16];
+                        MUInt15 alphaIndexes[16];
+
+                        AggregatedError<3> rgbAggError;
+                        AggregatedError<1> alphaAggError;
+
+                        for (int px = 0; px < 16; px++)
+                        {
+                            MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn);
+                            MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn);
+
+                            MUInt15 reconstructedRGB[3];
+                            MUInt15 reconstructedAlpha[1];
+
+                            rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB);
+                            alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha);
+
+                            if (flags & cvtt::Flags::BC7_FastIndexing)
+                            {
+                                BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError);
+                                BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError);
+                            }
+                            else
+                            {
+                                AggregatedError<3> baseRGBAggError;
+                                AggregatedError<1> baseAlphaAggError;
+
+                                BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError);
+                                BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError);
+
+                                MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
+                                MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
+
+                                MUInt15 altRGBIndexes[2];
+                                MUInt15 altAlphaIndexes[2];
+
+                                altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
+                                altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1)));
+
+                                altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
+                                altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1)));
+
+                                for (int ii = 0; ii < 2; ii++)
+                                {
+                                    rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB);
+                                    alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha);
+
+                                    AggregatedError<3> altRGBAggError;
+                                    AggregatedError<1> altAlphaAggError;
+
+                                    BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError);
+                                    BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError);
+
+                                    MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
+                                    MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
+
+                                    ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError));
+                                    ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError));
+
+                                    rgbError = ParallelMath::Min(altRGBError, rgbError);
+                                    alphaError = ParallelMath::Min(altAlphaError, alphaError);
+
+                                    ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]);
+                                    ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]);
+                                }
+
+                                errorRGB = errorRGB + rgbError;
+                                errorA = errorA + alphaError;
+                            }
+
+                            if (refine != numRefineRounds - 1)
+                            {
+                                rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex);
+                                alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex);
+                            }
+
+                            if (flags & Flags::BC7_FastIndexing)
+                            {
+                                errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq);
+                                errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq);
+                            }
+
+                            rgbIndexes[px] = rgbIndex;
+                            alphaIndexes[px] = alphaIndex;
+                        }
+
+                        ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError);
+                        ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError);
+
+                        ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter);
+                        ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter);
+
+                        if (ParallelMath::AnySet(rgbBetterInt16))
+                        {
+                            bestRGBError = ParallelMath::Min(errorRGB, bestRGBError);
+
+                            for (int px = 0; px < 16; px++)
+                                ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]);
+
+                            for (int ep = 0; ep < 2; ep++)
+                            {
+                                for (int ch = 0; ch < 3; ch++)
+                                    ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]);
+                            }
+                        }
+
+                        if (ParallelMath::AnySet(alphaBetterInt16))
+                        {
+                            bestAlphaError = ParallelMath::Min(errorA, bestAlphaError);
+
+                            for (int px = 0; px < 16; px++)
+                                ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]);
+
+                            for (int ep = 0; ep < 2; ep++)
+                                ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]);
+                        }
+
+                        if (refine != numRefineRounds - 1)
+                        {
+                            rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn);
+
+                            MUInt15 alphaEPTemp[2][1];
+                            alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn);
+
+                            for (int i = 0; i < 2; i++)
+                                alphaEP[i] = alphaEPTemp[i][0];
+                        }
+                    }	// refine
+                } // tweak
+
+                MFloat combinedError = bestRGBError + bestAlphaError;
+
+                ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error);
+                ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
+
+                work.m_error = ParallelMath::Min(combinedError, work.m_error);
+
+                ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
+                ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation));
+                ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector));
+
+                for (int px = 0; px < 16; px++)
+                {
+                    ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]);
+                    ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]);
+                }
+
+                for (int ep = 0; ep < 2; ep++)
+                    for (int ch = 0; ch < 4; ch++)
+                        ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]);
+            }
+        }
+    }
+}
+
+// Exchanges the values of a and b through a single saved copy of a.
+template<class T> void cvtt::Internal::BC7Computer::Swap(T& a, T& b)
+{
+    const T saved(a);
+    a = b;
+    b = saved;
+}
+
+// Encodes ParallelMath::ParallelSize input blocks and writes 16 bytes per block to packedBlocks.
+void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds)
+{
+    MUInt15 pixels[16][4];
+    MFloat floatPixels[16][4];
+    // Convert the inputs to integer pixels, then keep a float copy of every channel.
+    for (int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 4; ch++)
+            ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);
+    }
+
+    for (int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 4; ch++)
+            floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]);
+    }
+
+    BC67::WorkInfo work;
+    memset(&work, 0, sizeof(work));
+    // Seed the error with FLT_MAX before the search passes below update work.
+    work.m_error = ParallelMath::MakeFloat(FLT_MAX);
+
+    {
+        ParallelMath::RoundTowardNearestForScope rtn;
+        TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);
+        TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);
+    }
+    // Serialize each parallel lane into its own 16 bytes of packedBlocks.
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        PackingVector pv;
+        pv.Init();
+
+        ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block);
+        ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block);
+        ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block);
+
+        const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode];
+
+        ParallelMath::ScalarUInt16 indexes[16];
+        ParallelMath::ScalarUInt16 indexes2[16];
+        ParallelMath::ScalarUInt16 endPoints[3][2][4];
+        // Pull this lane's scalar indexes and endpoints out of the parallel work data.
+        for (int i = 0; i < 16; i++)
+        {
+            indexes[i] = ParallelMath::Extract(work.m_indexes[i], block);
+            if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+                indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block);
+        }
+
+        for (int subset = 0; subset < 3; subset++)
+        {
+            for (int ep = 0; ep < 2; ep++)
+            {
+                for (int ch = 0; ch < 4; ch++)
+                    endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block);
+            }
+        }
+        // fixups[] holds, per subset, the pixel whose index is packed with one bit less (see "Encode indexes").
+        int fixups[3] = { 0, 0, 0 };
+        // When such an index has its top bit set, invert that index set and swap the matching endpoints so the top bit is 0.
+        if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+        {
+            bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0);
+            bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0);
+
+            if (flipRGB)
+            {
+                uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
+                for (int px = 0; px < 16; px++)
+                    indexes[px] = highIndex - indexes[px];
+            }
+
+            if (flipAlpha)
+            {
+                uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1;
+                for (int px = 0; px < 16; px++)
+                    indexes2[px] = highIndex - indexes2[px];
+            }
+            // TryDualPlane stores alpha indexes in m_indexes when the index selector is set, so the flags trade places before the endpoint swaps.
+            if (indexSelector)
+                Swap(flipRGB, flipAlpha);
+
+            if (flipRGB)
+            {
+                for (int ch = 0; ch < 3; ch++)
+                    Swap(endPoints[0][0][ch], endPoints[0][1][ch]);
+            }
+            if (flipAlpha)
+                Swap(endPoints[0][0][3], endPoints[0][1][3]);
+        }
+        else
+        {
+            if (modeInfo.m_numSubsets == 2)
+                fixups[1] = BC7Data::g_fixupIndexes2[partition];
+            else if (modeInfo.m_numSubsets == 3)
+            {
+                fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
+                fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
+            }
+
+            bool flip[3] = { false, false, false };
+            for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+                flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0);
+
+            if (flip[0] || flip[1] || flip[2])
+            {
+                uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
+                for (int px = 0; px < 16; px++)
+                {
+                    int subset = 0;
+                    if (modeInfo.m_numSubsets == 2)
+                        subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
+                    else if (modeInfo.m_numSubsets == 3)
+                        subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
+
+                    if (flip[subset])
+                        indexes[px] = highIndex - indexes[px];
+                }
+
+                int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3;
+                for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+                {
+                    if (flip[subset])
+                        for (int ch = 0; ch < maxCH; ch++)
+                            Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]);
+                }
+            }
+        }
+        // Mode field: the value (1 << mode) written in mode + 1 bits.
+        pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1);
+
+        if (modeInfo.m_partitionBits)
+            pv.Pack(partition, modeInfo.m_partitionBits);
+
+        if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+        {
+            ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block);
+            pv.Pack(rotation, 2);
+        }
+
+        if (modeInfo.m_hasIndexSelector)
+            pv.Pack(indexSelector, 1);
+
+        // Encode RGB
+        for (int ch = 0; ch < 3; ch++)
+        {
+            for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+            {
+                for (int ep = 0; ep < 2; ep++)
+                {
+                    ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch];
+                    epPart >>= (8 - modeInfo.m_rgbBits);
+
+                    pv.Pack(epPart, modeInfo.m_rgbBits);
+                }
+            }
+        }
+
+        // Encode alpha
+        if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
+        {
+            for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+            {
+                for (int ep = 0; ep < 2; ep++)
+                {
+                    ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3];
+                    epPart >>= (8 - modeInfo.m_alphaBits);
+
+                    pv.Pack(epPart, modeInfo.m_alphaBits);
+                }
+            }
+        }
+
+        // Encode parity bits
+        if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
+        {
+            for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+            {
+                ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0];
+                epPart >>= (7 - modeInfo.m_rgbBits);
+                epPart &= 1;
+
+                pv.Pack(epPart, 1);
+            }
+        }
+        else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
+        {
+            for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+            {
+                for (int ep = 0; ep < 2; ep++)
+                {
+                    ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0];
+                    epPart >>= (7 - modeInfo.m_rgbBits);
+                    epPart &= 1;
+
+                    pv.Pack(epPart, 1);
+                }
+            }
+        }
+
+        // Encode indexes
+        for (int px = 0; px < 16; px++)
+        {
+            int bits = modeInfo.m_indexBits;
+            if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
+                bits--;
+
+            pv.Pack(indexes[px], bits);
+        }
+
+        // Encode secondary indexes
+        if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+        {
+            for (int px = 0; px < 16; px++)
+            {
+                int bits = modeInfo.m_alphaIndexBits;
+                if (px == 0)
+                    bits--;
+
+                pv.Pack(indexes2[px], bits);
+            }
+        }
+
+        pv.Flush(packedBlocks);
+
+        packedBlocks += 16;
+    }
+}
+
+// Decodes one packed BC7 block into the 16 four-channel pixels of output.
+// The mode is the position of the first set bit among the first 8 unpacked bits;
+// if none of them is set, every output channel is written as 0.
+void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock)
+{
+    UnpackingVector pv;
+    pv.Init(packedBlock);
+
+    int mode = 8;
+    for (int i = 0; i < 8; i++)
+    {
+        if (pv.Unpack(1) == 1)
+        {
+            mode = i;
+            break;
+        }
+    }
+
+    if (mode > 7)
+    {
+        for (int px = 0; px < 16; px++)
+            for (int ch = 0; ch < 4; ch++)
+                output.m_pixels[px][ch] = 0;
+
+        return;
+    }
+
+    const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode];
+
+    int partition = 0;
+    if (modeInfo.m_partitionBits)
+        partition = pv.Unpack(modeInfo.m_partitionBits);
+
+    int rotation = 0;
+    if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+        rotation = pv.Unpack(2);
+
+    int indexSelector = 0;
+    if (modeInfo.m_hasIndexSelector)
+        indexSelector = pv.Unpack(1);
+
+    // Resolve fixups
+    int fixups[3] = { 0, 0, 0 };
+
+    if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate)
+    {
+        if (modeInfo.m_numSubsets == 2)
+            fixups[1] = BC7Data::g_fixupIndexes2[partition];
+        else if (modeInfo.m_numSubsets == 3)
+        {
+            fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
+            fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
+        }
+    }
+
+    int endPoints[3][2][4];
+
+    // Decode RGB
+    for (int ch = 0; ch < 3; ch++)
+    {
+        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+        {
+            for (int ep = 0; ep < 2; ep++)
+                endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits));
+        }
+    }
+
+    // Decode alpha
+    if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
+    {
+        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+        {
+            for (int ep = 0; ep < 2; ep++)
+                endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits));
+        }
+    }
+    else
+    {
+        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+        {
+            for (int ep = 0; ep < 2; ep++)
+                endPoints[subset][ep][3] = 255;
+        }
+    }
+
+    int parityBits = 0;
+
+    // Decode parity bits
+    if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
+    {
+        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+        {
+            int p = pv.Unpack(1);
+
+            for (int ep = 0; ep < 2; ep++)
+            {
+                for (int ch = 0; ch < 3; ch++)
+                    endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);
+
+                if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
+                    endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
+            }
+        }
+
+        parityBits = 1;
+    }
+    else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
+    {
+        for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+        {
+            for (int ep = 0; ep < 2; ep++)
+            {
+                int p = pv.Unpack(1);
+
+                for (int ch = 0; ch < 3; ch++)
+                    endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);
+
+                if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
+                    endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
+            }
+        }
+
+        parityBits = 1;
+    }
+
+    // Fill endpoint bits
+    // (each stored value is OR-ed with a copy of itself shifted down past its stored bits)
+    for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
+    {
+        for (int ep = 0; ep < 2; ep++)
+        {
+            for (int ch = 0; ch < 3; ch++)
+                endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits));
+
+            if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
+                endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits));
+        }
+    }
+
+    int indexes[16];
+    int indexes2[16];
+
+    // Decode indexes
+    for (int px = 0; px < 16; px++)
+    {
+        int bits = modeInfo.m_indexBits;
+        if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
+            bits--;
+
+        indexes[px] = pv.Unpack(bits);
+    }
+
+    // Decode secondary indexes
+    if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+    {
+        for (int px = 0; px < 16; px++)
+        {
+            int bits = modeInfo.m_alphaIndexBits;
+            if (px == 0)
+                bits--;
+
+            indexes2[px] = pv.Unpack(bits);
+        }
+    }
+    else
+    {
+        for (int px = 0; px < 16; px++)
+            indexes2[px] = 0;
+    }
+    // Each weight table is selected by the bit width of the index set it serves.
+    const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits];
+    const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits];
+
+    // Decode each pixel
+    for (int px = 0; px < 16; px++)
+    {
+        int rgbWeight = rgbWeights[indexes[px]];
+        int alphaWeight = 0;
+
+        if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined)
+            alphaWeight = rgbWeight;
+        else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
+            alphaWeight = alphaWeights[indexes2[px]];
+        // With the index selector set, the two weights trade roles.
+        if (indexSelector == 1)
+        {
+            int temp = rgbWeight;
+            rgbWeight = alphaWeight;
+            alphaWeight = temp;
+        }
+
+        int pixel[4] = { 0, 0, 0, 255 };
+
+        int subset = 0;
+
+        if (modeInfo.m_numSubsets == 2)
+            subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
+        else if (modeInfo.m_numSubsets == 3)
+            subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
+
+        for (int ch = 0; ch < 3; ch++)
+            pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6;
+
+        if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
+            pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6;
+        // A non-zero rotation exchanges channel (rotation - 1) with alpha.
+        if (rotation != 0)
+        {
+            int ch = rotation - 1;
+            int temp = pixel[ch];
+            pixel[ch] = pixel[3];
+            pixel[3] = temp;
+        }
+
+        for (int ch = 0; ch < 4; ch++)
+            output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]);
+    }
+}
+
+// Quantizes a signed endpoint component: the magnitude is scaled by 32/31, rounded via ru, shifted right by (16 - precision), and the sign is put back.
+cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru)
+{
+    MSInt16 zero = ParallelMath::MakeSInt16(0);
+    assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744))));
+    assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL)));
+
+    // Work on the magnitude only; signFlag remembers which lanes to negate at the end.
+    ParallelMath::Int16CompFlag signFlag = ParallelMath::Less(elem2CL, zero);
+    MUInt15 magnitude = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(signFlag, zero - elem2CL, elem2CL));
+    MFloat expanded = ParallelMath::ToFloat(magnitude) * 32.0f / 31.0f;
+    MUInt15 quantizedMagnitude = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(expanded, ru), 16 - precision);
+    MSInt16 quantized = ParallelMath::LosslessCast<MSInt16>::Cast(quantizedMagnitude);
+    return ParallelMath::Select(signFlag, zero - quantized, quantized);
+}
+
+// Quantizes an unsigned component: scale by 64/31, clamp to 65535, round to 16 bits via ru, then shift right by (16 - precision).
+cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru)
+{
+    return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru), 16 - precision));
+}
+
+// Expands a signed 'precision'-bit quantized component. outUnquantized receives the signed 16-bit expansion;
+// outUnquantizedFinished2CL receives (magnitude * 31) >> 5 with the sign reapplied.
+void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL)
+{
+    MSInt16 zero = ParallelMath::MakeSInt16(0);
+
+    ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero);
+    MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp));
+
+    MSInt16 unq;
+    MUInt15 absUnq;
+    if (precision >= 16)
+    {
+        unq = comp;
+        absUnq = absComp;
+    }
+    else
+    {
+        // Saturation is tested on the magnitude, as the BC6H reference unquantizer does after stripping the sign,
+        // so the most negative code expands to -0x7fff just as the most positive one expands to 0x7fff.
+        MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << (precision - 1)) - 2));
+        ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
+        ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, absComp);
+
+        absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1)));
+        ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0));
+        ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff));
+        unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq));
+    }
+
+    outUnquantized = unq;
+    MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5));
+    outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq));
+}
+
+// Expands an unsigned 'precision'-bit quantized component to 16 bits (outUnquantized) and to (expansion * 31) >> 6 (outUnquantizedFinished).
+void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished)
+{
+    MUInt16 expanded = ParallelMath::LosslessCast<MUInt16>::Cast(comp);
+
+    // Codes narrower than 15 bits are widened; at 15 bits and above the code passes through unchanged.
+    if (precision < 15)
+    {
+        ParallelMath::Int16CompFlag compIsZero = ParallelMath::Equal(comp, ParallelMath::MakeUInt15(0));
+        ParallelMath::Int16CompFlag compIsMax = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2)), comp);
+
+        MUInt16 halfStep = ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision));
+        expanded = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + halfStep;
+        // Zero stays zero and the largest code saturates to 0xffff.
+        ParallelMath::ConditionalSet(expanded, compIsZero, ParallelMath::MakeUInt16(0));
+        ParallelMath::ConditionalSet(expanded, compIsMax, ParallelMath::MakeUInt16(0xffff));
+    }
+    outUnquantized = expanded;
+    outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(expanded, ParallelMath::MakeUInt15(31)), 6));
+}
+
+// Quantizes both signed endpoints, initializes indexSelector from their unquantized forms and selects the index of pixel fixupIndex; lanes whose index falls in the upper half of the range get the index, the selector and the endpoint order inverted.
+void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    MSInt16 epUnquantized[2][3];
+    MSInt16 epFinished[2][3];
+
+    {
+        // The quantization loop runs inside its own RoundUpForScope.
+        ParallelMath::RoundUpForScope roundUp;
+        for (int i = 0; i < 6; i++)
+        {
+            int epi = i / 3;
+            int ch = i % 3;
+            MSInt16 quantized = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &roundUp);
+            UnquantizeSingleEndpointElementSigned(quantized, precision, epUnquantized[epi][ch], epFinished[epi][ch]);
+            quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(quantized);
+        }
+    }
+
+    indexSelector.Init(channelWeights, epUnquantized, epFinished, indexRange);
+    indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights);
+
+    MUInt15 fixupIdx;
+    if (fastIndexing)
+        fixupIdx = indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn);
+    else
+        fixupIdx = indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
+
+    // Set in lanes where the selected index is greater than indexRange / 2 - 1.
+    ParallelMath::Int16CompFlag flipLanes = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1), fixupIdx);
+    if (ParallelMath::AnySet(flipLanes))
+    {
+        MUInt15 highestIndex = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1));
+        ParallelMath::ConditionalSet(fixupIdx, flipLanes, MUInt15(highestIndex - fixupIdx));
+        indexSelector.ConditionalInvert(flipLanes);
+        for (int ch = 0; ch < 3; ch++)
+        {
+            MAInt16 ep0 = quantizedEndPoints[0][ch];
+            MAInt16 ep1 = quantizedEndPoints[1][ch];
+            quantizedEndPoints[0][ch] = ParallelMath::Select(flipLanes, ep1, ep0);
+            quantizedEndPoints[1][ch] = ParallelMath::Select(flipLanes, ep0, ep1);
+        }
+    }
+
+    indexes[fixupIndex] = fixupIdx;
+}
+
+// Quantizes both unsigned endpoints, initializes indexSelector from their unquantized forms and selects the index of pixel fixupIndex; lanes whose index falls in the upper half of the range get the index, the selector and the endpoint order inverted.
+void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    MUInt16 epUnquantized[2][3];
+    MUInt16 epFinished[2][3];
+
+    {
+        // The quantization loop runs inside its own RoundUpForScope.
+        ParallelMath::RoundUpForScope roundUp;
+        for (int i = 0; i < 6; i++)
+        {
+            int epi = i / 3;
+            int ch = i % 3;
+            MUInt15 quantized = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &roundUp);
+            UnquantizeSingleEndpointElementUnsigned(quantized, precision, epUnquantized[epi][ch], epFinished[epi][ch]);
+            quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(quantized);
+        }
+    }
+
+    indexSelector.Init(channelWeights, epUnquantized, epFinished, indexRange);
+    indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights);
+
+    MUInt15 fixupIdx;
+    if (fastIndexing)
+        fixupIdx = indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn);
+    else
+        fixupIdx = indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
+
+    // Set in lanes where the selected index is greater than indexRange / 2 - 1.
+    ParallelMath::Int16CompFlag flipLanes = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1), fixupIdx);
+    if (ParallelMath::AnySet(flipLanes))
+    {
+        MUInt15 highestIndex = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1));
+        ParallelMath::ConditionalSet(fixupIdx, flipLanes, MUInt15(highestIndex - fixupIdx));
+        indexSelector.ConditionalInvert(flipLanes);
+        for (int ch = 0; ch < 3; ch++)
+        {
+            MAInt16 ep0 = quantizedEndPoints[0][ch];
+            MAInt16 ep1 = quantizedEndPoints[1][ch];
+            quantizedEndPoints[0][ch] = ParallelMath::Select(flipLanes, ep1, ep0);
+            quantizedEndPoints[1][ch] = ParallelMath::Select(flipLanes, ep0, ep1);
+        }
+    }
+
+    indexes[fixupIndex] = fixupIdx;
+}
+
+// Produces the endpoint values to encode for a two-subset block (ep0 = subset 0, ep1 = subset 1)
+// and reports per lane, through outIsLegal, whether they can be encoded.
+// Without isTransformed the endpoints are copied through and every lane is legal.
+// With isTransformed, every endpoint except subset 0 / endpoint 0 is replaced by its difference
+// from that base endpoint after TruncateToPrecisionSigned(..., bPrec[ch]).
+void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal)
+{
+    ParallelMath::Int16CompFlag legalLanes = ParallelMath::MakeBoolInt16(true);
+    MAInt16 lowBitsMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
+    const MAInt16 (*sourceEPs[2])[3] = { ep0, ep1 };
+
+    for (int ch = 0; ch < 3; ch++)
+    {
+        // Entry i addresses subset (i >> 1), endpoint (i & 1).
+        for (int i = 0; i < 4; i++)
+            outEncodedEPs[i >> 1][i & 1][ch] = sourceEPs[i >> 1][i & 1][ch];
+
+        // Entry 0 is the base endpoint and is never transformed.
+        for (int i = 1; isTransformed && i < 4; i++)
+        {
+            MAInt16 &encoded = outEncodedEPs[i >> 1][i & 1][ch];
+            MAInt16 &base = outEncodedEPs[0][0][ch];
+            MAInt16 expectedLowBits = (encoded & lowBitsMask);
+
+            MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(encoded, base)), bPrec[ch]);
+            encoded = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
+
+            // A lane stays legal only if base + delta matches the original value in the low aPrec bits.
+            MAInt16 rebuiltLowBits = (ParallelMath::AbstractAdd(encoded, base) & lowBitsMask);
+            legalLanes = legalLanes & ParallelMath::Equal(rebuiltLowBits, expectedLowBits);
+        }
+
+        // Once no lane is legal the remaining channels are skipped and their outputs stay unwritten.
+        if (!ParallelMath::AnySet(legalLanes))
+            break;
+    }
+
+    outIsLegal = legalLanes;
+}
+
+// Single-subset counterpart of the legality check: copies ep to outEncodedEPs and, when isTransformed is set, replaces endpoint 1 by its truncated difference from endpoint 0, flagging lanes where the low aPrec bits cannot be rebuilt.
+void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal)
+{
+    ParallelMath::Int16CompFlag legalLanes = ParallelMath::MakeBoolInt16(true);
+    MAInt16 lowBitsMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
+
+    for (int ch = 0; ch < 3; ch++)
+    {
+        MAInt16 &base = outEncodedEPs[0][ch];
+        MAInt16 &encoded = outEncodedEPs[1][ch];
+        base = ep[0][ch];
+        encoded = ep[1][ch];
+        if (!isTransformed)
+            continue;
+
+        // Remember the low aPrec bits of the original value before it is replaced by the delta.
+        MAInt16 expectedLowBits = (encoded & lowBitsMask);
+        MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(encoded, base)), bPrec[ch]);
+        encoded = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
+
+        MAInt16 rebuiltLowBits = (ParallelMath::AbstractAdd(encoded, base) & lowBitsMask);
+        legalLanes = legalLanes & ParallelMath::Equal(rebuiltLowBits, expectedLowBits);
+    }
+
+    outIsLegal = legalLanes;
+}
+
+void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds)
+{
+    if (numTweakRounds < 1)
+        numTweakRounds = 1;
+    else if (numTweakRounds > MaxTweakRounds)
+        numTweakRounds = MaxTweakRounds;
+
+    if (numRefineRounds < 1)
+        numRefineRounds = 1;
+    else if (numRefineRounds > MaxRefineRounds)
+        numRefineRounds = MaxRefineRounds;
+
+    bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0);
+    float channelWeightsSq[3];
+
+    ParallelMath::RoundTowardNearestForScope rtn;
+
+    MSInt16 pixels[16][3];
+    MFloat floatPixels2CL[16][3];
+    MFloat floatPixelsLinearWeighted[16][3];
+
+    MSInt16 low15Bits = ParallelMath::MakeSInt16(32767);
+
+    for (int ch = 0; ch < 3; ch++)
+        channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
+
+    for (int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            MSInt16 pixelValue;
+            ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue);
+
+            // Convert from sign+magnitude to 2CL
+            if (isSigned)
+            {
+                ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0));
+                MSInt16 magnitude = (pixelValue & low15Bits);
+                ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude);
+                pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743));
+            }
+            else
+                pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0));
+
+            pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743));
+
+            pixels[px][ch] = pixelValue;
+            floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue);
+            floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch];
+        }
+    }
+
+    MFloat preWeightedPixels[16][3];
+
+    BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights);
+
+    MAInt16 bestEndPoints[2][2][3];
+    MUInt15 bestIndexes[16];
+    MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+    MUInt15 bestMode = ParallelMath::MakeUInt15(0);
+    MUInt15 bestPartition = ParallelMath::MakeUInt15(0);
+
+    for (int px = 0; px < 16; px++)
+        bestIndexes[px] = ParallelMath::MakeUInt15(0);
+
+    for (int subset = 0; subset < 2; subset++)
+        for (int epi = 0; epi < 2; epi++)
+            for (int ch = 0; ch < 3; ch++)
+                bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0);
+
+    UnfinishedEndpoints<3> partitionedUFEP[32][2];
+    UnfinishedEndpoints<3> singleUFEP;
+
+    // Generate UFEP for partitions
+    for (int p = 0; p < 32; p++)
+    {
+        int partitionMask = BC7Data::g_partitionMap[p];
+
+        EndpointSelector<3, 8> epSelectors[2];
+
+        for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
+        {
+            for (int px = 0; px < 16; px++)
+            {
+                int subset = (partitionMask >> px) & 1;
+                epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
+            }
+
+            for (int subset = 0; subset < 2; subset++)
+                epSelectors[subset].FinishPass(pass);
+        }
+
+        for (int subset = 0; subset < 2; subset++)
+            partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights);
+    }
+
+    // Generate UFEP for single
+    {
+        EndpointSelector<3, 8> epSelector;
+
+        for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
+        {
+            for (int px = 0; px < 16; px++)
+                epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
+
+            epSelector.FinishPass(pass);
+        }
+
+        singleUFEP = epSelector.GetEndpoints(channelWeights);
+    }
+
+    for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++)
+    {
+        bool partitioned = (partitionedInt == 1);
+
+        for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--)
+        {
+            if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec])
+                continue;
+
+            int numPartitions = partitioned ? 32 : 1;
+            int numSubsets = partitioned ? 2 : 1;
+            int indexBits = partitioned ? 3 : 4;
+            int indexRange = (1 << indexBits);
+
+            for (int p = 0; p < numPartitions; p++)
+            {
+                int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0;
+
+                const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds;
+
+                MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3];
+                MUInt15 metaIndexes[MaxMetaRounds][16];
+                MFloat metaError[MaxMetaRounds][2];
+
+                bool roundValid[MaxMetaRounds][2];
+
+                for (int r = 0; r < MaxMetaRounds; r++)
+                    for (int subset = 0; subset < 2; subset++)
+                        roundValid[r][subset] = true;
+
+                for (int subset = 0; subset < numSubsets; subset++)
+                {
+                    for (int tweak = 0; tweak < MaxTweakRounds; tweak++)
+                    {
+                        EndpointRefiner<3> refiners[2];
+
+                        bool abortRemainingRefines = false;
+                        for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++)
+                        {
+                            int metaRound = tweak * MaxRefineRounds + refinePass;
+
+                            if (tweak >= numTweakRounds || refinePass >= numRefineRounds)
+                                abortRemainingRefines = true;
+
+                            if (abortRemainingRefines)
+                            {
+                                roundValid[metaRound][subset] = false;
+                                continue;
+                            }
+
+                            MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound];
+                            MUInt15(&mrIndexes)[16] = metaIndexes[metaRound];
+
+                            MSInt16 endPointsColorSpace[2][3];
+
+                            if (refinePass == 0)
+                            {
+                                UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP;
+
+                                if (isSigned)
+                                    ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
+                                else
+                                    ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
+                            }
+                            else
+                                refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn);
+
+                            refiners[subset].Init(indexRange, channelWeights);
+
+                            int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p];
+
+                            IndexSelectorHDR<3> indexSelector;
+                            if (isSigned)
+                                QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
+                            else
+                                QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
+
+                            if (metaRound > 0)
+                            {
+                                ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false);
+
+                                for (int prevRound = 0; prevRound < metaRound; prevRound++)
+                                {
+                                    MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset];
+
+                                    ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true);
+
+                                    for (int epi = 0; epi < 2; epi++)
+                                        for (int ch = 0; ch < 3; ch++)
+                                            same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch]));
+
+                                    anySame = (anySame | same);
+                                    if (ParallelMath::AllSet(anySame))
+                                        break;
+                                }
+
+                                if (ParallelMath::AllSet(anySame))
+                                {
+                                    roundValid[metaRound][subset] = false;
+                                    continue;
+                                }
+                            }
+
+                            MFloat subsetError = ParallelMath::MakeFloatZero();
+
+                            {
+                                for (int px = 0; px < 16; px++)
+                                {
+                                    if (subset != ((partitionMask >> px) & 1))
+                                        continue;
+
+                                    MUInt15 index;
+                                    if (px == fixupIndex)
+                                        index = mrIndexes[px];
+                                    else
+                                    {
+                                        index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn);
+                                        mrIndexes[px] = index;
+                                    }
+
+                                    MSInt16 reconstructed[3];
+                                    if (isSigned)
+                                        indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed);
+                                    else
+                                        indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed);
+
+                                    subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq));
+
+                                    if (refinePass != numRefineRounds - 1)
+                                        refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index);
+                                }
+                            }
+
+                            metaError[metaRound][subset] = subsetError;
+                        }
+                    }
+                }
+
+                // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme
+                int numMeta1 = partitioned ? MaxMetaRounds : 1;
+                for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++)
+                {
+                    if (!roundValid[meta0][0])
+                        continue;
+
+                    for (int meta1 = 0; meta1 < numMeta1; meta1++)
+                    {
+                        MFloat combinedError = metaError[meta0][0];
+                        if (partitioned)
+                        {
+                            if (!roundValid[meta1][1])
+                                continue;
+
+                            combinedError = combinedError + metaError[meta1][1];
+                        }
+
+                        ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError);
+                        if (!ParallelMath::AnySet(errorBetter))
+                            continue;
+
+                        ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter);
+
+                        // Figure out if this is encodable
+                        for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++)
+                        {
+                            const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode];
+
+                            if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec)
+                                continue;
+
+                            MAInt16 encodedEPs[2][2][3];
+                            ParallelMath::Int16CompFlag isLegal;
+                            if (partitioned)
+                                EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal);
+                            else
+                                EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal);
+
+                            ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal);
+                            if (!ParallelMath::AnySet(isLegalAndBetter))
+                                continue;
+
+                            ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter);
+
+                            ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError);
+                            ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode)));
+                            ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p)));
+
+                            for (int subset = 0; subset < numSubsets; subset++)
+                            {
+                                for (int epi = 0; epi < 2; epi++)
+                                {
+                                    for (int ch = 0; ch < 3; ch++)
+                                        ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]);
+                                }
+                            }
+
+                            for (int px = 0; px < 16; px++)
+                            {
+                                int subset = ((partitionMask >> px) & 1);
+                                if (subset == 0)
+                                    ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]);
+                                else
+                                    ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]);
+                            }
+
+                            needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter);
+                            if (!ParallelMath::AnySet(needsCommit))
+                                break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // At this point, everything should be set
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block);
+        ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block);
+        int32_t eps[2][2][3];
+        ParallelMath::ScalarUInt16 indexes[16];
+
+        const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
+
+        BC6H_IO::WriteFunc_t writeFunc = BC6H_IO::g_writeFuncs[mode];
+
+        const int headerBits = modeInfo.m_partitioned ? 82 : 65;
+
+        for (int subset = 0; subset < 2; subset++)
+        {
+            for (int epi = 0; epi < 2; epi++)
+            {
+                for (int ch = 0; ch < 3; ch++)
+                    eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block);
+            }
+        }
+
+        for (int px = 0; px < 16; px++)
+            indexes[px] = ParallelMath::Extract(bestIndexes[px], block);
+
+        uint16_t modeID = modeInfo.m_modeID;
+
+        PackingVector pv;
+
+        {
+            uint32_t header[3];
+            writeFunc(header, modeID, partition,
+                eps[0][0][0], eps[0][1][0], eps[1][0][0], eps[1][1][0],
+                eps[0][0][1], eps[0][1][1], eps[1][0][1], eps[1][1][1],
+                eps[0][0][2], eps[0][1][2], eps[1][0][2], eps[1][1][2]
+            );
+
+            pv.InitPacked(header, headerBits);
+        }
+
+        int fixupIndex1 = 0;
+        int indexBits = 4;
+        if (modeInfo.m_partitioned)
+        {
+            fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
+            indexBits = 3;
+        }
+
+        for (int px = 0; px < 16; px++)
+        {
+            ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block);
+            if (px == 0 || px == fixupIndex1)
+                pv.Pack(index, indexBits - 1);
+            else
+                pv.Pack(index, indexBits);
+        }
+
+        pv.Flush(packedBlocks + 16 * block);
+    }
+}
+
+void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits)
+{
+    const bool negative = (v & (1 << (bits - 1))) != 0; // is the top bit of the bits-wide field set?
+    v |= negative ? -(1 << bits) : 0;                   // if so, set every bit from position `bits` upward
+}
+
+// Decodes the single BC6H block at pBC into output as 16 RGBA pixels of 16-bit half-float bit patterns.
+// isSigned selects the signed variant of endpoint sign extension, unquantization and final scaling.
+// A mode field that matches no entry of BC7Data::g_hdrModes produces zero RGB for every pixel.
+// Alpha is always written as 0x3c00 (1.0).
+void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned)
+{
+    UnpackingVector pv;
+    pv.Init(pBC);
+
+    int modeBits = pv.Unpack(2); // values 0 and 1 are complete mode IDs; anything else takes 3 more bits
+    if (modeBits != 0 && modeBits != 1)
+    {
+        modeBits |= pv.Unpack(3) << 2;
+    }
+
+    int mode = -1; // index into g_hdrModes, or -1 when the mode ID is not in the table
+    for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++)
+    {
+        if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits)
+        {
+            mode = possibleMode;
+            break;
+        }
+    }
+
+    if (mode < 0)
+    {
+        for (int px = 0; px < 16; px++)
+        {
+            for (int ch = 0; ch < 3; ch++)
+                output.m_pixels[px][ch] = 0;
+            output.m_pixels[px][3] = 0x3c00;	// 1.0
+        }
+        return;
+    }
+
+    const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
+    const int headerBits = modeInfo.m_partitioned ? 82 : 65;
+    const BC6H_IO::ReadFunc_t readFunc = BC6H_IO::g_readFuncs[mode];
+
+    uint16_t partition = 0;
+    int32_t eps[2][2][3]; // [subset][endpoint][channel]
+
+    for (int subset = 0; subset < 2; subset++)
+        for (int epi = 0; epi < 2; epi++)
+            for (int ch = 0; ch < 3; ch++)
+                eps[subset][epi][ch] = 0;
+
+    {
+        uint32_t header[3];
+        uint16_t codedEPs[2][2][3];
+        pv.UnpackStart(header, headerBits);
+
+        readFunc(header, partition,
+            codedEPs[0][0][0], codedEPs[0][1][0], codedEPs[1][0][0], codedEPs[1][1][0],
+            codedEPs[0][0][1], codedEPs[0][1][1], codedEPs[1][0][1], codedEPs[1][1][1],
+            codedEPs[0][0][2], codedEPs[0][1][2], codedEPs[1][0][2], codedEPs[1][1][2]
+        );
+
+        for (int subset = 0; subset < 2; subset++)
+            for (int epi = 0; epi < 2; epi++)
+                for (int ch = 0; ch < 3; ch++)
+                    eps[subset][epi][ch] = codedEPs[subset][epi][ch];
+    }
+
+    int fixupIndex1 = 0; // stays 0 for single-subset modes, so only pixel 0 is shortened below
+    int indexBits = 4;
+    int numSubsets = 1;
+    if (modeInfo.m_partitioned)
+    {
+        fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
+        indexBits = 3;
+        numSubsets = 2;
+    }
+
+    int indexes[16];
+    for (int px = 0; px < 16; px++)
+    {
+        if (px == 0 || px == fixupIndex1) // these pixels are stored with one index bit less
+            indexes[px] = pv.Unpack(indexBits - 1);
+        else
+            indexes[px] = pv.Unpack(indexBits);
+    }
+
+    if (modeInfo.m_partitioned)
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            if (isSigned)
+                SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);
+            if (modeInfo.m_transformed || isSigned)
+            {
+                SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);
+                SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]);
+                SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]);
+            }
+        }
+    }
+    else
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            if (isSigned)
+                SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);
+            if (modeInfo.m_transformed || isSigned)
+                SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);
+        }
+    }
+
+    int aPrec = modeInfo.m_aPrec;
+
+    if (modeInfo.m_transformed) // the other endpoints are stored as offsets from eps[0][0]
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            int wrapMask = (1 << aPrec) - 1; // keeps each sum within aPrec bits
+
+            eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask);
+            if (isSigned)
+                SignExtendSingle(eps[0][1][ch], aPrec);
+
+            if (modeInfo.m_partitioned)
+            {
+                eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask);
+                eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask);
+
+                if (isSigned)
+                {
+                    SignExtendSingle(eps[1][0][ch], aPrec);
+                    SignExtendSingle(eps[1][1][ch], aPrec);
+                }
+            }
+        }
+    }
+
+    // Unquantize endpoints
+    for (int subset = 0; subset < numSubsets; subset++)
+    {
+        for (int epi = 0; epi < 2; epi++)
+        {
+            for (int ch = 0; ch < 3; ch++)
+            {
+                int &v = eps[subset][epi][ch];
+
+                if (isSigned)
+                {
+                    if (aPrec >= 16)
+                    {
+                        // Nothing
+                    }
+                    else
+                    {
+                        bool s = false; // sign is split off so the scaling works on the magnitude
+                        int comp = v;
+                        if (v < 0)
+                        {
+                            s = true;
+                            comp = -comp;
+                        }
+
+                        int unq = 0;
+                        if (comp == 0)
+                            unq = 0;
+                        else if (comp >= ((1 << (aPrec - 1)) - 1))
+                            unq = 0x7fff;
+                        else
+                            unq = ((comp << 15) + 0x4000) >> (aPrec - 1);
+
+                        if (s)
+                            unq = -unq;
+
+                        v = unq;
+                    }
+                }
+                else
+                {
+                    if (aPrec >= 15)
+                    {
+                        // Nothing
+                    }
+                    else if (v == 0)
+                    {
+                        // Nothing
+                    }
+                    else if (v == ((1 << aPrec) - 1))
+                        v = 0xffff;
+                    else
+                        v = ((v << 16) + 0x8000) >> aPrec;
+                }
+            }
+        }
+    }
+
+    const int *weights = BC7Data::g_weightTables[indexBits];
+
+    for (int px = 0; px < 16; px++)
+    {
+        int subset = 0;
+        if (modeInfo.m_partitioned)
+            subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
+
+        int w = weights[indexes[px]];
+        for (int ch = 0; ch < 3; ch++)
+        {
+            int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6; // 6-bit weighted blend, rounded
+
+            if (isSigned)
+            {
+                if (comp < 0)
+                    comp = -(((-comp) * 31) >> 5);
+                else
+                    comp = (comp * 31) >> 5;
+
+                int s = 0; // output is sign-magnitude: bit 15 carries the sign
+                if (comp < 0)
+                {
+                    s = 0x8000;
+                    comp = -comp;
+                }
+
+                output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp);
+            }
+            else
+            {
+                comp = (comp * 31) >> 6;
+                output.m_pixels[px][ch] = static_cast<uint16_t>(comp);
+            }
+        }
+        output.m_pixels[px][3] = 0x3c00;	// 1.0
+    }
+}
+
+void cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality)
+{
+    // Enables the leading quality% of the RGB and RGBA priority-code lists, records each enabled
+    // code's seed point count in a BC7FineTuningParams, and builds encodingPlan from the result.
+
+    namespace Prio = cvtt::Tables::BC7Prio;
+
+    static const int kMaxQuality = 100;
+
+    // Clamp the requested quality to [1, kMaxQuality].
+    if (quality > kMaxQuality)
+        quality = kMaxQuality;
+    if (quality < 1)
+        quality = 1;
+
+    // Parallel arrays: each code list and the number of leading entries to take from it.
+    const uint16_t *codeLists[] = { Prio::g_bc7PrioCodesRGB, Prio::g_bc7PrioCodesRGBA };
+    const int codeCounts[] =
+    {
+        Prio::g_bc7NumPrioCodesRGB * quality / kMaxQuality,
+        Prio::g_bc7NumPrioCodesRGBA * quality / kMaxQuality
+    };
+
+    BC7FineTuningParams params;
+    memset(&params, 0, sizeof(params));
+
+    // The RGB list is processed before the RGBA list; a later code replaces an earlier value for the same slot.
+    for (int list = 0; list < 2; list++)
+    {
+        const uint16_t *codes = codeLists[list];
+        const int numCodes = codeCounts[list];
+
+        for (int i = 0; i < numCodes; i++)
+        {
+            const uint16_t code = codes[i];
+
+            const uint8_t sp = static_cast<uint8_t>(Prio::UnpackSeedPointCount(code));
+            const int mode = Prio::UnpackMode(code);
+
+            // Modes 0-3 and 7 are keyed by partition, mode 4 by rotation and index selector,
+            // mode 5 by rotation; mode 6 has a single slot. Any other mode value is ignored.
+            if (mode == 0)
+                params.mode0SP[Prio::UnpackPartition(code)] = sp;
+            else if (mode == 1)
+                params.mode1SP[Prio::UnpackPartition(code)] = sp;
+            else if (mode == 2)
+                params.mode2SP[Prio::UnpackPartition(code)] = sp;
+            else if (mode == 3)
+                params.mode3SP[Prio::UnpackPartition(code)] = sp;
+            else if (mode == 4)
+                params.mode4SP[Prio::UnpackRotation(code)][Prio::UnpackIndexSelector(code)] = sp;
+            else if (mode == 5)
+                params.mode5SP[Prio::UnpackRotation(code)] = sp;
+            else if (mode == 6)
+                params.mode6SP = sp;
+            else if (mode == 7)
+                params.mode7SP[Prio::UnpackPartition(code)] = sp;
+        }
+    }
+
+    ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, params);
+}
+
+// Generates a BC7 encoding plan from fine-tuning parameters (per-mode seed point counts; 0 leaves an entry disabled).
+// encodingPlan is zeroed first and then filled in; the function always returns true.
+bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params)
+{
+    namespace Data = cvtt::Internal::BC7Data;
+
+    memset(&encodingPlan, 0, sizeof(encodingPlan));
+
+    // Mode 0: 16 partitions of three subsets each, tracked in the RGB shape table.
+    for (int part = 0; part < 16; part++)
+    {
+        const uint8_t seeds = params.mode0SP[part];
+        if (seeds != 0)
+        {
+            encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << part;
+
+            for (int s = 0; s < 3; s++)
+            {
+                const int shape = Data::g_shapes3[part][s];
+                if (encodingPlan.seedPointsForShapeRGB[shape] < seeds)
+                    encodingPlan.seedPointsForShapeRGB[shape] = seeds;
+            }
+        }
+    }
+
+    // Mode 1: 64 partitions of two subsets each, tracked in the RGB shape table.
+    for (int part = 0; part < 64; part++)
+    {
+        const uint8_t seeds = params.mode1SP[part];
+        if (seeds != 0)
+        {
+            encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << part;
+
+            for (int s = 0; s < 2; s++)
+            {
+                const int shape = Data::g_shapes2[part][s];
+                if (encodingPlan.seedPointsForShapeRGB[shape] < seeds)
+                    encodingPlan.seedPointsForShapeRGB[shape] = seeds;
+            }
+        }
+    }
+
+    // Mode 2: 64 partitions of three subsets each, tracked in the RGB shape table.
+    for (int part = 0; part < 64; part++)
+    {
+        const uint8_t seeds = params.mode2SP[part];
+        if (seeds != 0)
+        {
+            encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << part;
+
+            for (int s = 0; s < 3; s++)
+            {
+                const int shape = Data::g_shapes3[part][s];
+                if (encodingPlan.seedPointsForShapeRGB[shape] < seeds)
+                    encodingPlan.seedPointsForShapeRGB[shape] = seeds;
+            }
+        }
+    }
+
+    // Mode 3: 64 partitions of two subsets each, tracked in the RGB shape table.
+    for (int part = 0; part < 64; part++)
+    {
+        const uint8_t seeds = params.mode3SP[part];
+        if (seeds != 0)
+        {
+            encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << part;
+
+            for (int s = 0; s < 2; s++)
+            {
+                const int shape = Data::g_shapes2[part][s];
+                if (encodingPlan.seedPointsForShapeRGB[shape] < seeds)
+                    encodingPlan.seedPointsForShapeRGB[shape] = seeds;
+            }
+        }
+    }
+
+    // Modes 4 and 5: seed point counts are copied through unchanged.
+    for (int rot = 0; rot < 4; rot++)
+    {
+        encodingPlan.mode5SP[rot] = params.mode5SP[rot];
+        for (int idxMode = 0; idxMode < 2; idxMode++)
+            encodingPlan.mode4SP[rot][idxMode] = params.mode4SP[rot][idxMode];
+    }
+
+    // Mode 6: a single entry, tracked in the RGBA shape table.
+    if (params.mode6SP != 0)
+    {
+        const uint8_t seeds = params.mode6SP;
+        const int shape = Data::g_shapes1[0][0];
+
+        encodingPlan.mode6Enabled = true;
+        if (encodingPlan.seedPointsForShapeRGBA[shape] < seeds)
+            encodingPlan.seedPointsForShapeRGBA[shape] = seeds;
+    }
+
+    // Mode 7: 64 partitions of two subsets each, tracked in the RGBA shape table.
+    for (int part = 0; part < 64; part++)
+    {
+        const uint8_t seeds = params.mode7SP[part];
+        if (seeds != 0)
+        {
+            encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << part;
+
+            for (int s = 0; s < 2; s++)
+            {
+                const int shape = Data::g_shapes2[part][s];
+                if (encodingPlan.seedPointsForShapeRGBA[shape] < seeds)
+                    encodingPlan.seedPointsForShapeRGBA[shape] = seeds;
+            }
+        }
+    }
+
+    // List every shape that received at least one seed point, in ascending shape order.
+    for (int shape = 0; shape < BC7EncodingPlan::kNumRGBShapes; shape++)
+    {
+        if (encodingPlan.seedPointsForShapeRGB[shape] > 0)
+            encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate++] = shape;
+    }
+
+    for (int shape = 0; shape < BC7EncodingPlan::kNumRGBAShapes; shape++)
+    {
+        if (encodingPlan.seedPointsForShapeRGBA[shape] > 0)
+            encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate++] = shape;
+    }
+
+    // Mode 7 RGB partitions are those enabled for mode 7 but not already enabled for mode 3.
+    encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled);
+
+    return true;
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_BC67.h b/thirdparty/cvtt/ConvectionKernels_BC67.h
new file mode 100644
index 0000000000..b929711187
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BC67.h
@@ -0,0 +1,99 @@
+#pragma once
+
+#include "ConvectionKernels_ParallelMath.h"
+
+
+namespace cvtt // forward declarations only, for types the class declarations below refer to
+{
+    namespace Tables
+    {
+        namespace BC7SC
+        {
+            struct Table; // used through pointers by BC7Computer::TrySingleColorRGBAMultiTable
+        }
+    }
+
+    namespace Internal
+    {
+        namespace BC67
+        {
+            struct WorkInfo; // passed by reference to BC7Computer::TrySinglePlane and TryDualPlane
+        }
+
+        template<int TVectorSize>
+        class IndexSelectorHDR; // taken by reference by BC6HComputer::QuantizeEndpointsSigned/Unsigned
+    }
+
+    struct PixelBlockU8; // pixel block type used by BC7Computer::Pack and UnpackOne
+}
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        class BC7Computer // every member is static; the class only groups the BC7 routines
+        {
+        public:
+            static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds); // presumably encodes inputs into packedBlocks - definition not in this header
+            static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock); // presumably decodes one packed block into output - definition not in this header
+
+        private:
+            static const int MaxTweakRounds = 4;
+
+            typedef ParallelMath::SInt16 MSInt16; // short aliases for the ParallelMath types
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+            typedef ParallelMath::Float MFloat;
+
+            static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]);
+            static void Quantize(MUInt15* color, int bits, int channels);
+            static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels);
+            static void Unquantize(MUInt15* color, int bits, int channels);
+            static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]); // CompressEndpointsN: presumably one routine per BC7 mode N - TODO confirm against the .cpp
+            static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p);
+            static void CompressEndpoints2(MUInt15 ep[2][4]);
+            static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]);
+            static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]); // 4 and 5 take RGB and alpha endpoints separately
+            static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]);
+            static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]);
+            static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]);
+            static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn);
+            static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn);
+            static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn);
+
+            template<class T>
+            static void Swap(T& a, T& b);
+        };
+
+
+        class BC6HComputer // every member is static; the class only groups the BC6H routines
+        {
+        public:
+            static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds); // writes 16 bytes per block into packedBlocks; rounds beyond MaxTweakRounds / MaxRefineRounds are never run
+            static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned); // decodes the block at pBC into 16 pixels; alpha is always set to 0x3c00 (1.0)
+
+        private:
+            typedef ParallelMath::Float MFloat; // short aliases for the ParallelMath types
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::AInt16 MAInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+            typedef ParallelMath::UInt31 MUInt31;
+
+            static const int MaxTweakRounds = 4; // Pack sizes its per-round arrays as MaxTweakRounds * MaxRefineRounds
+            static const int MaxRefineRounds = 3;
+
+            static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru);
+            static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru);
+            static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL);
+            static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished);
+            static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn);
+            static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn);
+            static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal); // Pack calls this for two-subset modes, passing one endpoint set per subset
+            static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal); // Pack calls this for single-subset modes
+            static void SignExtendSingle(int &v, int bits); // if bit (bits - 1) of v is set, sets every bit from position bits upward
+        };
+    }
+}
diff --git a/thirdparty/cvtt/ConvectionKernels_BC6H_IO.cpp b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.cpp
new file mode 100644
index 0000000000..753b6f9000
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.cpp
@@ -0,0 +1,881 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels_BC6H_IO.h"
+
+namespace cvtt
+{
+    namespace BC6H_IO
+    {
+        void WriteMode0(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 2-bit mode field m, the 5-bit field d and the r/g/b endpoint fields into encoded[0..2]; all three words are assigned (not OR-ed), so earlier contents are overwritten.
+            encoded[0] = (m & 0x3u) | ((gy >> 2) & 0x4u) | ((by >> 1) & 0x8u) | (bz & 0x10u) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-1 m, 2 gy[4], 3 by[4], 4 bz[4], 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bits 0-2 bw[9:7], 3-7 rx[4:0], 8 gz[4], 9-12 gy[3:0], 13-17 gx[4:0], 18 bz[0], 19-22 gz[3:0], 23-27 bx[4:0], 28 bz[1], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-5 ry[4:0], 6 bz[2], 7-11 rz[4:0], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode1(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 2-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (7-bit w, 6-bit x/y/z) into encoded[0..2]; all three words are assigned (not OR-ed), so earlier contents are overwritten.
+            encoded[0] = (m & 0x3u) | ((gy >> 3) & 0x4u) | ((gz >> 1) & 0x18u) | ((rw << 5) & 0xfe0u) | ((bz << 12) & 0x3000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x3f8000u) | ((by << 17) & 0x400000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); // bits 0-1 m, 2 gy[5], 3-4 gz[5:4], 5-11 rw[6:0], 12-13 bz[1:0], 14 by[4], 15-21 gw[6:0], 22 by[5], 23 bz[2], 24 gy[4], 25-31 bw[6:0]
+            encoded[1] = ((bz >> 3) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u); // bit 0 bz[3], 1 bz[5], 2 bz[4], 3-8 rx[5:0], 9-12 gy[3:0], 13-18 gx[5:0], 19-22 gz[3:0], 23-28 bx[5:0], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-6 ry[5:0], 7-12 rz[5:0], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode2(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (11-bit w; 5-bit red, 4-bit green/blue deltas) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xf8u) | ((rw >> 2) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x1e000u) | ((gw << 7) & 0x20000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x7800000u) | ((bw << 17) & 0x8000000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bits 0-2 bw[9:7], 3-7 rx[4:0], 8 rw[10], 9-12 gy[3:0], 13-16 gx[3:0], 17 gw[10], 18 bz[0], 19-22 gz[3:0], 23-26 bx[3:0], 27 bw[10], 28 bz[1], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-5 ry[4:0], 6 bz[2], 7-11 rz[4:0], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode3(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (11-bit w; 4-bit red, 5-bit green, 4-bit blue deltas) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 3) & 0x80u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((gw << 8) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x7800000u) | ((bw << 17) & 0x8000000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bits 0-2 bw[9:7], 3-6 rx[3:0], 7 rw[10], 8 gz[4], 9-12 gy[3:0], 13-17 gx[4:0], 18 gw[10], 19-22 gz[3:0], 23-26 bx[3:0], 27 bw[10], 28 bz[1], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x1eu) | ((bz << 5) & 0x20u) | ((bz << 4) & 0x40u) | ((rz << 7) & 0x780u) | ((gy << 7) & 0x800u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-4 ry[3:0], 5 bz[0], 6 bz[2], 7-10 rz[3:0], 11 gy[4], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode4(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (11-bit w; 4-bit red/green, 5-bit blue deltas) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 3) & 0x80u) | ((by << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x1e000u) | ((gw << 7) & 0x20000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bw << 18) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bits 0-2 bw[9:7], 3-6 rx[3:0], 7 rw[10], 8 by[4], 9-12 gy[3:0], 13-16 gx[3:0], 17 gw[10], 18 bz[0], 19-22 gz[3:0], 23-27 bx[4:0], 28 bw[10], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x1eu) | ((bz << 4) & 0x60u) | ((rz << 7) & 0x780u) | ((bz << 7) & 0x800u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-4 ry[3:0], 5-6 bz[2:1], 7-10 rz[3:0], 11 bz[4], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode5(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (9-bit w, 5-bit x/y/z) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x3fe0u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0xff8000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-13 rw[8:0], 14 by[4], 15-23 gw[8:0], 24 gy[4], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x3u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bits 0-1 bw[8:7], 2 bz[4], 3-7 rx[4:0], 8 gz[4], 9-12 gy[3:0], 13-17 gx[4:0], 18 bz[0], 19-22 gz[3:0], 23-27 bx[4:0], 28 bz[1], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-5 ry[4:0], 6 bz[2], 7-11 rz[4:0], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode6(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (8-bit w; 6-bit red, 5-bit green/blue deltas) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((gz << 9) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-12 rw[7:0], 13 gz[4], 14 by[4], 15-22 gw[7:0], 23 bz[2], 24 gy[4], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x1u) | ((bz >> 2) & 0x6u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bit 0 bw[7], 1-2 bz[4:3], 3-8 rx[5:0], 9-12 gy[3:0], 13-17 gx[4:0], 18 bz[0], 19-22 gz[3:0], 23-27 bx[4:0], 28 bz[1], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-6 ry[5:0], 7-12 rz[5:0], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode7(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (8-bit w; 5-bit red, 6-bit green, 5-bit blue deltas) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((bz << 13) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((gy << 18) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-12 rw[7:0], 13 bz[0], 14 by[4], 15-22 gw[7:0], 23 gy[5], 24 gy[4], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x1u) | ((gz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); // bit 0 bw[7], 1 gz[5], 2 bz[4], 3-7 rx[4:0], 8 gz[4], 9-12 gy[3:0], 13-18 gx[5:0], 19-22 gz[3:0], 23-27 bx[4:0], 28 bz[1], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-5 ry[4:0], 6 bz[2], 7-11 rz[4:0], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode8(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (8-bit w; 5-bit red/green, 6-bit blue deltas) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((bz << 12) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((by << 18) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-12 rw[7:0], 13 bz[1], 14 by[4], 15-22 gw[7:0], 23 by[5], 24 gy[4], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u); // bit 0 bw[7], 1 bz[5], 2 bz[4], 3-7 rx[4:0], 8 gz[4], 9-12 gy[3:0], 13-17 gx[4:0], 18 bz[0], 19-22 gz[3:0], 23-28 bx[5:0], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-5 ry[4:0], 6 bz[2], 7-11 rz[4:0], 12 bz[3], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode9(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, the 5-bit field d and the r/g/b endpoint fields (all 6 bits wide) into encoded[0..2]; all three words are assigned, so earlier contents are overwritten.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7e0u) | ((gz << 7) & 0x800u) | ((bz << 12) & 0x3000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x1f8000u) | ((gy << 16) & 0x200000u) | ((by << 17) & 0x400000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0x7e000000u) | ((gz << 26) & 0x80000000u); // bits 0-4 m, 5-10 rw[5:0], 11 gz[4], 12-13 bz[1:0], 14 by[4], 15-20 gw[5:0], 21 gy[5], 22 by[5], 23 bz[2], 24 gy[4], 25-30 bw[5:0], 31 gz[5]
+            encoded[1] = ((bz >> 3) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u); // bit 0 bz[3], 1 bz[5], 2 bz[4], 3-8 rx[5:0], 9-12 gy[3:0], 13-18 gx[5:0], 19-22 gz[3:0], 23-28 bx[5:0], 29-31 by[2:0]
+            encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u); // bit 0 by[3], 1-6 ry[5:0], 7-12 rz[5:0], 13-17 d[4:0]; bits 18-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as gz << 26 and by << 29 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode10(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m and two 10-bit endpoints per channel (w and x) into encoded[0..2]; d, ry, rz, gy, gz, by and bz are accepted for signature parity with the other writers but are not used here.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x1ff8u) | ((gx << 13) & 0x7fe000u) | ((bx << 23) & 0xff800000u); // bits 0-2 bw[9:7], 3-12 rx[9:0], 13-22 gx[9:0], 23-31 bx[8:0]
+            encoded[2] = ((bx >> 9) & 0x1u); // bit 0 bx[9]; bits 1-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and bx << 23 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode11(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, 11-bit w fields and 9-bit x fields into encoded[0..2]; d, ry, rz, gy, gz, by and bz are accepted for signature parity with the other writers but are not used here.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xff8u) | ((rw << 2) & 0x1000u) | ((gx << 13) & 0x3fe000u) | ((gw << 12) & 0x400000u) | ((bx << 23) & 0xff800000u); // bits 0-2 bw[9:7], 3-11 rx[8:0], 12 rw[10], 13-21 gx[8:0], 22 gw[10], 23-31 bx[8:0]
+            encoded[2] = ((bw >> 10) & 0x1u); // bit 0 bw[10]; bits 1-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and bx << 23 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode12(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, 12-bit w fields and 8-bit x fields into encoded[0..2]; the w bits above bit 9 are stored in descending bit order (bit 11 first). d, ry, rz, gy, gz, by and bz are not used here.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x7f8u) | (rw & 0x800u) | ((rw << 2) & 0x1000u) | ((gx << 13) & 0x1fe000u) | ((gw << 10) & 0x200000u) | ((gw << 12) & 0x400000u) | ((bx << 23) & 0x7f800000u) | ((bw << 20) & 0x80000000u); // bits 0-2 bw[9:7], 3-10 rx[7:0], 11 rw[11], 12 rw[10], 13-20 gx[7:0], 21 gw[11], 22 gw[10], 23-30 bx[7:0], 31 bw[11]
+            encoded[2] = ((bw >> 10) & 0x1u); // bit 0 bw[10]; bits 1-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and bx << 23 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void WriteMode13(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
+        { // Packs the 5-bit mode field m, 16-bit w fields and 4-bit x fields into encoded[0..2]; the w bits above bit 9 are stored one at a time in descending bit order (bit 15 first). d, ry, rz, gy, gz, by and bz are not used here.
+            encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); // bits 0-4 m, 5-14 rw[9:0], 15-24 gw[9:0], 25-31 bw[6:0]
+            encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 8) & 0x80u) | ((rw >> 6) & 0x100u) | ((rw >> 4) & 0x200u) | ((rw >> 2) & 0x400u) | (rw & 0x800u) | ((rw << 2) & 0x1000u) | ((gx << 13) & 0x1e000u) | ((gw << 2) & 0x20000u) | ((gw << 4) & 0x40000u) | ((gw << 6) & 0x80000u) | ((gw << 8) & 0x100000u) | ((gw << 10) & 0x200000u) | ((gw << 12) & 0x400000u) | ((bx << 23) & 0x7800000u) | ((bw << 12) & 0x8000000u) | ((bw << 14) & 0x10000000u) | ((bw << 16) & 0x20000000u) | ((bw << 18) & 0x40000000u) | ((bw << 20) & 0x80000000u); // bits 0-2 bw[9:7], 3-6 rx[3:0], 7-12 rw[15]..rw[10], 13-16 gx[3:0], 17-22 gw[15]..gw[10], 23-26 bx[3:0], 27-31 bw[15]..bw[11]
+            encoded[2] = ((bw >> 10) & 0x1u); // bit 0 bw[10]; bits 1-31 are written as zero
+        } // NOTE(review): the uint16_t operands are promoted to int before <<, so shifts such as bw << 25 and bw << 20 can exceed the range of int - confirm this is acceptable for the targeted compilers/language standard.
+
+        void ReadMode0(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode0 produces: 10-bit w fields, 5-bit x/y/z fields and 5-bit d. Bits 0-1 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            gy |= ((encoded[0] << 2) & 0x10u); // word 0 bit 2 -> gy[4]
+            by |= ((encoded[0] << 1) & 0x10u); // word 0 bit 3 -> by[4]
+            bz |= (encoded[0] & 0x10u); // word 0 bit 4 -> bz[4]
+            rw |= ((encoded[0] >> 5) & 0x3ffu);
+            gw |= ((encoded[0] >> 15) & 0x3ffu);
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; the top 3 bits come from word 1
+            bw |= ((encoded[1] << 7) & 0x380u);
+            rx |= ((encoded[1] >> 3) & 0x1fu);
+            gz |= ((encoded[1] >> 4) & 0x10u);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x1fu);
+            bz |= ((encoded[1] >> 18) & 0x1u);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x1fu);
+            bz |= ((encoded[1] >> 27) & 0x2u);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x1fu);
+            bz |= ((encoded[2] >> 4) & 0x4u);
+            rz |= ((encoded[2] >> 7) & 0x1fu);
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode1(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode1 produces: 7-bit w fields, 6-bit x/y/z fields and 5-bit d. Bits 0-1 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            gy |= ((encoded[0] << 3) & 0x20u); // word 0 bit 2 -> gy[5]
+            gz |= ((encoded[0] << 1) & 0x30u); // word 0 bits 3-4 -> gz[5:4]
+            rw |= ((encoded[0] >> 5) & 0x7fu);
+            bz |= ((encoded[0] >> 12) & 0x3u);
+            by |= ((encoded[0] >> 10) & 0x10u);
+            gw |= ((encoded[0] >> 15) & 0x7fu);
+            by |= ((encoded[0] >> 17) & 0x20u);
+            bz |= ((encoded[0] >> 21) & 0x4u);
+            gy |= ((encoded[0] >> 20) & 0x10u);
+            bw |= ((encoded[0] >> 25) & 0x7fu);
+            bz |= ((encoded[1] << 3) & 0x8u); // word 1 bit 0 -> bz[3]
+            bz |= ((encoded[1] << 4) & 0x20u); // word 1 bit 1 -> bz[5]
+            bz |= ((encoded[1] << 2) & 0x10u); // word 1 bit 2 -> bz[4]
+            rx |= ((encoded[1] >> 3) & 0x3fu);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x3fu);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x3fu);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x3fu);
+            rz |= ((encoded[2] >> 7) & 0x3fu);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode2(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode2 produces: 11-bit w fields, 5-bit red and 4-bit green/blue x/y/z fields, 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);
+            gw |= ((encoded[0] >> 15) & 0x3ffu);
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bits 7-9 and bit 10 come from word 1
+            bw |= ((encoded[1] << 7) & 0x380u);
+            rx |= ((encoded[1] >> 3) & 0x1fu);
+            rw |= ((encoded[1] << 2) & 0x400u); // word 1 bit 8 -> rw[10]
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0xfu);
+            gw |= ((encoded[1] >> 7) & 0x400u); // word 1 bit 17 -> gw[10]
+            bz |= ((encoded[1] >> 18) & 0x1u);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0xfu);
+            bw |= ((encoded[1] >> 17) & 0x400u); // word 1 bit 27 -> bw[10]
+            bz |= ((encoded[1] >> 27) & 0x2u);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x1fu);
+            bz |= ((encoded[2] >> 4) & 0x4u);
+            rz |= ((encoded[2] >> 7) & 0x1fu);
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode3(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode3 produces: 11-bit w fields, 4-bit red, 5-bit green and 4-bit blue x/y/z fields, 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);
+            gw |= ((encoded[0] >> 15) & 0x3ffu);
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bits 7-9 and bit 10 come from word 1
+            bw |= ((encoded[1] << 7) & 0x380u);
+            rx |= ((encoded[1] >> 3) & 0xfu);
+            rw |= ((encoded[1] << 3) & 0x400u); // word 1 bit 7 -> rw[10]
+            gz |= ((encoded[1] >> 4) & 0x10u);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x1fu);
+            gw |= ((encoded[1] >> 8) & 0x400u); // word 1 bit 18 -> gw[10]
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0xfu);
+            bw |= ((encoded[1] >> 17) & 0x400u); // word 1 bit 27 -> bw[10]
+            bz |= ((encoded[1] >> 27) & 0x2u);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0xfu);
+            bz |= ((encoded[2] >> 5) & 0x1u);
+            bz |= ((encoded[2] >> 4) & 0x4u);
+            rz |= ((encoded[2] >> 7) & 0xfu);
+            gy |= ((encoded[2] >> 7) & 0x10u); // word 2 bit 11 -> gy[4]
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode4(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode4 produces: 11-bit w fields, 4-bit red/green and 5-bit blue x/y/z fields, 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);
+            gw |= ((encoded[0] >> 15) & 0x3ffu);
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bits 7-9 and bit 10 come from word 1
+            bw |= ((encoded[1] << 7) & 0x380u);
+            rx |= ((encoded[1] >> 3) & 0xfu);
+            rw |= ((encoded[1] << 3) & 0x400u); // word 1 bit 7 -> rw[10]
+            by |= ((encoded[1] >> 4) & 0x10u); // word 1 bit 8 -> by[4]
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0xfu);
+            gw |= ((encoded[1] >> 7) & 0x400u); // word 1 bit 17 -> gw[10]
+            bz |= ((encoded[1] >> 18) & 0x1u);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x1fu);
+            bw |= ((encoded[1] >> 18) & 0x400u); // word 1 bit 28 -> bw[10]
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0xfu);
+            bz |= ((encoded[2] >> 4) & 0x6u);
+            rz |= ((encoded[2] >> 7) & 0xfu);
+            bz |= ((encoded[2] >> 7) & 0x10u); // word 2 bit 11 -> bz[4]
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode5(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode5 produces: 9-bit w fields, 5-bit x/y/z fields and 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x1ffu);
+            by |= ((encoded[0] >> 10) & 0x10u); // word 0 bit 14 -> by[4]
+            gw |= ((encoded[0] >> 15) & 0x1ffu);
+            gy |= ((encoded[0] >> 20) & 0x10u); // word 0 bit 24 -> gy[4]
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bits 7-8 come from word 1
+            bw |= ((encoded[1] << 7) & 0x180u);
+            bz |= ((encoded[1] << 2) & 0x10u); // word 1 bit 2 -> bz[4]
+            rx |= ((encoded[1] >> 3) & 0x1fu);
+            gz |= ((encoded[1] >> 4) & 0x10u);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x1fu);
+            bz |= ((encoded[1] >> 18) & 0x1u);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x1fu);
+            bz |= ((encoded[1] >> 27) & 0x2u);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x1fu);
+            bz |= ((encoded[2] >> 4) & 0x4u);
+            rz |= ((encoded[2] >> 7) & 0x1fu);
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode6(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode6 produces: 8-bit w fields, 6-bit red and 5-bit green/blue x/y/z fields, 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0xffu);
+            gz |= ((encoded[0] >> 9) & 0x10u); // word 0 bit 13 -> gz[4]
+            by |= ((encoded[0] >> 10) & 0x10u); // word 0 bit 14 -> by[4]
+            gw |= ((encoded[0] >> 15) & 0xffu);
+            bz |= ((encoded[0] >> 21) & 0x4u); // word 0 bit 23 -> bz[2]
+            gy |= ((encoded[0] >> 20) & 0x10u); // word 0 bit 24 -> gy[4]
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bit 7 comes from word 1
+            bw |= ((encoded[1] << 7) & 0x80u);
+            bz |= ((encoded[1] << 2) & 0x18u); // word 1 bits 1-2 -> bz[4:3]
+            rx |= ((encoded[1] >> 3) & 0x3fu);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x1fu);
+            bz |= ((encoded[1] >> 18) & 0x1u);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x1fu);
+            bz |= ((encoded[1] >> 27) & 0x2u);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x3fu);
+            rz |= ((encoded[2] >> 7) & 0x3fu);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode7(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode7 produces: 8-bit w fields, 5-bit red, 6-bit green and 5-bit blue x/y/z fields, 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0xffu);
+            bz |= ((encoded[0] >> 13) & 0x1u); // word 0 bit 13 -> bz[0]
+            by |= ((encoded[0] >> 10) & 0x10u); // word 0 bit 14 -> by[4]
+            gw |= ((encoded[0] >> 15) & 0xffu);
+            gy |= ((encoded[0] >> 18) & 0x20u); // word 0 bit 23 -> gy[5]
+            gy |= ((encoded[0] >> 20) & 0x10u); // word 0 bit 24 -> gy[4]
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bit 7 comes from word 1
+            bw |= ((encoded[1] << 7) & 0x80u);
+            gz |= ((encoded[1] << 4) & 0x20u); // word 1 bit 1 -> gz[5]
+            bz |= ((encoded[1] << 2) & 0x10u); // word 1 bit 2 -> bz[4]
+            rx |= ((encoded[1] >> 3) & 0x1fu);
+            gz |= ((encoded[1] >> 4) & 0x10u);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x3fu);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x1fu);
+            bz |= ((encoded[1] >> 27) & 0x2u);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x1fu);
+            bz |= ((encoded[2] >> 4) & 0x4u);
+            rz |= ((encoded[2] >> 7) & 0x1fu);
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode8(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        { // Unpacks encoded[0..2] using the bit layout that WriteMode8 produces: 8-bit w fields, 5-bit red/green and 6-bit blue x/y/z fields, 5-bit d. Bits 0-4 of encoded[0] (the mode field) are not read here.
+            uint16_t d = 0; // every accumulator starts at zero because several fields are assembled with |= from non-contiguous bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0xffu);
+            bz |= ((encoded[0] >> 12) & 0x2u); // word 0 bit 13 -> bz[1]
+            by |= ((encoded[0] >> 10) & 0x10u); // word 0 bit 14 -> by[4]
+            gw |= ((encoded[0] >> 15) & 0xffu);
+            by |= ((encoded[0] >> 18) & 0x20u); // word 0 bit 23 -> by[5]
+            gy |= ((encoded[0] >> 20) & 0x10u); // word 0 bit 24 -> gy[4]
+            bw |= ((encoded[0] >> 25) & 0x7fu); // low 7 bits of bw; bit 7 comes from word 1
+            bw |= ((encoded[1] << 7) & 0x80u);
+            bz |= ((encoded[1] << 4) & 0x20u); // word 1 bit 1 -> bz[5]
+            bz |= ((encoded[1] << 2) & 0x10u); // word 1 bit 2 -> bz[4]
+            rx |= ((encoded[1] >> 3) & 0x1fu);
+            gz |= ((encoded[1] >> 4) & 0x10u);
+            gy |= ((encoded[1] >> 9) & 0xfu);
+            gx |= ((encoded[1] >> 13) & 0x1fu);
+            bz |= ((encoded[1] >> 18) & 0x1u);
+            gz |= ((encoded[1] >> 19) & 0xfu);
+            bx |= ((encoded[1] >> 23) & 0x3fu);
+            by |= ((encoded[1] >> 29) & 0x7u); // low 3 bits of by; bit 3 comes from word 2
+            by |= ((encoded[2] << 3) & 0x8u);
+            ry |= ((encoded[2] >> 1) & 0x1fu);
+            bz |= ((encoded[2] >> 4) & 0x4u);
+            rz |= ((encoded[2] >> 7) & 0x1fu);
+            bz |= ((encoded[2] >> 9) & 0x8u);
+            d |= ((encoded[2] >> 13) & 0x1fu);
+            outD = d; // outputs are stored only after all fields have been assembled
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode9(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        {   // Unpacks the 13 fields from encoded[0..2] with the mode-9 layout. Below, wordN means encoded[N]; word0 bits 4:0 are not read here.
+            uint16_t d = 0;   // every field starts at zero and is assembled by OR-ing in bit groups
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3fu);   // rw[5:0] <- word0[10:5]
+            gz |= ((encoded[0] >> 7) & 0x10u);   // gz[4]   <- word0[11]
+            bz |= ((encoded[0] >> 12) & 0x3u);   // bz[1:0] <- word0[13:12]
+            by |= ((encoded[0] >> 10) & 0x10u);   // by[4]   <- word0[14]
+            gw |= ((encoded[0] >> 15) & 0x3fu);   // gw[5:0] <- word0[20:15]
+            gy |= ((encoded[0] >> 16) & 0x20u);   // gy[5]   <- word0[21]
+            by |= ((encoded[0] >> 17) & 0x20u);   // by[5]   <- word0[22]
+            bz |= ((encoded[0] >> 21) & 0x4u);   // bz[2]   <- word0[23]
+            gy |= ((encoded[0] >> 20) & 0x10u);   // gy[4]   <- word0[24]
+            bw |= ((encoded[0] >> 25) & 0x3fu);   // bw[5:0] <- word0[30:25]
+            gz |= ((encoded[0] >> 26) & 0x20u);   // gz[5]   <- word0[31]
+            bz |= ((encoded[1] << 3) & 0x8u);   // bz[3]   <- word1[0]
+            bz |= ((encoded[1] << 4) & 0x20u);   // bz[5]   <- word1[1]
+            bz |= ((encoded[1] << 2) & 0x10u);   // bz[4]   <- word1[2]
+            rx |= ((encoded[1] >> 3) & 0x3fu);   // rx[5:0] <- word1[8:3]
+            gy |= ((encoded[1] >> 9) & 0xfu);   // gy[3:0] <- word1[12:9]
+            gx |= ((encoded[1] >> 13) & 0x3fu);   // gx[5:0] <- word1[18:13]
+            gz |= ((encoded[1] >> 19) & 0xfu);   // gz[3:0] <- word1[22:19]
+            bx |= ((encoded[1] >> 23) & 0x3fu);   // bx[5:0] <- word1[28:23]
+            by |= ((encoded[1] >> 29) & 0x7u);   // by[2:0] <- word1[31:29]
+            by |= ((encoded[2] << 3) & 0x8u);   // by[3]   <- word2[0]
+            ry |= ((encoded[2] >> 1) & 0x3fu);   // ry[5:0] <- word2[6:1]
+            rz |= ((encoded[2] >> 7) & 0x3fu);   // rz[5:0] <- word2[12:7]
+            d |= ((encoded[2] >> 13) & 0x1fu);   // d[4:0]  <- word2[17:13] (highest bit read in this mode)
+            outD = d;   // outputs are written only after all fields have been assembled in locals
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode10(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        {   // Unpacks rw/gw/bw and rx/gx/bx from encoded[0..2] with the mode-10 layout. Below, wordN means encoded[N]; word0 bits 4:0 are not read here.
+            uint16_t d = 0;   // every field starts at zero; only the w and x fields are filled in below
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);   // rw[9:0] <- word0[14:5]
+            gw |= ((encoded[0] >> 15) & 0x3ffu);   // gw[9:0] <- word0[24:15]
+            bw |= ((encoded[0] >> 25) & 0x7fu);   // bw[6:0] <- word0[31:25]
+            bw |= ((encoded[1] << 7) & 0x380u);   // bw[9:7] <- word1[2:0] (field continues across the word boundary)
+            rx |= ((encoded[1] >> 3) & 0x3ffu);   // rx[9:0] <- word1[12:3]
+            gx |= ((encoded[1] >> 13) & 0x3ffu);   // gx[9:0] <- word1[22:13]
+            bx |= ((encoded[1] >> 23) & 0x1ffu);   // bx[8:0] <- word1[31:23]
+            bx |= ((encoded[2] << 9) & 0x200u);   // bx[9]   <- word2[0] (highest bit read in this mode)
+            outD = d;   // d, ry, rz, gy, gz, by and bz are never modified in this mode, so those outputs are written as 0
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode11(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        {   // Unpacks rw/gw/bw (11 bits each) and rx/gx/bx (9 bits each) from encoded[0..2] with the mode-11 layout. Below, wordN means encoded[N]; word0 bits 4:0 are not read here.
+            uint16_t d = 0;   // every field starts at zero; only the w and x fields are filled in below
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);   // rw[9:0] <- word0[14:5]
+            gw |= ((encoded[0] >> 15) & 0x3ffu);   // gw[9:0] <- word0[24:15]
+            bw |= ((encoded[0] >> 25) & 0x7fu);   // bw[6:0] <- word0[31:25]
+            bw |= ((encoded[1] << 7) & 0x380u);   // bw[9:7] <- word1[2:0] (field continues across the word boundary)
+            rx |= ((encoded[1] >> 3) & 0x1ffu);   // rx[8:0] <- word1[11:3]
+            rw |= ((encoded[1] >> 2) & 0x400u);   // rw[10]  <- word1[12] (extra high bit stored right after rx)
+            gx |= ((encoded[1] >> 13) & 0x1ffu);   // gx[8:0] <- word1[21:13]
+            gw |= ((encoded[1] >> 12) & 0x400u);   // gw[10]  <- word1[22]
+            bx |= ((encoded[1] >> 23) & 0x1ffu);   // bx[8:0] <- word1[31:23]
+            bw |= ((encoded[2] << 10) & 0x400u);   // bw[10]  <- word2[0] (highest bit read in this mode)
+            outD = d;   // d, ry, rz, gy, gz, by and bz are never modified in this mode, so those outputs are written as 0
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode12(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        {   // Unpacks rw/gw/bw (12 bits each) and rx/gx/bx (8 bits each) from encoded[0..2] with the mode-12 layout. Below, wordN means encoded[N]; word0 bits 4:0 are not read here.
+            uint16_t d = 0;   // every field starts at zero; only the w and x fields are filled in below
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);   // rw[9:0] <- word0[14:5]
+            gw |= ((encoded[0] >> 15) & 0x3ffu);   // gw[9:0] <- word0[24:15]
+            bw |= ((encoded[0] >> 25) & 0x7fu);   // bw[6:0] <- word0[31:25]
+            bw |= ((encoded[1] << 7) & 0x380u);   // bw[9:7] <- word1[2:0] (field continues across the word boundary)
+            rx |= ((encoded[1] >> 3) & 0xffu);   // rx[7:0] <- word1[10:3]
+            rw |= (encoded[1] & 0x800u);   // rw[11]  <- word1[11] (no shift needed: source and destination bit coincide)
+            rw |= ((encoded[1] >> 2) & 0x400u);   // rw[10]  <- word1[12] (the high bits are stored most-significant first)
+            gx |= ((encoded[1] >> 13) & 0xffu);   // gx[7:0] <- word1[20:13]
+            gw |= ((encoded[1] >> 10) & 0x800u);   // gw[11]  <- word1[21]
+            gw |= ((encoded[1] >> 12) & 0x400u);   // gw[10]  <- word1[22]
+            bx |= ((encoded[1] >> 23) & 0xffu);   // bx[7:0] <- word1[30:23]
+            bw |= ((encoded[1] >> 20) & 0x800u);   // bw[11]  <- word1[31]
+            bw |= ((encoded[2] << 10) & 0x400u);   // bw[10]  <- word2[0] (highest bit read in this mode)
+            outD = d;   // d, ry, rz, gy, gz, by and bz are never modified in this mode, so those outputs are written as 0
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        void ReadMode13(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
+        {   // Unpacks rw/gw/bw (16 bits each) and rx/gx/bx (4 bits each) from encoded[0..2] with the mode-13 layout. Below, wordN means encoded[N]; word0 bits 4:0 are not read here.
+            uint16_t d = 0;   // every field starts at zero; only the w and x fields are filled in below
+            uint16_t rw = 0;
+            uint16_t rx = 0;
+            uint16_t ry = 0;
+            uint16_t rz = 0;
+            uint16_t gw = 0;
+            uint16_t gx = 0;
+            uint16_t gy = 0;
+            uint16_t gz = 0;
+            uint16_t bw = 0;
+            uint16_t bx = 0;
+            uint16_t by = 0;
+            uint16_t bz = 0;
+            rw |= ((encoded[0] >> 5) & 0x3ffu);   // rw[9:0] <- word0[14:5]
+            gw |= ((encoded[0] >> 15) & 0x3ffu);   // gw[9:0] <- word0[24:15]
+            bw |= ((encoded[0] >> 25) & 0x7fu);   // bw[6:0] <- word0[31:25]
+            bw |= ((encoded[1] << 7) & 0x380u);   // bw[9:7] <- word1[2:0] (field continues across the word boundary)
+            rx |= ((encoded[1] >> 3) & 0xfu);   // rx[3:0] <- word1[6:3]
+            rw |= ((encoded[1] << 8) & 0x8000u);   // rw[15]  <- word1[7]  (rw[15:10] is stored most-significant bit first in word1[12:7])
+            rw |= ((encoded[1] << 6) & 0x4000u);   // rw[14]  <- word1[8]
+            rw |= ((encoded[1] << 4) & 0x2000u);   // rw[13]  <- word1[9]
+            rw |= ((encoded[1] << 2) & 0x1000u);   // rw[12]  <- word1[10]
+            rw |= (encoded[1] & 0x800u);   // rw[11]  <- word1[11] (no shift needed: source and destination bit coincide)
+            rw |= ((encoded[1] >> 2) & 0x400u);   // rw[10]  <- word1[12]
+            gx |= ((encoded[1] >> 13) & 0xfu);   // gx[3:0] <- word1[16:13]
+            gw |= ((encoded[1] >> 2) & 0x8000u);   // gw[15]  <- word1[17] (gw[15:10] is stored most-significant bit first in word1[22:17])
+            gw |= ((encoded[1] >> 4) & 0x4000u);   // gw[14]  <- word1[18]
+            gw |= ((encoded[1] >> 6) & 0x2000u);   // gw[13]  <- word1[19]
+            gw |= ((encoded[1] >> 8) & 0x1000u);   // gw[12]  <- word1[20]
+            gw |= ((encoded[1] >> 10) & 0x800u);   // gw[11]  <- word1[21]
+            gw |= ((encoded[1] >> 12) & 0x400u);   // gw[10]  <- word1[22]
+            bx |= ((encoded[1] >> 23) & 0xfu);   // bx[3:0] <- word1[26:23]
+            bw |= ((encoded[1] >> 12) & 0x8000u);   // bw[15]  <- word1[27] (bw[15:10] is stored most-significant bit first, ending in word2[0])
+            bw |= ((encoded[1] >> 14) & 0x4000u);   // bw[14]  <- word1[28]
+            bw |= ((encoded[1] >> 16) & 0x2000u);   // bw[13]  <- word1[29]
+            bw |= ((encoded[1] >> 18) & 0x1000u);   // bw[12]  <- word1[30]
+            bw |= ((encoded[1] >> 20) & 0x800u);   // bw[11]  <- word1[31]
+            bw |= ((encoded[2] << 10) & 0x400u);   // bw[10]  <- word2[0] (highest bit read in this mode)
+            outD = d;   // d, ry, rz, gy, gz, by and bz are never modified in this mode, so those outputs are written as 0
+            outRW = rw;
+            outRX = rx;
+            outRY = ry;
+            outRZ = rz;
+            outGW = gw;
+            outGX = gx;
+            outGY = gy;
+            outGZ = gz;
+            outBW = bw;
+            outBX = bx;
+            outBY = by;
+            outBZ = bz;
+        }
+
+        const ReadFunc_t g_readFuncs[14] =   // reader dispatch table: entry N is ReadModeN, for N = 0..13
+        {
+            ReadMode0,
+            ReadMode1,
+            ReadMode2,
+            ReadMode3,
+            ReadMode4,
+            ReadMode5,
+            ReadMode6,
+            ReadMode7,
+            ReadMode8,
+            ReadMode9,
+            ReadMode10,
+            ReadMode11,
+            ReadMode12,
+            ReadMode13
+        };
+
+        const WriteFunc_t g_writeFuncs[14] =   // writer dispatch table: entry N is WriteModeN, in the same order as g_readFuncs
+        {
+            WriteMode0,
+            WriteMode1,
+            WriteMode2,
+            WriteMode3,
+            WriteMode4,
+            WriteMode5,
+            WriteMode6,
+            WriteMode7,
+            WriteMode8,
+            WriteMode9,
+            WriteMode10,
+            WriteMode11,
+            WriteMode12,
+            WriteMode13
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_BC6H_IO.h b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.h
new file mode 100644
index 0000000000..a7bb517b54
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <stdint.h>
+
+// Function-pointer types and the per-mode dispatch tables used to unpack/pack block fields.
+namespace cvtt
+{
+    namespace BC6H_IO
+    {
+        typedef void (*ReadFunc_t)(const uint32_t *encoded, uint16_t &d, uint16_t &rw, uint16_t &rx, uint16_t &ry, uint16_t &rz, uint16_t &gw, uint16_t &gx, uint16_t &gy, uint16_t &gz, uint16_t &bw, uint16_t &bx, uint16_t &by, uint16_t &bz);   // reader: fills the 13 field references from the read-only words at 'encoded'
+        typedef void (*WriteFunc_t)(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz);   // writer: receives 'm' plus the 13 field values by value and a mutable 'encoded' buffer (presumably 'm' is the mode id - confirm against the WriteMode* definitions)
+
+        extern const ReadFunc_t g_readFuncs[14];     // 14 entries; the defining table lists ReadMode0..ReadMode13 in order
+        extern const WriteFunc_t g_writeFuncs[14];   // 14 entries; the defining table lists WriteMode0..WriteMode13 in order
+    }
+}
diff --git a/thirdparty/cvtt/ConvectionKernels_BC7_Prio.h b/thirdparty/cvtt/ConvectionKernels_BC7_Prio.h
new file mode 100644
index 0000000000..1880e22d0f
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BC7_Prio.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <stdint.h>
+// Declarations for the BC7Prio code tables and the accessors that take a packed uint16_t code.
+namespace cvtt { namespace Tables { namespace BC7Prio {
+    extern const uint16_t *g_bc7PrioCodesRGB;   // pointer to packed uint16_t codes, defined elsewhere (presumably the RGB list - confirm)
+    extern const int g_bc7NumPrioCodesRGB;   // presumably the entry count behind g_bc7PrioCodesRGB - TODO confirm against the definition
+
+    extern const uint16_t *g_bc7PrioCodesRGBA;   // same shape as above (presumably the RGBA list - confirm)
+    extern const int g_bc7NumPrioCodesRGBA;   // presumably the entry count behind g_bc7PrioCodesRGBA - TODO confirm
+    // Each accessor below takes one packed code and returns an int; their definitions are not in this header.
+    int UnpackMode(uint16_t packed);
+    int UnpackSeedPointCount(uint16_t packed);
+    int UnpackPartition(uint16_t packed);
+    int UnpackRotation(uint16_t packed);
+    int UnpackIndexSelector(uint16_t packed);
+}}}
diff --git a/thirdparty/cvtt/ConvectionKernels_BC7_PrioData.cpp b/thirdparty/cvtt/ConvectionKernels_BC7_PrioData.cpp
new file mode 100644
index 0000000000..5b3134f860
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BC7_PrioData.cpp
@@ -0,0 +1,1301 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels_BC7_Prio.h"
+
+#define BC7_PARTITION_BITS  6
+#define BC7_PARTITION_OFFSET_BITS  0
+// Modes 4 and 5 reuse the low bits: rotation at bits 0-1 and index mode at bit 2 (partition uses bits 0-5 otherwise).
+#define BC7_ROTATION_BITS   2
+#define BC7_ROTATION_OFFSET_BITS    0
+
+#define BC7_INDEX_MODE_BITS 1
+#define BC7_INDEX_MODE_OFFSET_BITS (BC7_ROTATION_OFFSET_BITS + BC7_ROTATION_BITS)
+// Common to every code: mode at bits 6-8, then (seed point count - 1) at bits 9-10.
+#define BC7_MODE_BITS 3
+#define BC7_MODE_OFFSET_BITS (BC7_PARTITION_OFFSET_BITS + BC7_PARTITION_BITS)
+#define BC7_SEED_POINT_COUNT_BITS  2
+#define BC7_SEED_POINT_COUNT_OFFSET_BITS  (BC7_MODE_BITS + BC7_MODE_OFFSET_BITS)
+
+// Dual-plane sub-data is written in decimal as rotation * 10 + indexMode (e.g. 31 -> rotation 3, index mode 1).
+// Macro arguments are parenthesized so that expression arguments expand with the intended precedence.
+#define BC7_MODE_PRIO_DUAL_PLANE(subData)   \
+    ( \
+        (((subData) / 10) << BC7_ROTATION_OFFSET_BITS) | \
+        (((subData) % 10) << BC7_INDEX_MODE_OFFSET_BITS) \
+    )
+// Packs one code from (seedPointCount - 1), mode, and either the dual-plane fields (modes 4 and 5) or the raw sub-data.
+#define BC7_MODE_PRIO_CODE(seedPointCount, mode, subData)   \
+    (\
+        (((seedPointCount) - 1) << BC7_SEED_POINT_COUNT_OFFSET_BITS) |  \
+        ((mode) << BC7_MODE_OFFSET_BITS) |   \
+        (((mode) == 4 || (mode) == 5) ? BC7_MODE_PRIO_DUAL_PLANE(subData) : ((subData) << BC7_PARTITION_OFFSET_BITS)) \
+    )
+
+namespace cvtt { namespace Tables { namespace BC7Prio {
+    const uint16_t g_bc7PrioCodesRGBData[] =
+    {
+        BC7_MODE_PRIO_CODE(1, 1, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 0),
+        BC7_MODE_PRIO_CODE(1, 0, 3),
+        BC7_MODE_PRIO_CODE(1, 0, 1),
+        BC7_MODE_PRIO_CODE(1, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 0, 9),
+        BC7_MODE_PRIO_CODE(1, 1, 6),
+        BC7_MODE_PRIO_CODE(1, 1, 1),
+        BC7_MODE_PRIO_CODE(1, 1, 2),
+        BC7_MODE_PRIO_CODE(1, 0, 15),
+        BC7_MODE_PRIO_CODE(1, 1, 7),
+        BC7_MODE_PRIO_CODE(1, 1, 16),
+        BC7_MODE_PRIO_CODE(1, 1, 15),
+        BC7_MODE_PRIO_CODE(1, 1, 14),
+        BC7_MODE_PRIO_CODE(1, 0, 13),
+        BC7_MODE_PRIO_CODE(1, 0, 14),
+        BC7_MODE_PRIO_CODE(1, 0, 11),
+        BC7_MODE_PRIO_CODE(1, 1, 22),
+        BC7_MODE_PRIO_CODE(1, 0, 8),
+        BC7_MODE_PRIO_CODE(1, 0, 10),
+        BC7_MODE_PRIO_CODE(1, 1, 8),
+        BC7_MODE_PRIO_CODE(1, 3, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 19),
+        BC7_MODE_PRIO_CODE(1, 4, 31),
+        BC7_MODE_PRIO_CODE(1, 1, 10),
+        BC7_MODE_PRIO_CODE(1, 1, 23),
+        BC7_MODE_PRIO_CODE(1, 1, 3),
+        BC7_MODE_PRIO_CODE(2, 1, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 9),
+        BC7_MODE_PRIO_CODE(2, 1, 0),
+        BC7_MODE_PRIO_CODE(1, 1, 20),
+        BC7_MODE_PRIO_CODE(1, 1, 21),
+        BC7_MODE_PRIO_CODE(1, 4, 11),
+        BC7_MODE_PRIO_CODE(1, 1, 29),
+        BC7_MODE_PRIO_CODE(1, 1, 26),
+        BC7_MODE_PRIO_CODE(1, 5, 30),
+        BC7_MODE_PRIO_CODE(1, 0, 4),
+        BC7_MODE_PRIO_CODE(2, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 0, 0),
+        BC7_MODE_PRIO_CODE(2, 0, 10),
+        BC7_MODE_PRIO_CODE(3, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 1, 11),
+        BC7_MODE_PRIO_CODE(1, 4, 10),
+        BC7_MODE_PRIO_CODE(2, 0, 8),
+        BC7_MODE_PRIO_CODE(2, 0, 11),
+        BC7_MODE_PRIO_CODE(2, 0, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 4),
+        BC7_MODE_PRIO_CODE(3, 1, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 12),
+        BC7_MODE_PRIO_CODE(1, 1, 18),
+        BC7_MODE_PRIO_CODE(1, 3, 0),
+        BC7_MODE_PRIO_CODE(1, 0, 5),
+        BC7_MODE_PRIO_CODE(1, 1, 17),
+        BC7_MODE_PRIO_CODE(1, 1, 25),
+        BC7_MODE_PRIO_CODE(1, 0, 7),
+        BC7_MODE_PRIO_CODE(3, 0, 10),
+        BC7_MODE_PRIO_CODE(1, 1, 5),
+        BC7_MODE_PRIO_CODE(2, 1, 10),
+        BC7_MODE_PRIO_CODE(1, 1, 24),
+        BC7_MODE_PRIO_CODE(3, 0, 8),
+        BC7_MODE_PRIO_CODE(3, 1, 0),
+        BC7_MODE_PRIO_CODE(2, 1, 15),
+        BC7_MODE_PRIO_CODE(2, 1, 14),
+        BC7_MODE_PRIO_CODE(3, 0, 13),
+        BC7_MODE_PRIO_CODE(3, 0, 11),
+        BC7_MODE_PRIO_CODE(2, 1, 16),
+        BC7_MODE_PRIO_CODE(2, 0, 14),
+        BC7_MODE_PRIO_CODE(2, 1, 3),
+        BC7_MODE_PRIO_CODE(4, 0, 10),
+        BC7_MODE_PRIO_CODE(2, 1, 1),
+        BC7_MODE_PRIO_CODE(1, 0, 2),
+        BC7_MODE_PRIO_CODE(2, 1, 2),
+        BC7_MODE_PRIO_CODE(4, 0, 8),
+        BC7_MODE_PRIO_CODE(1, 0, 12),
+        BC7_MODE_PRIO_CODE(4, 1, 13),
+        BC7_MODE_PRIO_CODE(1, 5, 10),
+        BC7_MODE_PRIO_CODE(2, 0, 15),
+        BC7_MODE_PRIO_CODE(1, 0, 6),
+        BC7_MODE_PRIO_CODE(1, 1, 35),
+        BC7_MODE_PRIO_CODE(2, 1, 23),
+        BC7_MODE_PRIO_CODE(4, 0, 13),
+        BC7_MODE_PRIO_CODE(4, 0, 11),
+        BC7_MODE_PRIO_CODE(1, 2, 17),
+        BC7_MODE_PRIO_CODE(2, 1, 6),
+        BC7_MODE_PRIO_CODE(2, 1, 7),
+        BC7_MODE_PRIO_CODE(4, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 2, 16),
+        BC7_MODE_PRIO_CODE(2, 1, 19),
+        BC7_MODE_PRIO_CODE(1, 1, 30),
+        BC7_MODE_PRIO_CODE(2, 3, 13),
+        BC7_MODE_PRIO_CODE(3, 0, 14),
+        BC7_MODE_PRIO_CODE(2, 1, 29),
+        BC7_MODE_PRIO_CODE(2, 1, 21),
+        BC7_MODE_PRIO_CODE(4, 1, 0),
+        BC7_MODE_PRIO_CODE(3, 0, 15),
+        BC7_MODE_PRIO_CODE(2, 0, 3),
+        BC7_MODE_PRIO_CODE(1, 1, 28),
+        BC7_MODE_PRIO_CODE(1, 4, 30),
+        BC7_MODE_PRIO_CODE(2, 0, 4),
+        BC7_MODE_PRIO_CODE(1, 2, 63),
+        BC7_MODE_PRIO_CODE(4, 0, 14),
+        BC7_MODE_PRIO_CODE(2, 1, 26),
+        BC7_MODE_PRIO_CODE(2, 0, 1),
+        BC7_MODE_PRIO_CODE(3, 0, 3),
+        BC7_MODE_PRIO_CODE(1, 1, 61),
+        BC7_MODE_PRIO_CODE(2, 0, 7),
+        BC7_MODE_PRIO_CODE(2, 0, 5),
+        BC7_MODE_PRIO_CODE(3, 1, 10),
+        BC7_MODE_PRIO_CODE(2, 4, 31),
+        BC7_MODE_PRIO_CODE(2, 0, 9),
+        BC7_MODE_PRIO_CODE(2, 1, 11),
+        BC7_MODE_PRIO_CODE(4, 0, 15),
+        BC7_MODE_PRIO_CODE(3, 1, 14),
+        BC7_MODE_PRIO_CODE(2, 0, 0),
+        BC7_MODE_PRIO_CODE(3, 1, 15),
+        BC7_MODE_PRIO_CODE(2, 3, 0),
+        BC7_MODE_PRIO_CODE(3, 0, 1),
+        BC7_MODE_PRIO_CODE(1, 1, 60),
+        BC7_MODE_PRIO_CODE(2, 1, 12),
+        BC7_MODE_PRIO_CODE(3, 1, 1),
+        BC7_MODE_PRIO_CODE(3, 0, 5),
+        BC7_MODE_PRIO_CODE(1, 1, 27),
+        BC7_MODE_PRIO_CODE(2, 1, 18),
+        BC7_MODE_PRIO_CODE(3, 0, 9),
+        BC7_MODE_PRIO_CODE(3, 1, 3),
+        BC7_MODE_PRIO_CODE(2, 0, 2),
+        BC7_MODE_PRIO_CODE(3, 1, 16),
+        BC7_MODE_PRIO_CODE(3, 1, 2),
+        BC7_MODE_PRIO_CODE(1, 1, 31),
+        BC7_MODE_PRIO_CODE(3, 0, 7),
+        BC7_MODE_PRIO_CODE(2, 1, 17),
+        BC7_MODE_PRIO_CODE(1, 5, 20),
+        BC7_MODE_PRIO_CODE(2, 1, 4),
+        BC7_MODE_PRIO_CODE(1, 1, 62),
+        BC7_MODE_PRIO_CODE(2, 0, 12),
+        BC7_MODE_PRIO_CODE(3, 0, 4),
+        BC7_MODE_PRIO_CODE(4, 0, 4),
+        BC7_MODE_PRIO_CODE(1, 1, 33),
+        BC7_MODE_PRIO_CODE(3, 1, 23),
+        BC7_MODE_PRIO_CODE(2, 1, 5),
+        BC7_MODE_PRIO_CODE(2, 0, 6),
+        BC7_MODE_PRIO_CODE(2, 1, 24),
+        BC7_MODE_PRIO_CODE(1, 1, 59),
+        BC7_MODE_PRIO_CODE(1, 1, 63),
+        BC7_MODE_PRIO_CODE(3, 0, 0),
+        BC7_MODE_PRIO_CODE(1, 1, 52),
+        BC7_MODE_PRIO_CODE(4, 0, 7),
+        BC7_MODE_PRIO_CODE(2, 1, 22),
+        BC7_MODE_PRIO_CODE(4, 0, 3),
+        BC7_MODE_PRIO_CODE(1, 2, 10),
+        BC7_MODE_PRIO_CODE(3, 1, 7),
+        BC7_MODE_PRIO_CODE(4, 0, 9),
+        BC7_MODE_PRIO_CODE(2, 1, 8),
+        BC7_MODE_PRIO_CODE(4, 0, 1),
+        BC7_MODE_PRIO_CODE(3, 0, 12),
+        BC7_MODE_PRIO_CODE(4, 0, 5),
+        BC7_MODE_PRIO_CODE(3, 1, 6),
+        BC7_MODE_PRIO_CODE(4, 1, 14),
+        BC7_MODE_PRIO_CODE(1, 3, 15),
+        BC7_MODE_PRIO_CODE(1, 1, 56),
+        BC7_MODE_PRIO_CODE(3, 0, 6),
+        BC7_MODE_PRIO_CODE(3, 0, 2),
+        BC7_MODE_PRIO_CODE(1, 1, 32),
+        BC7_MODE_PRIO_CODE(4, 1, 10),
+        BC7_MODE_PRIO_CODE(1, 2, 8),
+        BC7_MODE_PRIO_CODE(2, 1, 9),
+        BC7_MODE_PRIO_CODE(1, 2, 18),
+        BC7_MODE_PRIO_CODE(4, 1, 15),
+        BC7_MODE_PRIO_CODE(4, 0, 6),
+        BC7_MODE_PRIO_CODE(3, 1, 29),
+        BC7_MODE_PRIO_CODE(2, 1, 25),
+        BC7_MODE_PRIO_CODE(3, 4, 31),
+        BC7_MODE_PRIO_CODE(3, 3, 13),
+        BC7_MODE_PRIO_CODE(4, 0, 0),
+        BC7_MODE_PRIO_CODE(3, 1, 19),
+        BC7_MODE_PRIO_CODE(4, 0, 12),
+        BC7_MODE_PRIO_CODE(4, 1, 1),
+        BC7_MODE_PRIO_CODE(4, 0, 2),
+        BC7_MODE_PRIO_CODE(1, 3, 2),
+        BC7_MODE_PRIO_CODE(1, 2, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 58),
+        BC7_MODE_PRIO_CODE(1, 3, 14),
+        BC7_MODE_PRIO_CODE(4, 1, 3),
+        BC7_MODE_PRIO_CODE(3, 1, 21),
+        BC7_MODE_PRIO_CODE(2, 2, 8),
+        BC7_MODE_PRIO_CODE(1, 2, 19),
+        BC7_MODE_PRIO_CODE(4, 1, 16),
+        BC7_MODE_PRIO_CODE(4, 1, 2),
+        BC7_MODE_PRIO_CODE(2, 2, 16),
+        BC7_MODE_PRIO_CODE(2, 2, 10),
+        BC7_MODE_PRIO_CODE(2, 1, 20),
+        BC7_MODE_PRIO_CODE(1, 2, 11),
+        BC7_MODE_PRIO_CODE(1, 1, 54),
+        BC7_MODE_PRIO_CODE(1, 1, 47),
+        BC7_MODE_PRIO_CODE(1, 3, 1),
+        BC7_MODE_PRIO_CODE(1, 2, 21),
+        BC7_MODE_PRIO_CODE(1, 2, 62),
+        BC7_MODE_PRIO_CODE(2, 2, 11),
+        BC7_MODE_PRIO_CODE(3, 1, 26),
+        BC7_MODE_PRIO_CODE(1, 1, 53),
+        BC7_MODE_PRIO_CODE(2, 1, 35),
+        BC7_MODE_PRIO_CODE(2, 2, 13),
+        BC7_MODE_PRIO_CODE(4, 1, 23),
+        BC7_MODE_PRIO_CODE(4, 1, 6),
+        BC7_MODE_PRIO_CODE(4, 1, 7),
+        BC7_MODE_PRIO_CODE(1, 2, 25),
+        BC7_MODE_PRIO_CODE(1, 1, 57),
+        BC7_MODE_PRIO_CODE(2, 1, 60),
+        BC7_MODE_PRIO_CODE(1, 2, 20),
+        BC7_MODE_PRIO_CODE(3, 1, 8),
+        BC7_MODE_PRIO_CODE(4, 1, 29),
+        BC7_MODE_PRIO_CODE(4, 1, 19),
+        BC7_MODE_PRIO_CODE(3, 2, 8),
+        BC7_MODE_PRIO_CODE(2, 4, 11),
+        BC7_MODE_PRIO_CODE(4, 1, 21),
+        BC7_MODE_PRIO_CODE(3, 2, 10),
+        BC7_MODE_PRIO_CODE(2, 1, 61),
+        BC7_MODE_PRIO_CODE(2, 1, 30),
+        BC7_MODE_PRIO_CODE(3, 1, 12),
+        BC7_MODE_PRIO_CODE(3, 1, 11),
+        BC7_MODE_PRIO_CODE(2, 1, 63),
+        BC7_MODE_PRIO_CODE(2, 3, 1),
+        BC7_MODE_PRIO_CODE(2, 1, 28),
+        BC7_MODE_PRIO_CODE(2, 1, 62),
+        BC7_MODE_PRIO_CODE(3, 2, 13),
+        BC7_MODE_PRIO_CODE(2, 2, 63),
+        BC7_MODE_PRIO_CODE(2, 1, 33),
+        BC7_MODE_PRIO_CODE(2, 4, 10),
+        BC7_MODE_PRIO_CODE(3, 1, 18),
+        BC7_MODE_PRIO_CODE(2, 5, 30),
+        BC7_MODE_PRIO_CODE(3, 1, 5),
+        BC7_MODE_PRIO_CODE(2, 2, 17),
+        BC7_MODE_PRIO_CODE(1, 1, 55),
+        BC7_MODE_PRIO_CODE(3, 1, 17),
+        BC7_MODE_PRIO_CODE(2, 3, 2),
+        BC7_MODE_PRIO_CODE(1, 4, 21),
+        BC7_MODE_PRIO_CODE(3, 2, 11),
+        BC7_MODE_PRIO_CODE(4, 1, 11),
+        BC7_MODE_PRIO_CODE(2, 1, 27),
+        BC7_MODE_PRIO_CODE(1, 2, 59),
+        BC7_MODE_PRIO_CODE(4, 1, 26),
+        BC7_MODE_PRIO_CODE(3, 1, 9),
+        BC7_MODE_PRIO_CODE(2, 3, 14),
+        BC7_MODE_PRIO_CODE(3, 1, 4),
+        BC7_MODE_PRIO_CODE(3, 1, 24),
+        BC7_MODE_PRIO_CODE(3, 1, 25),
+        BC7_MODE_PRIO_CODE(3, 3, 0),
+        BC7_MODE_PRIO_CODE(3, 4, 11),
+        BC7_MODE_PRIO_CODE(4, 1, 12),
+        BC7_MODE_PRIO_CODE(2, 1, 32),
+        BC7_MODE_PRIO_CODE(2, 3, 15),
+        BC7_MODE_PRIO_CODE(4, 2, 10),
+        BC7_MODE_PRIO_CODE(1, 2, 60),
+        BC7_MODE_PRIO_CODE(1, 2, 32),
+        BC7_MODE_PRIO_CODE(1, 1, 40),
+        BC7_MODE_PRIO_CODE(4, 1, 18),
+        BC7_MODE_PRIO_CODE(2, 1, 59),
+        BC7_MODE_PRIO_CODE(4, 1, 5),
+        BC7_MODE_PRIO_CODE(3, 1, 22),
+        BC7_MODE_PRIO_CODE(3, 2, 16),
+        BC7_MODE_PRIO_CODE(3, 1, 20),
+        BC7_MODE_PRIO_CODE(4, 1, 4),
+        BC7_MODE_PRIO_CODE(2, 1, 31),
+        BC7_MODE_PRIO_CODE(4, 1, 17),
+        BC7_MODE_PRIO_CODE(1, 2, 24),
+        BC7_MODE_PRIO_CODE(4, 1, 24),
+        BC7_MODE_PRIO_CODE(2, 1, 58),
+        BC7_MODE_PRIO_CODE(4, 2, 8),
+        BC7_MODE_PRIO_CODE(1, 2, 22),
+        BC7_MODE_PRIO_CODE(1, 2, 23),
+        BC7_MODE_PRIO_CODE(1, 3, 10),
+        BC7_MODE_PRIO_CODE(1, 1, 41),
+        BC7_MODE_PRIO_CODE(2, 2, 18),
+        BC7_MODE_PRIO_CODE(4, 1, 25),
+        BC7_MODE_PRIO_CODE(3, 1, 61),
+        BC7_MODE_PRIO_CODE(1, 3, 29),
+        BC7_MODE_PRIO_CODE(1, 2, 57),
+        BC7_MODE_PRIO_CODE(2, 2, 19),
+        BC7_MODE_PRIO_CODE(1, 2, 53),
+        BC7_MODE_PRIO_CODE(1, 2, 55),
+        BC7_MODE_PRIO_CODE(3, 2, 63),
+        BC7_MODE_PRIO_CODE(3, 1, 60),
+        BC7_MODE_PRIO_CODE(4, 1, 8),
+        BC7_MODE_PRIO_CODE(2, 1, 56),
+        BC7_MODE_PRIO_CODE(3, 1, 35),
+        BC7_MODE_PRIO_CODE(4, 4, 31),
+        BC7_MODE_PRIO_CODE(4, 1, 9),
+        BC7_MODE_PRIO_CODE(1, 1, 46),
+        BC7_MODE_PRIO_CODE(1, 2, 58),
+        BC7_MODE_PRIO_CODE(2, 3, 29),
+        BC7_MODE_PRIO_CODE(1, 1, 45),
+        BC7_MODE_PRIO_CODE(4, 2, 13),
+        BC7_MODE_PRIO_CODE(1, 1, 42),
+        BC7_MODE_PRIO_CODE(1, 3, 3),
+        BC7_MODE_PRIO_CODE(4, 2, 11),
+        BC7_MODE_PRIO_CODE(3, 1, 63),
+        BC7_MODE_PRIO_CODE(3, 1, 30),
+        BC7_MODE_PRIO_CODE(1, 1, 36),
+        BC7_MODE_PRIO_CODE(3, 1, 62),
+        BC7_MODE_PRIO_CODE(1, 1, 43),
+        BC7_MODE_PRIO_CODE(1, 3, 21),
+        BC7_MODE_PRIO_CODE(3, 2, 17),
+        BC7_MODE_PRIO_CODE(1, 2, 14),
+        BC7_MODE_PRIO_CODE(1, 1, 48),
+        BC7_MODE_PRIO_CODE(2, 1, 57),
+        BC7_MODE_PRIO_CODE(2, 1, 52),
+        BC7_MODE_PRIO_CODE(1, 2, 61),
+        BC7_MODE_PRIO_CODE(3, 1, 33),
+        BC7_MODE_PRIO_CODE(1, 1, 51),
+        BC7_MODE_PRIO_CODE(4, 1, 20),
+        BC7_MODE_PRIO_CODE(1, 3, 8),
+        BC7_MODE_PRIO_CODE(4, 1, 22),
+        BC7_MODE_PRIO_CODE(1, 3, 19),
+        BC7_MODE_PRIO_CODE(1, 2, 36),
+        BC7_MODE_PRIO_CODE(2, 5, 10),
+        BC7_MODE_PRIO_CODE(3, 1, 28),
+        BC7_MODE_PRIO_CODE(2, 2, 14),
+        BC7_MODE_PRIO_CODE(1, 1, 49),
+        BC7_MODE_PRIO_CODE(1, 2, 33),
+        BC7_MODE_PRIO_CODE(1, 3, 9),
+        BC7_MODE_PRIO_CODE(2, 2, 20),
+        BC7_MODE_PRIO_CODE(1, 3, 26),
+        BC7_MODE_PRIO_CODE(2, 1, 53),
+        BC7_MODE_PRIO_CODE(4, 3, 13),
+        BC7_MODE_PRIO_CODE(2, 2, 21),
+        BC7_MODE_PRIO_CODE(3, 4, 10),
+        BC7_MODE_PRIO_CODE(4, 1, 60),
+        BC7_MODE_PRIO_CODE(2, 1, 54),
+        BC7_MODE_PRIO_CODE(1, 2, 29),
+        BC7_MODE_PRIO_CODE(2, 1, 47),
+        BC7_MODE_PRIO_CODE(1, 2, 52),
+        BC7_MODE_PRIO_CODE(3, 1, 32),
+        BC7_MODE_PRIO_CODE(1, 2, 40),
+        BC7_MODE_PRIO_CODE(1, 2, 31),
+        BC7_MODE_PRIO_CODE(3, 1, 27),
+        BC7_MODE_PRIO_CODE(3, 2, 18),
+        BC7_MODE_PRIO_CODE(2, 3, 10),
+        BC7_MODE_PRIO_CODE(2, 1, 55),
+        BC7_MODE_PRIO_CODE(4, 1, 61),
+        BC7_MODE_PRIO_CODE(3, 2, 14),
+        BC7_MODE_PRIO_CODE(3, 1, 31),
+        BC7_MODE_PRIO_CODE(1, 2, 34),
+        BC7_MODE_PRIO_CODE(3, 2, 19),
+        BC7_MODE_PRIO_CODE(2, 3, 21),
+        BC7_MODE_PRIO_CODE(2, 4, 30),
+        BC7_MODE_PRIO_CODE(1, 2, 15),
+        BC7_MODE_PRIO_CODE(2, 3, 26),
+        BC7_MODE_PRIO_CODE(1, 2, 28),
+        BC7_MODE_PRIO_CODE(4, 2, 16),
+        BC7_MODE_PRIO_CODE(2, 2, 15),
+        BC7_MODE_PRIO_CODE(2, 1, 40),
+        BC7_MODE_PRIO_CODE(2, 2, 22),
+        BC7_MODE_PRIO_CODE(4, 1, 33),
+        BC7_MODE_PRIO_CODE(1, 3, 7),
+        BC7_MODE_PRIO_CODE(1, 1, 50),
+        BC7_MODE_PRIO_CODE(2, 1, 41),
+        BC7_MODE_PRIO_CODE(1, 2, 9),
+        BC7_MODE_PRIO_CODE(1, 2, 39),
+        BC7_MODE_PRIO_CODE(2, 2, 25),
+        BC7_MODE_PRIO_CODE(1, 3, 6),
+        BC7_MODE_PRIO_CODE(3, 2, 21),
+        BC7_MODE_PRIO_CODE(1, 1, 37),
+        BC7_MODE_PRIO_CODE(2, 2, 58),
+        BC7_MODE_PRIO_CODE(3, 3, 29),
+        BC7_MODE_PRIO_CODE(4, 1, 62),
+        BC7_MODE_PRIO_CODE(1, 2, 35),
+        BC7_MODE_PRIO_CODE(3, 1, 59),
+        BC7_MODE_PRIO_CODE(4, 1, 28),
+        BC7_MODE_PRIO_CODE(1, 3, 23),
+        BC7_MODE_PRIO_CODE(4, 1, 30),
+        BC7_MODE_PRIO_CODE(2, 1, 45),
+        BC7_MODE_PRIO_CODE(1, 3, 16),
+        BC7_MODE_PRIO_CODE(4, 1, 35),
+        BC7_MODE_PRIO_CODE(2, 1, 46),
+        BC7_MODE_PRIO_CODE(1, 2, 38),
+        BC7_MODE_PRIO_CODE(4, 1, 63),
+        BC7_MODE_PRIO_CODE(1, 3, 22),
+        BC7_MODE_PRIO_CODE(1, 2, 30),
+        BC7_MODE_PRIO_CODE(2, 2, 31),
+        BC7_MODE_PRIO_CODE(1, 3, 20),
+        BC7_MODE_PRIO_CODE(2, 2, 9),
+        BC7_MODE_PRIO_CODE(2, 3, 3),
+        BC7_MODE_PRIO_CODE(3, 2, 22),
+        BC7_MODE_PRIO_CODE(2, 1, 42),
+        BC7_MODE_PRIO_CODE(2, 2, 62),
+        BC7_MODE_PRIO_CODE(3, 2, 20),
+        BC7_MODE_PRIO_CODE(4, 1, 32),
+        BC7_MODE_PRIO_CODE(2, 1, 43),
+        BC7_MODE_PRIO_CODE(3, 1, 58),
+        BC7_MODE_PRIO_CODE(2, 3, 19),
+        BC7_MODE_PRIO_CODE(2, 2, 32),
+        BC7_MODE_PRIO_CODE(2, 2, 57),
+        BC7_MODE_PRIO_CODE(4, 1, 27),
+        BC7_MODE_PRIO_CODE(2, 2, 34),
+        BC7_MODE_PRIO_CODE(4, 1, 58),
+        BC7_MODE_PRIO_CODE(1, 2, 12),
+        BC7_MODE_PRIO_CODE(2, 2, 12),
+        BC7_MODE_PRIO_CODE(1, 4, 20),
+        BC7_MODE_PRIO_CODE(1, 2, 56),
+        BC7_MODE_PRIO_CODE(2, 1, 48),
+        BC7_MODE_PRIO_CODE(2, 1, 36),
+        BC7_MODE_PRIO_CODE(4, 3, 0),
+        BC7_MODE_PRIO_CODE(2, 2, 24),
+        BC7_MODE_PRIO_CODE(3, 1, 40),
+        BC7_MODE_PRIO_CODE(3, 2, 9),
+        BC7_MODE_PRIO_CODE(3, 1, 56),
+        BC7_MODE_PRIO_CODE(3, 2, 15),
+        BC7_MODE_PRIO_CODE(2, 3, 7),
+        BC7_MODE_PRIO_CODE(1, 2, 37),
+        BC7_MODE_PRIO_CODE(2, 2, 35),
+        BC7_MODE_PRIO_CODE(3, 1, 52),
+        BC7_MODE_PRIO_CODE(2, 3, 6),
+        BC7_MODE_PRIO_CODE(3, 1, 57),
+        BC7_MODE_PRIO_CODE(4, 1, 31),
+        BC7_MODE_PRIO_CODE(4, 4, 11),
+        BC7_MODE_PRIO_CODE(1, 1, 44),
+        BC7_MODE_PRIO_CODE(3, 3, 1),
+        BC7_MODE_PRIO_CODE(1, 2, 54),
+        BC7_MODE_PRIO_CODE(2, 1, 50),
+        BC7_MODE_PRIO_CODE(3, 3, 15),
+        BC7_MODE_PRIO_CODE(2, 1, 51),
+        BC7_MODE_PRIO_CODE(1, 2, 27),
+        BC7_MODE_PRIO_CODE(3, 4, 30),
+        BC7_MODE_PRIO_CODE(3, 3, 14),
+        BC7_MODE_PRIO_CODE(3, 2, 25),
+        BC7_MODE_PRIO_CODE(2, 3, 9),
+        BC7_MODE_PRIO_CODE(2, 2, 60),
+        BC7_MODE_PRIO_CODE(2, 1, 49),
+        BC7_MODE_PRIO_CODE(1, 2, 6),
+        BC7_MODE_PRIO_CODE(2, 2, 23),
+        BC7_MODE_PRIO_CODE(3, 2, 12),
+        BC7_MODE_PRIO_CODE(3, 3, 2),
+        BC7_MODE_PRIO_CODE(4, 2, 14),
+        BC7_MODE_PRIO_CODE(2, 3, 16),
+        BC7_MODE_PRIO_CODE(1, 2, 51),
+        BC7_MODE_PRIO_CODE(1, 3, 11),
+        BC7_MODE_PRIO_CODE(1, 2, 4),
+        BC7_MODE_PRIO_CODE(4, 2, 17),
+        BC7_MODE_PRIO_CODE(1, 3, 12),
+        BC7_MODE_PRIO_CODE(3, 1, 43),
+        BC7_MODE_PRIO_CODE(2, 4, 21),
+        BC7_MODE_PRIO_CODE(4, 1, 56),
+        BC7_MODE_PRIO_CODE(3, 1, 53),
+        BC7_MODE_PRIO_CODE(3, 1, 47),
+        BC7_MODE_PRIO_CODE(2, 2, 61),
+        BC7_MODE_PRIO_CODE(2, 2, 55),
+        BC7_MODE_PRIO_CODE(2, 3, 23),
+        BC7_MODE_PRIO_CODE(3, 1, 42),
+        BC7_MODE_PRIO_CODE(2, 3, 8),
+        BC7_MODE_PRIO_CODE(3, 1, 55),
+        BC7_MODE_PRIO_CODE(4, 1, 59),
+        BC7_MODE_PRIO_CODE(3, 2, 60),
+        BC7_MODE_PRIO_CODE(2, 3, 20),
+        BC7_MODE_PRIO_CODE(3, 2, 57),
+        BC7_MODE_PRIO_CODE(3, 1, 54),
+        BC7_MODE_PRIO_CODE(3, 2, 35),
+        BC7_MODE_PRIO_CODE(1, 1, 38),
+        BC7_MODE_PRIO_CODE(1, 2, 5),
+        BC7_MODE_PRIO_CODE(2, 2, 5),
+        BC7_MODE_PRIO_CODE(2, 2, 6),
+        BC7_MODE_PRIO_CODE(3, 2, 23),
+        BC7_MODE_PRIO_CODE(2, 2, 59),
+        BC7_MODE_PRIO_CODE(3, 2, 5),
+        BC7_MODE_PRIO_CODE(4, 1, 42),
+        BC7_MODE_PRIO_CODE(2, 1, 37),
+        BC7_MODE_PRIO_CODE(3, 2, 59),
+        BC7_MODE_PRIO_CODE(4, 2, 9),
+        BC7_MODE_PRIO_CODE(2, 2, 4),
+        BC7_MODE_PRIO_CODE(2, 2, 56),
+        BC7_MODE_PRIO_CODE(1, 3, 33),
+        BC7_MODE_PRIO_CODE(2, 3, 33),
+        BC7_MODE_PRIO_CODE(2, 3, 22),
+        BC7_MODE_PRIO_CODE(2, 3, 12),
+        BC7_MODE_PRIO_CODE(4, 1, 40),
+        BC7_MODE_PRIO_CODE(3, 2, 34),
+        BC7_MODE_PRIO_CODE(3, 2, 56),
+        BC7_MODE_PRIO_CODE(3, 3, 26),
+        BC7_MODE_PRIO_CODE(1, 2, 7),
+        BC7_MODE_PRIO_CODE(2, 2, 7),
+        BC7_MODE_PRIO_CODE(3, 2, 7),
+        BC7_MODE_PRIO_CODE(2, 2, 36),
+        BC7_MODE_PRIO_CODE(3, 2, 36),
+        BC7_MODE_PRIO_CODE(4, 1, 52),
+        BC7_MODE_PRIO_CODE(2, 2, 33),
+        BC7_MODE_PRIO_CODE(3, 1, 45),
+        BC7_MODE_PRIO_CODE(1, 3, 4),
+        BC7_MODE_PRIO_CODE(4, 2, 15),
+        BC7_MODE_PRIO_CODE(3, 1, 41),
+        BC7_MODE_PRIO_CODE(2, 2, 54),
+        BC7_MODE_PRIO_CODE(3, 2, 4),
+        BC7_MODE_PRIO_CODE(2, 5, 20),
+        BC7_MODE_PRIO_CODE(3, 2, 62),
+        BC7_MODE_PRIO_CODE(1, 3, 35),
+        BC7_MODE_PRIO_CODE(4, 1, 41),
+        BC7_MODE_PRIO_CODE(3, 2, 6),
+        BC7_MODE_PRIO_CODE(2, 2, 52),
+        BC7_MODE_PRIO_CODE(3, 1, 46),
+        BC7_MODE_PRIO_CODE(1, 1, 39),
+        BC7_MODE_PRIO_CODE(3, 2, 33),
+        BC7_MODE_PRIO_CODE(1, 3, 5),
+        BC7_MODE_PRIO_CODE(3, 1, 48),
+        BC7_MODE_PRIO_CODE(3, 2, 24),
+        BC7_MODE_PRIO_CODE(3, 2, 32),
+        BC7_MODE_PRIO_CODE(3, 3, 33),
+        BC7_MODE_PRIO_CODE(1, 3, 17),
+        BC7_MODE_PRIO_CODE(4, 1, 57),
+        BC7_MODE_PRIO_CODE(1, 3, 25),
+        BC7_MODE_PRIO_CODE(2, 3, 11),
+        BC7_MODE_PRIO_CODE(1, 3, 61),
+        BC7_MODE_PRIO_CODE(4, 1, 43),
+        BC7_MODE_PRIO_CODE(1, 3, 60),
+        BC7_MODE_PRIO_CODE(2, 3, 60),
+        BC7_MODE_PRIO_CODE(2, 2, 28),
+        BC7_MODE_PRIO_CODE(3, 2, 28),
+        BC7_MODE_PRIO_CODE(4, 1, 55),
+        BC7_MODE_PRIO_CODE(2, 3, 5),
+        BC7_MODE_PRIO_CODE(3, 1, 51),
+        BC7_MODE_PRIO_CODE(4, 1, 53),
+        BC7_MODE_PRIO_CODE(4, 1, 54),
+        BC7_MODE_PRIO_CODE(1, 3, 32),
+        BC7_MODE_PRIO_CODE(1, 3, 24),
+        BC7_MODE_PRIO_CODE(4, 1, 47),
+        BC7_MODE_PRIO_CODE(2, 2, 51),
+        BC7_MODE_PRIO_CODE(4, 2, 12),
+        BC7_MODE_PRIO_CODE(2, 3, 61),
+        BC7_MODE_PRIO_CODE(3, 4, 21),
+        BC7_MODE_PRIO_CODE(2, 3, 32),
+        BC7_MODE_PRIO_CODE(3, 1, 36),
+        BC7_MODE_PRIO_CODE(3, 1, 49),
+        BC7_MODE_PRIO_CODE(1, 3, 18),
+        BC7_MODE_PRIO_CODE(4, 3, 29),
+        BC7_MODE_PRIO_CODE(4, 2, 63),
+        BC7_MODE_PRIO_CODE(2, 2, 27),
+        BC7_MODE_PRIO_CODE(2, 3, 17),
+        BC7_MODE_PRIO_CODE(3, 1, 50),
+        BC7_MODE_PRIO_CODE(3, 2, 61),
+        BC7_MODE_PRIO_CODE(1, 3, 63),
+        BC7_MODE_PRIO_CODE(2, 3, 63),
+        BC7_MODE_PRIO_CODE(3, 2, 27),
+        BC7_MODE_PRIO_CODE(4, 1, 46),
+        BC7_MODE_PRIO_CODE(1, 2, 26),
+        BC7_MODE_PRIO_CODE(2, 3, 4),
+        BC7_MODE_PRIO_CODE(2, 3, 18),
+        BC7_MODE_PRIO_CODE(4, 1, 45),
+        BC7_MODE_PRIO_CODE(4, 1, 51),
+        BC7_MODE_PRIO_CODE(1, 2, 1),
+        BC7_MODE_PRIO_CODE(4, 2, 6),
+        BC7_MODE_PRIO_CODE(1, 3, 62),
+        BC7_MODE_PRIO_CODE(2, 3, 62),
+        BC7_MODE_PRIO_CODE(2, 1, 44),
+        BC7_MODE_PRIO_CODE(4, 1, 49),
+        BC7_MODE_PRIO_CODE(3, 5, 30),
+        BC7_MODE_PRIO_CODE(2, 3, 25),
+        BC7_MODE_PRIO_CODE(1, 2, 49),
+        BC7_MODE_PRIO_CODE(4, 1, 48),
+        BC7_MODE_PRIO_CODE(3, 3, 3),
+        BC7_MODE_PRIO_CODE(3, 1, 37),
+        BC7_MODE_PRIO_CODE(1, 2, 0),
+        BC7_MODE_PRIO_CODE(2, 2, 0),
+        BC7_MODE_PRIO_CODE(2, 3, 35),
+        BC7_MODE_PRIO_CODE(2, 3, 24),
+        BC7_MODE_PRIO_CODE(2, 2, 53),
+        BC7_MODE_PRIO_CODE(3, 2, 53),
+        BC7_MODE_PRIO_CODE(4, 2, 59),
+        BC7_MODE_PRIO_CODE(3, 3, 10),
+        BC7_MODE_PRIO_CODE(1, 2, 3),
+        BC7_MODE_PRIO_CODE(2, 2, 3),
+        BC7_MODE_PRIO_CODE(3, 2, 3),
+        BC7_MODE_PRIO_CODE(3, 3, 32),
+        BC7_MODE_PRIO_CODE(1, 2, 46),
+        BC7_MODE_PRIO_CODE(4, 2, 62),
+        BC7_MODE_PRIO_CODE(4, 2, 60),
+        BC7_MODE_PRIO_CODE(2, 2, 30),
+        BC7_MODE_PRIO_CODE(1, 3, 47),
+        BC7_MODE_PRIO_CODE(4, 2, 36),
+        BC7_MODE_PRIO_CODE(2, 2, 1),
+        BC7_MODE_PRIO_CODE(3, 2, 1),
+        BC7_MODE_PRIO_CODE(3, 2, 58),
+        BC7_MODE_PRIO_CODE(4, 1, 36),
+        BC7_MODE_PRIO_CODE(3, 3, 16),
+        BC7_MODE_PRIO_CODE(2, 3, 47),
+        BC7_MODE_PRIO_CODE(2, 2, 39),
+        BC7_MODE_PRIO_CODE(4, 1, 50),
+        BC7_MODE_PRIO_CODE(4, 2, 21),
+        BC7_MODE_PRIO_CODE(2, 1, 38),
+        BC7_MODE_PRIO_CODE(4, 4, 21),
+        BC7_MODE_PRIO_CODE(3, 3, 23),
+        BC7_MODE_PRIO_CODE(1, 2, 43),
+        BC7_MODE_PRIO_CODE(1, 2, 41),
+        BC7_MODE_PRIO_CODE(2, 2, 41),
+        BC7_MODE_PRIO_CODE(1, 3, 28),
+        BC7_MODE_PRIO_CODE(4, 2, 35),
+        BC7_MODE_PRIO_CODE(4, 3, 26),
+        BC7_MODE_PRIO_CODE(1, 3, 59),
+        BC7_MODE_PRIO_CODE(1, 1, 34),
+        BC7_MODE_PRIO_CODE(2, 2, 29),
+        BC7_MODE_PRIO_CODE(3, 2, 29),
+        BC7_MODE_PRIO_CODE(3, 2, 52),
+        BC7_MODE_PRIO_CODE(1, 3, 58),
+        BC7_MODE_PRIO_CODE(4, 5, 30),
+        BC7_MODE_PRIO_CODE(4, 3, 33),
+        BC7_MODE_PRIO_CODE(3, 2, 30),
+        BC7_MODE_PRIO_CODE(1, 2, 44),
+        BC7_MODE_PRIO_CODE(1, 2, 2),
+        BC7_MODE_PRIO_CODE(2, 2, 2),
+        BC7_MODE_PRIO_CODE(3, 2, 2),
+        BC7_MODE_PRIO_CODE(1, 2, 47),
+        BC7_MODE_PRIO_CODE(2, 2, 47),
+        BC7_MODE_PRIO_CODE(3, 3, 7),
+        BC7_MODE_PRIO_CODE(2, 3, 58),
+        BC7_MODE_PRIO_CODE(3, 2, 55),
+        BC7_MODE_PRIO_CODE(4, 2, 4),
+        BC7_MODE_PRIO_CODE(3, 2, 0),
+        BC7_MODE_PRIO_CODE(1, 3, 31),
+        BC7_MODE_PRIO_CODE(3, 2, 31),
+        BC7_MODE_PRIO_CODE(3, 3, 12),
+        BC7_MODE_PRIO_CODE(3, 2, 51),
+        BC7_MODE_PRIO_CODE(2, 1, 39),
+        BC7_MODE_PRIO_CODE(1, 3, 48),
+        BC7_MODE_PRIO_CODE(1, 3, 27),
+        BC7_MODE_PRIO_CODE(4, 2, 25),
+        BC7_MODE_PRIO_CODE(4, 2, 22),
+        BC7_MODE_PRIO_CODE(4, 2, 18),
+        BC7_MODE_PRIO_CODE(2, 2, 44),
+        BC7_MODE_PRIO_CODE(2, 3, 28),
+        BC7_MODE_PRIO_CODE(3, 1, 44),
+        BC7_MODE_PRIO_CODE(2, 1, 34),
+        BC7_MODE_PRIO_CODE(3, 5, 10),
+        BC7_MODE_PRIO_CODE(4, 4, 10),
+        BC7_MODE_PRIO_CODE(3, 2, 54),
+        BC7_MODE_PRIO_CODE(4, 2, 7),
+        BC7_MODE_PRIO_CODE(4, 2, 20),
+        BC7_MODE_PRIO_CODE(2, 2, 37),
+        BC7_MODE_PRIO_CODE(3, 3, 6),
+        BC7_MODE_PRIO_CODE(2, 2, 43),
+        BC7_MODE_PRIO_CODE(2, 3, 59),
+        BC7_MODE_PRIO_CODE(1, 3, 30),
+        BC7_MODE_PRIO_CODE(4, 2, 5),
+        BC7_MODE_PRIO_CODE(4, 2, 61),
+        BC7_MODE_PRIO_CODE(4, 2, 19),
+        BC7_MODE_PRIO_CODE(4, 2, 23),
+        BC7_MODE_PRIO_CODE(3, 2, 39),
+        BC7_MODE_PRIO_CODE(2, 3, 27),
+        BC7_MODE_PRIO_CODE(1, 3, 57),
+        BC7_MODE_PRIO_CODE(2, 3, 57),
+        BC7_MODE_PRIO_CODE(3, 3, 21),
+        BC7_MODE_PRIO_CODE(3, 3, 11),
+        BC7_MODE_PRIO_CODE(3, 1, 39),
+        BC7_MODE_PRIO_CODE(2, 3, 48),
+        BC7_MODE_PRIO_CODE(4, 1, 37),
+        BC7_MODE_PRIO_CODE(3, 3, 19),
+        BC7_MODE_PRIO_CODE(3, 1, 38),
+        BC7_MODE_PRIO_CODE(2, 2, 38),
+        BC7_MODE_PRIO_CODE(2, 3, 31),
+        BC7_MODE_PRIO_CODE(2, 2, 40),
+        BC7_MODE_PRIO_CODE(3, 2, 40),
+        BC7_MODE_PRIO_CODE(1, 3, 56),
+        BC7_MODE_PRIO_CODE(4, 5, 10),
+        BC7_MODE_PRIO_CODE(2, 3, 56),
+        BC7_MODE_PRIO_CODE(4, 1, 38),
+        BC7_MODE_PRIO_CODE(1, 3, 41),
+        BC7_MODE_PRIO_CODE(1, 3, 50),
+        BC7_MODE_PRIO_CODE(2, 3, 30),
+        BC7_MODE_PRIO_CODE(3, 3, 8),
+        BC7_MODE_PRIO_CODE(4, 2, 24),
+        BC7_MODE_PRIO_CODE(3, 3, 9),
+        BC7_MODE_PRIO_CODE(3, 1, 34),
+        BC7_MODE_PRIO_CODE(4, 1, 34),
+        BC7_MODE_PRIO_CODE(2, 3, 50),
+        BC7_MODE_PRIO_CODE(1, 3, 43),
+        BC7_MODE_PRIO_CODE(1, 3, 40),
+        BC7_MODE_PRIO_CODE(1, 3, 51),
+        BC7_MODE_PRIO_CODE(2, 3, 51),
+        BC7_MODE_PRIO_CODE(1, 3, 45),
+        BC7_MODE_PRIO_CODE(2, 3, 45),
+        BC7_MODE_PRIO_CODE(2, 3, 40),
+        BC7_MODE_PRIO_CODE(3, 3, 20),
+        BC7_MODE_PRIO_CODE(2, 3, 41),
+        BC7_MODE_PRIO_CODE(3, 2, 44),
+        BC7_MODE_PRIO_CODE(2, 3, 43),
+        BC7_MODE_PRIO_CODE(4, 2, 57),
+        BC7_MODE_PRIO_CODE(2, 4, 20),
+        BC7_MODE_PRIO_CODE(3, 3, 4),
+        BC7_MODE_PRIO_CODE(3, 3, 61),
+        BC7_MODE_PRIO_CODE(1, 3, 46),
+        BC7_MODE_PRIO_CODE(2, 3, 46),
+        BC7_MODE_PRIO_CODE(4, 3, 1),
+        BC7_MODE_PRIO_CODE(3, 3, 22),
+        BC7_MODE_PRIO_CODE(1, 3, 49),
+        BC7_MODE_PRIO_CODE(2, 3, 49),
+        BC7_MODE_PRIO_CODE(4, 3, 15),
+        BC7_MODE_PRIO_CODE(3, 3, 5),
+        BC7_MODE_PRIO_CODE(4, 1, 44),
+        BC7_MODE_PRIO_CODE(4, 3, 14),
+        BC7_MODE_PRIO_CODE(4, 3, 2),
+        BC7_MODE_PRIO_CODE(3, 3, 60),
+        BC7_MODE_PRIO_CODE(1, 3, 53),
+        BC7_MODE_PRIO_CODE(2, 3, 53),
+        BC7_MODE_PRIO_CODE(4, 3, 32),
+        BC7_MODE_PRIO_CODE(3, 3, 24),
+        BC7_MODE_PRIO_CODE(3, 3, 63),
+        BC7_MODE_PRIO_CODE(3, 2, 37),
+        BC7_MODE_PRIO_CODE(1, 3, 52),
+        BC7_MODE_PRIO_CODE(2, 3, 52),
+        BC7_MODE_PRIO_CODE(4, 4, 30),
+        BC7_MODE_PRIO_CODE(4, 2, 34),
+        BC7_MODE_PRIO_CODE(1, 3, 54),
+        BC7_MODE_PRIO_CODE(3, 3, 62),
+        BC7_MODE_PRIO_CODE(3, 3, 18),
+        BC7_MODE_PRIO_CODE(3, 2, 41),
+        BC7_MODE_PRIO_CODE(4, 2, 58),
+        BC7_MODE_PRIO_CODE(1, 3, 42),
+        BC7_MODE_PRIO_CODE(2, 3, 42),
+        BC7_MODE_PRIO_CODE(4, 2, 0),
+        BC7_MODE_PRIO_CODE(4, 2, 55),
+        BC7_MODE_PRIO_CODE(2, 3, 54),
+        BC7_MODE_PRIO_CODE(3, 2, 47),
+        BC7_MODE_PRIO_CODE(4, 2, 53),
+        BC7_MODE_PRIO_CODE(3, 3, 25),
+        BC7_MODE_PRIO_CODE(3, 4, 20),
+        BC7_MODE_PRIO_CODE(4, 2, 33),
+        BC7_MODE_PRIO_CODE(1, 3, 55),
+        BC7_MODE_PRIO_CODE(2, 3, 55),
+        BC7_MODE_PRIO_CODE(4, 2, 32),
+        BC7_MODE_PRIO_CODE(3, 2, 43),
+        BC7_MODE_PRIO_CODE(3, 3, 17),
+        BC7_MODE_PRIO_CODE(3, 5, 20),
+        BC7_MODE_PRIO_CODE(4, 5, 20),
+        BC7_MODE_PRIO_CODE(1, 3, 36),
+        BC7_MODE_PRIO_CODE(2, 3, 36),
+        BC7_MODE_PRIO_CODE(4, 2, 54),
+        BC7_MODE_PRIO_CODE(2, 2, 49),
+        BC7_MODE_PRIO_CODE(3, 2, 49),
+        BC7_MODE_PRIO_CODE(4, 1, 39),
+        BC7_MODE_PRIO_CODE(4, 2, 3),
+        BC7_MODE_PRIO_CODE(3, 3, 35),
+        BC7_MODE_PRIO_CODE(4, 2, 52),
+        BC7_MODE_PRIO_CODE(4, 2, 1),
+        BC7_MODE_PRIO_CODE(1, 2, 50),
+        BC7_MODE_PRIO_CODE(4, 2, 49),
+        BC7_MODE_PRIO_CODE(4, 3, 16),
+        BC7_MODE_PRIO_CODE(2, 2, 50),
+        BC7_MODE_PRIO_CODE(3, 2, 50),
+        BC7_MODE_PRIO_CODE(4, 2, 31),
+        BC7_MODE_PRIO_CODE(4, 3, 3),
+        BC7_MODE_PRIO_CODE(1, 2, 48),
+        BC7_MODE_PRIO_CODE(2, 2, 48),
+        BC7_MODE_PRIO_CODE(3, 2, 48),
+        BC7_MODE_PRIO_CODE(3, 3, 28),
+        BC7_MODE_PRIO_CODE(4, 3, 9),
+        BC7_MODE_PRIO_CODE(1, 3, 38),
+        BC7_MODE_PRIO_CODE(4, 3, 10),
+        BC7_MODE_PRIO_CODE(3, 3, 31),
+        BC7_MODE_PRIO_CODE(4, 2, 51),
+        BC7_MODE_PRIO_CODE(1, 3, 37),
+        BC7_MODE_PRIO_CODE(2, 3, 37),
+        BC7_MODE_PRIO_CODE(3, 3, 50),
+        BC7_MODE_PRIO_CODE(2, 3, 38),
+        BC7_MODE_PRIO_CODE(4, 3, 20),
+        BC7_MODE_PRIO_CODE(3, 3, 41),
+        BC7_MODE_PRIO_CODE(3, 3, 56),
+        BC7_MODE_PRIO_CODE(4, 3, 6),
+        BC7_MODE_PRIO_CODE(4, 3, 8),
+        BC7_MODE_PRIO_CODE(4, 2, 37),
+        BC7_MODE_PRIO_CODE(3, 3, 58),
+        BC7_MODE_PRIO_CODE(3, 3, 59),
+        BC7_MODE_PRIO_CODE(4, 2, 56),
+        BC7_MODE_PRIO_CODE(1, 3, 39),
+        BC7_MODE_PRIO_CODE(2, 3, 39),
+        BC7_MODE_PRIO_CODE(4, 2, 43),
+        BC7_MODE_PRIO_CODE(1, 3, 44),
+        BC7_MODE_PRIO_CODE(2, 3, 44),
+        BC7_MODE_PRIO_CODE(4, 3, 7),
+        BC7_MODE_PRIO_CODE(3, 3, 27),
+        BC7_MODE_PRIO_CODE(4, 3, 23),
+        BC7_MODE_PRIO_CODE(3, 3, 45),
+        BC7_MODE_PRIO_CODE(4, 3, 22),
+        BC7_MODE_PRIO_CODE(3, 3, 30),
+        BC7_MODE_PRIO_CODE(3, 3, 48),
+        BC7_MODE_PRIO_CODE(3, 3, 51),
+        BC7_MODE_PRIO_CODE(1, 2, 42),
+        BC7_MODE_PRIO_CODE(2, 2, 42),
+        BC7_MODE_PRIO_CODE(3, 2, 42),
+        BC7_MODE_PRIO_CODE(4, 3, 19),
+        BC7_MODE_PRIO_CODE(4, 3, 21),
+        BC7_MODE_PRIO_CODE(2, 2, 46),
+        BC7_MODE_PRIO_CODE(3, 3, 36),
+        BC7_MODE_PRIO_CODE(4, 2, 28),
+        BC7_MODE_PRIO_CODE(3, 3, 49),
+        BC7_MODE_PRIO_CODE(3, 3, 53),
+        BC7_MODE_PRIO_CODE(3, 3, 55),
+        BC7_MODE_PRIO_CODE(2, 2, 26),
+        BC7_MODE_PRIO_CODE(3, 2, 26),
+        BC7_MODE_PRIO_CODE(4, 2, 30),
+        BC7_MODE_PRIO_CODE(3, 3, 52),
+        BC7_MODE_PRIO_CODE(4, 2, 41),
+        BC7_MODE_PRIO_CODE(4, 2, 29),
+        BC7_MODE_PRIO_CODE(1, 3, 34),
+        BC7_MODE_PRIO_CODE(2, 3, 34),
+        BC7_MODE_PRIO_CODE(4, 2, 44),
+        BC7_MODE_PRIO_CODE(3, 3, 43),
+        BC7_MODE_PRIO_CODE(4, 2, 47),
+        BC7_MODE_PRIO_CODE(4, 3, 18),
+        BC7_MODE_PRIO_CODE(4, 3, 17),
+        BC7_MODE_PRIO_CODE(3, 3, 47),
+        BC7_MODE_PRIO_CODE(4, 3, 11),
+        BC7_MODE_PRIO_CODE(3, 3, 57),
+        BC7_MODE_PRIO_CODE(3, 2, 38),
+        BC7_MODE_PRIO_CODE(3, 3, 46),
+        BC7_MODE_PRIO_CODE(4, 3, 25),
+        BC7_MODE_PRIO_CODE(4, 3, 4),
+        BC7_MODE_PRIO_CODE(3, 3, 42),
+        BC7_MODE_PRIO_CODE(4, 3, 61),
+        BC7_MODE_PRIO_CODE(4, 2, 48),
+        BC7_MODE_PRIO_CODE(4, 3, 5),
+        BC7_MODE_PRIO_CODE(3, 3, 54),
+        BC7_MODE_PRIO_CODE(4, 4, 20),
+        BC7_MODE_PRIO_CODE(4, 3, 24),
+        BC7_MODE_PRIO_CODE(4, 3, 12),
+        BC7_MODE_PRIO_CODE(4, 2, 40),
+        BC7_MODE_PRIO_CODE(3, 3, 40),
+        BC7_MODE_PRIO_CODE(3, 3, 44),
+        BC7_MODE_PRIO_CODE(4, 3, 63),
+        BC7_MODE_PRIO_CODE(4, 3, 50),
+        BC7_MODE_PRIO_CODE(4, 2, 50),
+        BC7_MODE_PRIO_CODE(4, 3, 60),
+        BC7_MODE_PRIO_CODE(4, 2, 39),
+        BC7_MODE_PRIO_CODE(4, 3, 62),
+        BC7_MODE_PRIO_CODE(4, 3, 49),
+        BC7_MODE_PRIO_CODE(4, 3, 58),
+        BC7_MODE_PRIO_CODE(4, 3, 47),
+        BC7_MODE_PRIO_CODE(4, 3, 56),
+        BC7_MODE_PRIO_CODE(4, 2, 26),
+        BC7_MODE_PRIO_CODE(4, 2, 27),
+        BC7_MODE_PRIO_CODE(3, 3, 37),
+        BC7_MODE_PRIO_CODE(4, 3, 57),
+        BC7_MODE_PRIO_CODE(4, 3, 48),
+        BC7_MODE_PRIO_CODE(4, 3, 31),
+        BC7_MODE_PRIO_CODE(4, 3, 51),
+        BC7_MODE_PRIO_CODE(4, 3, 28),
+        BC7_MODE_PRIO_CODE(4, 3, 53),
+        BC7_MODE_PRIO_CODE(3, 3, 39),
+        BC7_MODE_PRIO_CODE(4, 3, 40),
+        BC7_MODE_PRIO_CODE(4, 3, 27),
+        BC7_MODE_PRIO_CODE(4, 2, 2),
+        BC7_MODE_PRIO_CODE(3, 3, 34),
+        BC7_MODE_PRIO_CODE(4, 2, 38),
+        BC7_MODE_PRIO_CODE(4, 3, 54),
+        BC7_MODE_PRIO_CODE(3, 3, 38),
+        BC7_MODE_PRIO_CODE(4, 3, 52),
+        BC7_MODE_PRIO_CODE(4, 3, 30),
+        BC7_MODE_PRIO_CODE(4, 3, 59),
+        BC7_MODE_PRIO_CODE(1, 2, 45),
+        BC7_MODE_PRIO_CODE(4, 3, 45),
+        BC7_MODE_PRIO_CODE(4, 2, 42),
+        BC7_MODE_PRIO_CODE(4, 3, 35),
+        BC7_MODE_PRIO_CODE(4, 3, 41),
+        BC7_MODE_PRIO_CODE(3, 2, 46),
+        BC7_MODE_PRIO_CODE(4, 2, 46),
+        BC7_MODE_PRIO_CODE(4, 3, 46),
+        BC7_MODE_PRIO_CODE(2, 2, 45),
+        BC7_MODE_PRIO_CODE(4, 3, 43),
+        BC7_MODE_PRIO_CODE(4, 3, 37),
+        BC7_MODE_PRIO_CODE(4, 3, 38),
+        BC7_MODE_PRIO_CODE(4, 3, 36),
+        BC7_MODE_PRIO_CODE(4, 3, 42),
+        BC7_MODE_PRIO_CODE(4, 3, 34),
+        BC7_MODE_PRIO_CODE(4, 3, 39),
+        BC7_MODE_PRIO_CODE(4, 3, 55),
+        BC7_MODE_PRIO_CODE(4, 3, 44),
+        BC7_MODE_PRIO_CODE(3, 2, 45),
+        BC7_MODE_PRIO_CODE(1, 4, 0),
+        BC7_MODE_PRIO_CODE(1, 4, 1),
+        BC7_MODE_PRIO_CODE(1, 5, 0),
+        BC7_MODE_PRIO_CODE(4, 2, 45),
+        BC7_MODE_PRIO_CODE(2, 4, 0),
+        BC7_MODE_PRIO_CODE(2, 4, 1),
+        BC7_MODE_PRIO_CODE(2, 5, 0),
+        BC7_MODE_PRIO_CODE(3, 4, 0),
+        BC7_MODE_PRIO_CODE(3, 4, 1),
+        BC7_MODE_PRIO_CODE(3, 5, 0),
+        BC7_MODE_PRIO_CODE(4, 4, 0),
+        BC7_MODE_PRIO_CODE(4, 4, 1),
+        BC7_MODE_PRIO_CODE(4, 5, 0),
+    };
+
+    const uint16_t *g_bc7PrioCodesRGB = &g_bc7PrioCodesRGBData[0]; // Points at the first entry of the RGB priority-code table.
+    const int g_bc7NumPrioCodesRGB = sizeof(g_bc7PrioCodesRGBData) / sizeof(*g_bc7PrioCodesRGBData); // Entry count, derived from the array's own size.
+
+    const uint16_t g_bc7PrioCodesRGBAData[] = // RGBA table of packed priority codes, one uint16_t per BC7_MODE_PRIO_CODE entry; read through g_bc7PrioCodesRGBA.
+    { // NOTE(review): arguments look like (seed point count, mode, partition/selector) and the order looks like priority order - confirm against the macro definition.
+        BC7_MODE_PRIO_CODE(1, 4, 1),
+        BC7_MODE_PRIO_CODE(1, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 4, 31),
+        BC7_MODE_PRIO_CODE(1, 4, 11),
+        BC7_MODE_PRIO_CODE(1, 4, 0),
+        BC7_MODE_PRIO_CODE(1, 7, 13),
+        BC7_MODE_PRIO_CODE(1, 5, 0),
+        BC7_MODE_PRIO_CODE(1, 7, 0),
+        BC7_MODE_PRIO_CODE(2, 4, 1),
+        BC7_MODE_PRIO_CODE(3, 4, 1),
+        BC7_MODE_PRIO_CODE(2, 4, 0),
+        BC7_MODE_PRIO_CODE(2, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 7, 6),
+        BC7_MODE_PRIO_CODE(1, 4, 10),
+        BC7_MODE_PRIO_CODE(1, 7, 15),
+        BC7_MODE_PRIO_CODE(1, 7, 14),
+        BC7_MODE_PRIO_CODE(1, 4, 30),
+        BC7_MODE_PRIO_CODE(1, 7, 7),
+        BC7_MODE_PRIO_CODE(3, 6, 0),
+        BC7_MODE_PRIO_CODE(1, 7, 19),
+        BC7_MODE_PRIO_CODE(3, 4, 0),
+        BC7_MODE_PRIO_CODE(2, 7, 13),
+        BC7_MODE_PRIO_CODE(1, 5, 30),
+        BC7_MODE_PRIO_CODE(1, 7, 2),
+        BC7_MODE_PRIO_CODE(1, 7, 1),
+        BC7_MODE_PRIO_CODE(1, 7, 21),
+        BC7_MODE_PRIO_CODE(4, 4, 1),
+        BC7_MODE_PRIO_CODE(1, 4, 21),
+        BC7_MODE_PRIO_CODE(2, 4, 31),
+        BC7_MODE_PRIO_CODE(1, 7, 10),
+        BC7_MODE_PRIO_CODE(1, 7, 3),
+        BC7_MODE_PRIO_CODE(4, 6, 0),
+        BC7_MODE_PRIO_CODE(3, 7, 13),
+        BC7_MODE_PRIO_CODE(1, 7, 16),
+        BC7_MODE_PRIO_CODE(1, 7, 8),
+        BC7_MODE_PRIO_CODE(2, 5, 0),
+        BC7_MODE_PRIO_CODE(2, 7, 0),
+        BC7_MODE_PRIO_CODE(1, 7, 23),
+        BC7_MODE_PRIO_CODE(1, 7, 9),
+        BC7_MODE_PRIO_CODE(2, 4, 11),
+        BC7_MODE_PRIO_CODE(3, 4, 31),
+        BC7_MODE_PRIO_CODE(1, 7, 20),
+        BC7_MODE_PRIO_CODE(1, 7, 22),
+        BC7_MODE_PRIO_CODE(4, 4, 0),
+        BC7_MODE_PRIO_CODE(1, 5, 10),
+        BC7_MODE_PRIO_CODE(4, 7, 13),
+        BC7_MODE_PRIO_CODE(3, 7, 0),
+        BC7_MODE_PRIO_CODE(1, 7, 12),
+        BC7_MODE_PRIO_CODE(1, 7, 29),
+        BC7_MODE_PRIO_CODE(3, 4, 11),
+        BC7_MODE_PRIO_CODE(1, 7, 11),
+        BC7_MODE_PRIO_CODE(1, 7, 18),
+        BC7_MODE_PRIO_CODE(1, 7, 4),
+        BC7_MODE_PRIO_CODE(2, 7, 15),
+        BC7_MODE_PRIO_CODE(2, 7, 14),
+        BC7_MODE_PRIO_CODE(1, 7, 5),
+        BC7_MODE_PRIO_CODE(1, 7, 25),
+        BC7_MODE_PRIO_CODE(1, 7, 17),
+        BC7_MODE_PRIO_CODE(1, 7, 24),
+        BC7_MODE_PRIO_CODE(1, 7, 26),
+        BC7_MODE_PRIO_CODE(3, 5, 0),
+        BC7_MODE_PRIO_CODE(2, 7, 2),
+        BC7_MODE_PRIO_CODE(1, 5, 20),
+        BC7_MODE_PRIO_CODE(2, 7, 1),
+        BC7_MODE_PRIO_CODE(2, 7, 29),
+        BC7_MODE_PRIO_CODE(2, 4, 10),
+        BC7_MODE_PRIO_CODE(4, 7, 0),
+        BC7_MODE_PRIO_CODE(2, 7, 6),
+        BC7_MODE_PRIO_CODE(2, 7, 7),
+        BC7_MODE_PRIO_CODE(3, 7, 14),
+        BC7_MODE_PRIO_CODE(3, 7, 15),
+        BC7_MODE_PRIO_CODE(4, 4, 31),
+        BC7_MODE_PRIO_CODE(2, 7, 21),
+        BC7_MODE_PRIO_CODE(2, 4, 30),
+        BC7_MODE_PRIO_CODE(2, 4, 21),
+        BC7_MODE_PRIO_CODE(3, 7, 29),
+        BC7_MODE_PRIO_CODE(2, 7, 19),
+        BC7_MODE_PRIO_CODE(2, 7, 10),
+        BC7_MODE_PRIO_CODE(3, 7, 1),
+        BC7_MODE_PRIO_CODE(4, 7, 29),
+        BC7_MODE_PRIO_CODE(3, 7, 7),
+        BC7_MODE_PRIO_CODE(1, 4, 20),
+        BC7_MODE_PRIO_CODE(3, 7, 2),
+        BC7_MODE_PRIO_CODE(2, 7, 16),
+        BC7_MODE_PRIO_CODE(2, 7, 3),
+        BC7_MODE_PRIO_CODE(2, 5, 30),
+        BC7_MODE_PRIO_CODE(2, 7, 23),
+        BC7_MODE_PRIO_CODE(3, 7, 6),
+        BC7_MODE_PRIO_CODE(2, 7, 12),
+        BC7_MODE_PRIO_CODE(1, 7, 61),
+        BC7_MODE_PRIO_CODE(4, 4, 11),
+        BC7_MODE_PRIO_CODE(3, 4, 10),
+        BC7_MODE_PRIO_CODE(3, 7, 10),
+        BC7_MODE_PRIO_CODE(2, 7, 8),
+        BC7_MODE_PRIO_CODE(2, 7, 22),
+        BC7_MODE_PRIO_CODE(2, 7, 26),
+        BC7_MODE_PRIO_CODE(3, 4, 30),
+        BC7_MODE_PRIO_CODE(2, 7, 9),
+        BC7_MODE_PRIO_CODE(3, 7, 19),
+        BC7_MODE_PRIO_CODE(2, 7, 25),
+        BC7_MODE_PRIO_CODE(3, 4, 21),
+        BC7_MODE_PRIO_CODE(2, 7, 24),
+        BC7_MODE_PRIO_CODE(1, 7, 60),
+        BC7_MODE_PRIO_CODE(2, 7, 11),
+        BC7_MODE_PRIO_CODE(2, 7, 18),
+        BC7_MODE_PRIO_CODE(2, 7, 17),
+        BC7_MODE_PRIO_CODE(2, 7, 4),
+        BC7_MODE_PRIO_CODE(2, 7, 5),
+        BC7_MODE_PRIO_CODE(3, 7, 3),
+        BC7_MODE_PRIO_CODE(3, 7, 16),
+        BC7_MODE_PRIO_CODE(3, 7, 26),
+        BC7_MODE_PRIO_CODE(3, 7, 21),
+        BC7_MODE_PRIO_CODE(1, 7, 62),
+        BC7_MODE_PRIO_CODE(2, 7, 20),
+        BC7_MODE_PRIO_CODE(3, 7, 23),
+        BC7_MODE_PRIO_CODE(1, 7, 33),
+        BC7_MODE_PRIO_CODE(2, 7, 33),
+        BC7_MODE_PRIO_CODE(3, 7, 33),
+        BC7_MODE_PRIO_CODE(4, 7, 33),
+        BC7_MODE_PRIO_CODE(3, 7, 11),
+        BC7_MODE_PRIO_CODE(3, 7, 12),
+        BC7_MODE_PRIO_CODE(4, 7, 26),
+        BC7_MODE_PRIO_CODE(3, 7, 25),
+        BC7_MODE_PRIO_CODE(1, 7, 63),
+        BC7_MODE_PRIO_CODE(2, 5, 10),
+        BC7_MODE_PRIO_CODE(3, 7, 8),
+        BC7_MODE_PRIO_CODE(4, 5, 0),
+        BC7_MODE_PRIO_CODE(3, 7, 24),
+        BC7_MODE_PRIO_CODE(3, 7, 22),
+        BC7_MODE_PRIO_CODE(3, 7, 9),
+        BC7_MODE_PRIO_CODE(1, 7, 32),
+        BC7_MODE_PRIO_CODE(2, 7, 61),
+        BC7_MODE_PRIO_CODE(3, 7, 4),
+        BC7_MODE_PRIO_CODE(3, 5, 30),
+        BC7_MODE_PRIO_CODE(3, 7, 20),
+        BC7_MODE_PRIO_CODE(1, 7, 35),
+        BC7_MODE_PRIO_CODE(4, 7, 14),
+        BC7_MODE_PRIO_CODE(3, 7, 5),
+        BC7_MODE_PRIO_CODE(3, 7, 18),
+        BC7_MODE_PRIO_CODE(1, 7, 30),
+        BC7_MODE_PRIO_CODE(1, 7, 43),
+        BC7_MODE_PRIO_CODE(4, 4, 21),
+        BC7_MODE_PRIO_CODE(4, 7, 15),
+        BC7_MODE_PRIO_CODE(3, 7, 17),
+        BC7_MODE_PRIO_CODE(2, 7, 32),
+        BC7_MODE_PRIO_CODE(3, 7, 32),
+        BC7_MODE_PRIO_CODE(2, 5, 20),
+        BC7_MODE_PRIO_CODE(4, 7, 1),
+        BC7_MODE_PRIO_CODE(4, 7, 2),
+        BC7_MODE_PRIO_CODE(1, 7, 28),
+        BC7_MODE_PRIO_CODE(1, 7, 54),
+        BC7_MODE_PRIO_CODE(4, 7, 32),
+        BC7_MODE_PRIO_CODE(1, 7, 27),
+        BC7_MODE_PRIO_CODE(4, 4, 10),
+        BC7_MODE_PRIO_CODE(3, 5, 10),
+        BC7_MODE_PRIO_CODE(2, 7, 60),
+        BC7_MODE_PRIO_CODE(2, 4, 20),
+        BC7_MODE_PRIO_CODE(2, 7, 63),
+        BC7_MODE_PRIO_CODE(4, 4, 30),
+        BC7_MODE_PRIO_CODE(2, 7, 62),
+        BC7_MODE_PRIO_CODE(1, 7, 41),
+        BC7_MODE_PRIO_CODE(1, 7, 58),
+        BC7_MODE_PRIO_CODE(3, 7, 60),
+        BC7_MODE_PRIO_CODE(1, 7, 40),
+        BC7_MODE_PRIO_CODE(1, 7, 55),
+        BC7_MODE_PRIO_CODE(2, 7, 35),
+        BC7_MODE_PRIO_CODE(4, 7, 8),
+        BC7_MODE_PRIO_CODE(4, 7, 6),
+        BC7_MODE_PRIO_CODE(1, 7, 53),
+        BC7_MODE_PRIO_CODE(4, 7, 9),
+        BC7_MODE_PRIO_CODE(3, 7, 61),
+        BC7_MODE_PRIO_CODE(3, 4, 20),
+        BC7_MODE_PRIO_CODE(4, 7, 22),
+        BC7_MODE_PRIO_CODE(4, 7, 20),
+        BC7_MODE_PRIO_CODE(3, 7, 62),
+        BC7_MODE_PRIO_CODE(4, 7, 7),
+        BC7_MODE_PRIO_CODE(1, 7, 42),
+        BC7_MODE_PRIO_CODE(1, 7, 52),
+        BC7_MODE_PRIO_CODE(4, 5, 30),
+        BC7_MODE_PRIO_CODE(1, 7, 56),
+        BC7_MODE_PRIO_CODE(1, 7, 31),
+        BC7_MODE_PRIO_CODE(3, 5, 20),
+        BC7_MODE_PRIO_CODE(1, 7, 48),
+        BC7_MODE_PRIO_CODE(2, 7, 28),
+        BC7_MODE_PRIO_CODE(3, 7, 28),
+        BC7_MODE_PRIO_CODE(4, 7, 19),
+        BC7_MODE_PRIO_CODE(3, 7, 35),
+        BC7_MODE_PRIO_CODE(1, 7, 59),
+        BC7_MODE_PRIO_CODE(2, 7, 30),
+        BC7_MODE_PRIO_CODE(3, 7, 63),
+        BC7_MODE_PRIO_CODE(4, 7, 21),
+        BC7_MODE_PRIO_CODE(4, 7, 10),
+        BC7_MODE_PRIO_CODE(4, 7, 3),
+        BC7_MODE_PRIO_CODE(1, 7, 47),
+        BC7_MODE_PRIO_CODE(1, 7, 37),
+        BC7_MODE_PRIO_CODE(4, 5, 10),
+        BC7_MODE_PRIO_CODE(4, 7, 23),
+        BC7_MODE_PRIO_CODE(1, 7, 57),
+        BC7_MODE_PRIO_CODE(4, 7, 17),
+        BC7_MODE_PRIO_CODE(1, 7, 45),
+        BC7_MODE_PRIO_CODE(4, 7, 24),
+        BC7_MODE_PRIO_CODE(4, 7, 60),
+        BC7_MODE_PRIO_CODE(1, 7, 50),
+        BC7_MODE_PRIO_CODE(2, 7, 41),
+        BC7_MODE_PRIO_CODE(4, 7, 25),
+        BC7_MODE_PRIO_CODE(3, 7, 30),
+        BC7_MODE_PRIO_CODE(2, 7, 59),
+        BC7_MODE_PRIO_CODE(2, 7, 55),
+        BC7_MODE_PRIO_CODE(4, 7, 18),
+        BC7_MODE_PRIO_CODE(4, 7, 12),
+        BC7_MODE_PRIO_CODE(4, 7, 5),
+        BC7_MODE_PRIO_CODE(3, 7, 59),
+        BC7_MODE_PRIO_CODE(1, 7, 51),
+        BC7_MODE_PRIO_CODE(4, 7, 16),
+        BC7_MODE_PRIO_CODE(4, 7, 11),
+        BC7_MODE_PRIO_CODE(2, 7, 58),
+        BC7_MODE_PRIO_CODE(3, 7, 41),
+        BC7_MODE_PRIO_CODE(4, 4, 20),
+        BC7_MODE_PRIO_CODE(4, 7, 4),
+        BC7_MODE_PRIO_CODE(1, 7, 49),
+        BC7_MODE_PRIO_CODE(2, 7, 27),
+        BC7_MODE_PRIO_CODE(3, 7, 27),
+        BC7_MODE_PRIO_CODE(4, 7, 62),
+        BC7_MODE_PRIO_CODE(3, 7, 58),
+        BC7_MODE_PRIO_CODE(4, 5, 20),
+        BC7_MODE_PRIO_CODE(2, 7, 53),
+        BC7_MODE_PRIO_CODE(3, 7, 53),
+        BC7_MODE_PRIO_CODE(2, 7, 40),
+        BC7_MODE_PRIO_CODE(3, 7, 40),
+        BC7_MODE_PRIO_CODE(2, 7, 31),
+        BC7_MODE_PRIO_CODE(3, 7, 31),
+        BC7_MODE_PRIO_CODE(4, 7, 61),
+        BC7_MODE_PRIO_CODE(1, 7, 36),
+        BC7_MODE_PRIO_CODE(4, 7, 63),
+        BC7_MODE_PRIO_CODE(1, 7, 46),
+        BC7_MODE_PRIO_CODE(3, 7, 55),
+        BC7_MODE_PRIO_CODE(2, 7, 52),
+        BC7_MODE_PRIO_CODE(2, 7, 56),
+        BC7_MODE_PRIO_CODE(2, 7, 42),
+        BC7_MODE_PRIO_CODE(2, 7, 37),
+        BC7_MODE_PRIO_CODE(2, 7, 57),
+        BC7_MODE_PRIO_CODE(3, 7, 57),
+        BC7_MODE_PRIO_CODE(2, 7, 45),
+        BC7_MODE_PRIO_CODE(4, 7, 57),
+        BC7_MODE_PRIO_CODE(2, 7, 49),
+        BC7_MODE_PRIO_CODE(3, 7, 42),
+        BC7_MODE_PRIO_CODE(2, 7, 43),
+        BC7_MODE_PRIO_CODE(3, 7, 43),
+        BC7_MODE_PRIO_CODE(4, 7, 28),
+        BC7_MODE_PRIO_CODE(2, 7, 48),
+        BC7_MODE_PRIO_CODE(3, 7, 52),
+        BC7_MODE_PRIO_CODE(3, 7, 49),
+        BC7_MODE_PRIO_CODE(4, 7, 59),
+        BC7_MODE_PRIO_CODE(4, 7, 40),
+        BC7_MODE_PRIO_CODE(4, 7, 27),
+        BC7_MODE_PRIO_CODE(3, 7, 45),
+        BC7_MODE_PRIO_CODE(4, 7, 55),
+        BC7_MODE_PRIO_CODE(3, 7, 56),
+        BC7_MODE_PRIO_CODE(4, 7, 42),
+        BC7_MODE_PRIO_CODE(2, 7, 54),
+        BC7_MODE_PRIO_CODE(3, 7, 54),
+        BC7_MODE_PRIO_CODE(4, 7, 54),
+        BC7_MODE_PRIO_CODE(2, 7, 47),
+        BC7_MODE_PRIO_CODE(3, 7, 47),
+        BC7_MODE_PRIO_CODE(4, 7, 43),
+        BC7_MODE_PRIO_CODE(4, 7, 31),
+        BC7_MODE_PRIO_CODE(3, 7, 37),
+        BC7_MODE_PRIO_CODE(3, 7, 48),
+        BC7_MODE_PRIO_CODE(4, 7, 48),
+        BC7_MODE_PRIO_CODE(4, 7, 45),
+        BC7_MODE_PRIO_CODE(4, 7, 47),
+        BC7_MODE_PRIO_CODE(2, 7, 36),
+        BC7_MODE_PRIO_CODE(1, 7, 44),
+        BC7_MODE_PRIO_CODE(4, 7, 35),
+        BC7_MODE_PRIO_CODE(4, 7, 58),
+        BC7_MODE_PRIO_CODE(3, 7, 36),
+        BC7_MODE_PRIO_CODE(2, 7, 50),
+        BC7_MODE_PRIO_CODE(3, 7, 50),
+        BC7_MODE_PRIO_CODE(4, 7, 50),
+        BC7_MODE_PRIO_CODE(4, 7, 52),
+        BC7_MODE_PRIO_CODE(1, 7, 39),
+        BC7_MODE_PRIO_CODE(1, 7, 34),
+        BC7_MODE_PRIO_CODE(1, 7, 38),
+        BC7_MODE_PRIO_CODE(2, 7, 38),
+        BC7_MODE_PRIO_CODE(3, 7, 38),
+        BC7_MODE_PRIO_CODE(4, 7, 30),
+        BC7_MODE_PRIO_CODE(2, 7, 51),
+        BC7_MODE_PRIO_CODE(4, 7, 41),
+        BC7_MODE_PRIO_CODE(4, 7, 53),
+        BC7_MODE_PRIO_CODE(2, 7, 46),
+        BC7_MODE_PRIO_CODE(3, 7, 46),
+        BC7_MODE_PRIO_CODE(4, 7, 49),
+        BC7_MODE_PRIO_CODE(4, 7, 56),
+        BC7_MODE_PRIO_CODE(4, 7, 37),
+        BC7_MODE_PRIO_CODE(2, 7, 44),
+        BC7_MODE_PRIO_CODE(3, 7, 44),
+        BC7_MODE_PRIO_CODE(4, 7, 36),
+        BC7_MODE_PRIO_CODE(2, 7, 39),
+        BC7_MODE_PRIO_CODE(2, 7, 34),
+        BC7_MODE_PRIO_CODE(4, 7, 38),
+        BC7_MODE_PRIO_CODE(3, 7, 51),
+        BC7_MODE_PRIO_CODE(4, 7, 51),
+        BC7_MODE_PRIO_CODE(4, 7, 46),
+        BC7_MODE_PRIO_CODE(4, 7, 44),
+        BC7_MODE_PRIO_CODE(3, 7, 39),
+        BC7_MODE_PRIO_CODE(3, 7, 34),
+        BC7_MODE_PRIO_CODE(4, 7, 39),
+        BC7_MODE_PRIO_CODE(4, 7, 34),
+    };
+
+    const uint16_t *g_bc7PrioCodesRGBA = g_bc7PrioCodesRGBAData; // Pointer alias for the RGBA priority-code table.
+    const int g_bc7NumPrioCodesRGBA = sizeof(g_bc7PrioCodesRGBAData) / sizeof(g_bc7PrioCodesRGBAData[0]); // Entry count; element size comes from the array itself (not the pointer alias), matching g_bc7NumPrioCodesRGB.
+
+    // Extracts the mode field of a packed priority code: shift right by BC7_MODE_OFFSET_BITS, keep BC7_MODE_BITS bits.
+    int UnpackMode(uint16_t packed) {
+        return static_cast<int>(((1 << BC7_MODE_BITS) - 1) & (packed >> BC7_MODE_OFFSET_BITS));
+    }
+
+    // Returns the seed point count field of a packed priority code (BC7_SEED_POINT_COUNT_BITS wide) plus one.
+    int UnpackSeedPointCount(uint16_t packed) {
+        return 1 + static_cast<int>(((1 << BC7_SEED_POINT_COUNT_BITS) - 1) & (packed >> BC7_SEED_POINT_COUNT_OFFSET_BITS));
+    }
+
+    // Extracts the partition field of a packed priority code: shift right by BC7_PARTITION_OFFSET_BITS, keep BC7_PARTITION_BITS bits.
+    int UnpackPartition(uint16_t packed) {
+        return static_cast<int>(((1 << BC7_PARTITION_BITS) - 1) & (packed >> BC7_PARTITION_OFFSET_BITS));
+    }
+
+    // Extracts the rotation field of a packed priority code: shift right by BC7_ROTATION_OFFSET_BITS, keep BC7_ROTATION_BITS bits.
+    int UnpackRotation(uint16_t packed) {
+        return static_cast<int>(((1 << BC7_ROTATION_BITS) - 1) & (packed >> BC7_ROTATION_OFFSET_BITS));
+    }
+
+    // Extracts the index selector of a packed priority code: shift right by BC7_INDEX_MODE_OFFSET_BITS, keep BC7_INDEX_MODE_BITS bits.
+    int UnpackIndexSelector(uint16_t packed) {
+        return static_cast<int>(((1 << BC7_INDEX_MODE_BITS) - 1) & (packed >> BC7_INDEX_MODE_OFFSET_BITS));
+    }
+}}}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h b/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h
index b5564c0dab..b45ba5eca8 100644
--- a/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h
+++ b/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h
@@ -1,6 +1,8 @@
 #pragma once
 #include <stdint.h>
 
+// This file is generated by the MakeTables app.  Do not edit this file manually.
+
 namespace cvtt { namespace Tables { namespace BC7SC {
 
 struct TableEntry
diff --git a/thirdparty/cvtt/ConvectionKernels_BCCommon.cpp b/thirdparty/cvtt/ConvectionKernels_BCCommon.cpp
new file mode 100644
index 0000000000..be16d1db06
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BCCommon.cpp
@@ -0,0 +1,46 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels_BCCommon.h"
+
+// Maps a range value to a number of tweak rounds: 3 when range is exactly 3, 4 for every other value.
+int cvtt::Internal::BCCommon::TweakRoundsForRange(int range)
+{
+    const bool isRangeOfThree = (range == 3);
+    return isRangeOfThree ? 3 : 4;
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_BCCommon.h b/thirdparty/cvtt/ConvectionKernels_BCCommon.h
new file mode 100644
index 0000000000..3e13151acd
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_BCCommon.h
@@ -0,0 +1,104 @@
+#pragma once
+#ifndef __CVTT_BCCOMMON_H__
+#define __CVTT_BCCOMMON_H__
+
+#include "ConvectionKernels_AggregatedError.h"
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        class BCCommon // Static helpers: error metrics and pixel pre-weighting built on ParallelMath types.
+        {
+        public:
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::AInt16 MAInt16;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+            // Returns the number of tweak rounds to use for the given range (only declared in this class body).
+            static int TweakRoundsForRange(int range);
+            // Adds SqDiffUInt8(reconstructed[ch], original[ch]) to aggError for ch in [0, numRealChannels); flags is not read.
+            template<int TVectorSize>
+            static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError)
+            {
+                for (int ch = 0; ch < numRealChannels; ch++)
+                    aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch);
+            }
+            // Convenience overload: forwards with numRealChannels equal to TVectorSize.
+            template<int TVectorSize>
+            static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError)
+            {
+                ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError);
+            }
+            // Accumulates the LDR error into a local AggregatedError and returns aggError.Finalize(flags, channelWeightsSq).
+            template<int TVectorSize>
+            static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq)
+            {
+                AggregatedError<TVectorSize> aggError;
+                ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError);
+                return aggError.Finalize(flags, channelWeightsSq);
+            }
+            // Sums SqDiffSInt16 over all channels; each term is scaled by channelWeightsSq[ch] unless Flags::Uniform is set.
+            template<int TVectorSize>
+            static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
+            {
+                MFloat error = ParallelMath::MakeFloatZero();
+                if (flags & Flags::Uniform)
+                {
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                        error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]);
+                }
+                else
+                {
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                        error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
+                }
+
+                return error;
+            }
+            // Same structure as ComputeErrorHDRFast, but the per-channel metric is ParallelMath::SqDiff2CL.
+            template<int TVectorSize>
+            static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
+            {
+                MFloat error = ParallelMath::MakeFloatZero();
+                if (flags & Flags::Uniform)
+                {
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                        error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]);
+                }
+                else
+                {
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                        error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
+                }
+
+                return error;
+            }
+            // Converts each of the 16 pixels to float and multiplies every channel by channelWeights[ch].
+            template<int TChannelCount>
+            static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
+            {
+                for (int px = 0; px < 16; px++)
+                {
+                    for (int ch = 0; ch < TChannelCount; ch++)
+                        preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
+                }
+            }
+            // Same as PreWeightPixelsLDR, but the input pixels are MSInt16 instead of MUInt15.
+            template<int TChannelCount>
+            static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
+            {
+                for (int px = 0; px < 16; px++)
+                {
+                    for (int ch = 0; ch < TChannelCount; ch++)
+                        preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
+                }
+            }
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_Config.h b/thirdparty/cvtt/ConvectionKernels_Config.h
new file mode 100644
index 0000000000..e79d32b1da
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_Config.h
@@ -0,0 +1,12 @@
+#pragma once
+#ifndef __CVTT_CONFIG_H__
+#define __CVTT_CONFIG_H__
+
+#if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__)
+#define CVTT_USE_SSE2
+#endif
+
+// Define this to compile everything as a single source file
+//#define CVTT_SINGLE_FILE
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_ETC.cpp b/thirdparty/cvtt/ConvectionKernels_ETC.cpp
new file mode 100644
index 0000000000..cb202a6e9c
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_ETC.cpp
@@ -0,0 +1,3147 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels.h"
+#include "ConvectionKernels_ETC.h"
+#include "ConvectionKernels_ETC1.h"
+#include "ConvectionKernels_ETC2.h"
+#include "ConvectionKernels_ETC2_Rounding.h"
+#include "ConvectionKernels_ParallelMath.h"
+#include "ConvectionKernels_FakeBT709_Rounding.h"
+
+#include <cmath>
+
+const int cvtt::Internal::ETCComputer::g_flipTables[2][2][8] =
+{ // Each row lists 8 of the 16 block pixel indexes; the two rows of a table together cover 0..15. NOTE(review): presumably indexed [flip][sector] - confirm against callers.
+    { // Table 0: indexes 0,1 of every group of four, then indexes 2,3 of every group of four.
+        { 0, 1, 4, 5, 8, 9, 12, 13 },
+        { 2, 3, 6, 7, 10, 11, 14, 15 }
+    },
+    { // Table 1: the first eight indexes, then the last eight indexes.
+        { 0, 1, 2, 3, 4, 5, 6, 7 },
+        { 8, 9, 10, 11, 12, 13, 14, 15 }
+    },
+};
+
+cvtt::ParallelMath::Float cvtt::Internal::ETCComputer::ComputeErrorUniform(const MUInt15 pixelA[3], const MUInt15 pixelB[3])
+{
+    // Unweighted squared distance between two 3-channel colors, summed in channel order 0, 1, 2.
+    MSInt16 delta0 = ParallelMath::LosslessCast<MSInt16>::Cast(pixelA[0]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixelB[0]);
+    MSInt16 delta1 = ParallelMath::LosslessCast<MSInt16>::Cast(pixelA[1]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixelB[1]);
+    MSInt16 delta2 = ParallelMath::LosslessCast<MSInt16>::Cast(pixelA[2]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixelB[2]);
+
+    MFloat fdelta0 = ParallelMath::ToFloat(delta0);
+    MFloat fdelta1 = ParallelMath::ToFloat(delta1);
+    MFloat fdelta2 = ParallelMath::ToFloat(delta2);
+
+    return fdelta0 * fdelta0 + fdelta1 * fdelta1 + fdelta2 * fdelta2;
+}
+
+cvtt::ParallelMath::Float cvtt::Internal::ETCComputer::ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat preWeightedPixel[3], const Options options)
+{
+    // Weight each reconstructed channel, subtract the already-weighted target, and sum the squares.
+    MFloat redDelta = ParallelMath::ToFloat(reconstructed[0]) * options.redWeight - preWeightedPixel[0];
+    MFloat greenDelta = ParallelMath::ToFloat(reconstructed[1]) * options.greenWeight - preWeightedPixel[1];
+    MFloat blueDelta = ParallelMath::ToFloat(reconstructed[2]) * options.blueWeight - preWeightedPixel[2];
+    return redDelta * redDelta + greenDelta * greenDelta + blueDelta * blueDelta;
+}
+
+cvtt::ParallelMath::Float cvtt::Internal::ETCComputer::ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat preWeightedPixel[3])
+{
+    // Run the reconstructed color through ConvertToFakeBT709, then take the squared distance to the target.
+    MFloat converted[3];
+    ConvertToFakeBT709(converted, reconstructed);
+
+    MFloat delta0 = converted[0] - preWeightedPixel[0];
+    MFloat delta1 = converted[1] - preWeightedPixel[1];
+    MFloat delta2 = converted[2] - preWeightedPixel[2];
+    return delta0 * delta0 + delta1 * delta1 + delta2 * delta2;
+}
+
+void cvtt::Internal::ETCComputer::TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options)
+{
+    MUInt15 quantized[3];
+    MUInt15 unquantized[3];
+    // Scores one packed base color plus a 4-entry modifier table against 8 pixels; outputs the summed best error and the packed selectors.
+    for (int ch = 0; ch < 3; ch++)
+    {
+        quantized[ch] = (ParallelMath::RightShift(quantizedPackedColor, (ch * 5)) & ParallelMath::MakeUInt15(31));
+        // Expand by bit replication: (x << 3) | (x >> 2) in differential mode, (x << 4) | x otherwise.
+        if (isDifferential)
+            unquantized[ch] = (quantized[ch] << 3) | ParallelMath::RightShift(quantized[ch], 2);
+        else
+            unquantized[ch] = (quantized[ch] << 4) | quantized[ch];
+    }
+
+    MUInt16 selectors = ParallelMath::MakeUInt16(0);
+    MFloat totalError = ParallelMath::MakeFloatZero();
+
+    MUInt15 u15_255 = ParallelMath::MakeUInt15(255);
+    MSInt16 s16_zero = ParallelMath::MakeSInt16(0);
+    // Precompute the four candidate colors: base + modifiers[s], clamped to [0, 255] per channel.
+    MUInt15 unquantizedModified[4][3];
+    for (unsigned int s = 0; s < 4; s++)
+        for (int ch = 0; ch < 3; ch++)
+            unquantizedModified[s][ch] = ParallelMath::Min(ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::ToSInt16(unquantized[ch]) + modifiers[s], s16_zero)), u15_255);
+    // Error metric selection: the FakeBT709 flag takes priority over Uniform; weighted error is the fallback.
+    bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0);
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+
+    for (int px = 0; px < 8; px++)
+    {
+        MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+        MUInt16 bestSelector = ParallelMath::MakeUInt16(0);
+        // Try all four selectors and keep the one whose error compares Less than the best so far.
+        for (unsigned int s = 0; s < 4; s++)
+        {
+            MFloat error;
+            if (isFakeBT709)
+                error = ComputeErrorFakeBT709(unquantizedModified[s], preWeightedPixels[px]);
+            else if (isUniform)
+                error = ComputeErrorUniform(pixels[px], unquantizedModified[s]);
+            else
+                error = ComputeErrorWeighted(unquantizedModified[s], preWeightedPixels[px], options);
+
+            ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
+            bestSelector = ParallelMath::Select(ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt16(s), bestSelector);
+            bestError = ParallelMath::Min(error, bestError);
+        }
+        // Accumulate this pixel's error and store its selector at bit offset px * 2.
+        totalError = totalError + bestError;
+        selectors = selectors | (bestSelector << (px * 2));
+    }
+
+    outError = totalError;
+    outSelectors = selectors;
+}
+
+void cvtt::Internal::ETCComputer::TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options)
+{
+    MUInt15 quantized[3];
+    MUInt15 unquantized[3];
+    // Punchthrough variant of half-block scoring: three color candidates per pixel, with transparent pixels forced to selector 1 at zero error.
+    for (int ch = 0; ch < 3; ch++)
+    {
+        quantized[ch] = (ParallelMath::RightShift(quantizedPackedColor, (ch * 5)) & ParallelMath::MakeUInt15(31));
+        unquantized[ch] = (quantized[ch] << 3) | ParallelMath::RightShift(quantized[ch], 2);
+    }
+
+    MUInt16 selectors = ParallelMath::MakeUInt16(0);
+    MFloat totalError = ParallelMath::MakeFloatZero();
+
+    MUInt15 u15_255 = ParallelMath::MakeUInt15(255);
+    MSInt16 s16_zero = ParallelMath::MakeSInt16(0); // NOTE(review): not referenced again in this function
+    // Candidates: [0] = base - modifier floored at 0, [1] = base, [2] = base + modifier capped at 255.
+    MUInt15 unquantizedModified[3][3];
+    for (int ch = 0; ch < 3; ch++)
+    {
+        unquantizedModified[0][ch] = ParallelMath::Max(unquantized[ch], modifier) - modifier;
+        unquantizedModified[1][ch] = unquantized[ch];
+        unquantizedModified[2][ch] = ParallelMath::Min(unquantized[ch] + modifier, u15_255);
+    }
+    // Error metric selection: the FakeBT709 flag takes priority over Uniform; weighted error is the fallback.
+    bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0);
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+
+    for (int px = 0; px < 8; px++)
+    {
+        ParallelMath::FloatCompFlag isTransparentFloat = ParallelMath::Int16FlagToFloat(isTransparent[px]);
+
+        MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+        MUInt15 bestSelector = ParallelMath::MakeUInt15(0);
+        // Try the three candidates and keep the one whose error compares Less than the best so far.
+        for (unsigned int s = 0; s < 3; s++)
+        {
+            MFloat error;
+            if (isFakeBT709)
+                error = ComputeErrorFakeBT709(unquantizedModified[s], preWeightedPixels[px]);
+            else if (isUniform)
+                error = ComputeErrorUniform(pixels[px], unquantizedModified[s]);
+            else
+                error = ComputeErrorWeighted(unquantizedModified[s], preWeightedPixels[px], options);
+
+            ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
+            bestSelector = ParallelMath::Select(ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(s), bestSelector);
+            bestError = ParallelMath::Min(error, bestError);
+        }
+
+        // Annoying quirk: The ETC encoding machinery assumes that selectors are in the table order in the spec, which isn't
+        // the same as their encoding bits, so the transparent index is actually 1 and the valid indexes are 0, 2, and 3.
+
+        // Remap selector 1 to 2, and 2 to 3 (0 stays 0): double the index and cap it at 3
+        bestSelector = ParallelMath::Min(ParallelMath::MakeUInt15(3), bestSelector << 1);
+
+        // Transparent pixels contribute zero error and are forced to selector 1
+        ParallelMath::ConditionalSet(bestError, isTransparentFloat, ParallelMath::MakeFloatZero());
+        ParallelMath::ConditionalSet(bestSelector, isTransparent[px], ParallelMath::MakeUInt15(1));
+
+        totalError = totalError + bestError;
+        selectors = selectors | (ParallelMath::LosslessCast<MUInt16>::Cast(bestSelector) << (px * 2));
+    }
+
+    outError = totalError;
+    outSelectors = selectors;
+}
+
+void cvtt::Internal::ETCComputer::FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs)
+{
+    // For each parallel block, picks the lowest-error legal pair of per-sector differential candidates stored in drs
+    // and, when it beats bestTotalError, writes it to the best* outputs. Done scalar per block: most of the benefit of
+    // parallelization is in error evaluation, and this code has many early-outs and index lookups that vary between blocks.
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        bool canIgnore[2] = { ParallelMath::Extract(canIgnoreSector[0], block), ParallelMath::Extract(canIgnoreSector[1], block) };
+        bool canIgnoreEither = canIgnore[0] || canIgnore[1];
+        float blockBestTotalError = ParallelMath::Extract(bestTotalError, block);
+        float bestDiffErrors[2] = { FLT_MAX, FLT_MAX };
+        uint16_t bestDiffSelectors[2] = { 0, 0 };
+        uint16_t bestDiffColors[2] = { 0, 0 };
+        uint16_t bestDiffTables[2] = { 0, 0 };
+        for (int sector = 0; sector < 2; sector++)
+        {
+            unsigned int sectorNumAttempts = ParallelMath::Extract(drs.diffNumAttempts[sector], block);
+            for (unsigned int i = 0; i < sectorNumAttempts; i++)
+            {
+                float error = ParallelMath::Extract(drs.diffErrors[sector][i], block);
+                if (error < bestDiffErrors[sector])
+                {
+                    bestDiffErrors[sector] = error;
+                    bestDiffSelectors[sector] = ParallelMath::Extract(drs.diffSelectors[sector][i], block);
+                    bestDiffColors[sector] = ParallelMath::Extract(drs.diffColors[sector][i], block);
+                    bestDiffTables[sector] = ParallelMath::Extract(drs.diffTables[sector][i], block);
+                }
+            }
+        }
+        // Each sector's lowest-error attempt is now known. An ignorable sector adopts the other sector's best color.
+        if (canIgnore[0])
+            bestDiffColors[0] = bestDiffColors[1];
+        else if (canIgnore[1])
+            bestDiffColors[1] = bestDiffColors[0];
+
+        // The best differential possibilities must be better than the best total error
+        if (bestDiffErrors[0] + bestDiffErrors[1] < blockBestTotalError)
+        {
+            // Fast path if the best possible case is legal
+            if (canIgnoreEither || ETCDifferentialIsLegalScalar(bestDiffColors[0], bestDiffColors[1]))
+            {
+                ParallelMath::PutBoolInt16(bestIsThisMode, block, true);
+                ParallelMath::PutFloat(bestTotalError, block, bestDiffErrors[0] + bestDiffErrors[1]);
+                ParallelMath::PutUInt15(bestFlip, block, flip);
+                ParallelMath::PutUInt15(bestD, block, d);
+                for (int sector = 0; sector < 2; sector++)
+                {
+                    ParallelMath::PutUInt15(bestColors[sector], block, bestDiffColors[sector]);
+                    ParallelMath::PutUInt16(bestSelectors[sector], block, bestDiffSelectors[sector]);
+                    ParallelMath::PutUInt15(bestTables[sector], block, bestDiffTables[sector]);
+                }
+            }
+            else
+            {
+                // Slow path: Sort the possible cases by quality, and search valid combinations
+                // TODO: Pre-flatten the error lists so this is nicer to cache
+                unsigned int numSortIndexes[2] = { 0, 0 };
+                for (int sector = 0; sector < 2; sector++)
+                {
+                    unsigned int sectorNumAttempts = ParallelMath::Extract(drs.diffNumAttempts[sector], block);
+
+                    for (unsigned int i = 0; i < sectorNumAttempts; i++)
+                    {
+                        if (ParallelMath::Extract(drs.diffErrors[sector][i], block) < blockBestTotalError)
+                            drs.attemptSortIndexes[sector][numSortIndexes[sector]++] = i;
+                    }
+                    // Orders attempt indexes by ascending error for this block; equal errors fall back to index order.
+                    struct SortPredicate
+                    {
+                        const MFloat *diffErrors;
+                        int block;
+
+                        bool operator()(uint16_t a, uint16_t b) const
+                        {
+                            float errorA = ParallelMath::Extract(diffErrors[a], block);
+                            float errorB = ParallelMath::Extract(diffErrors[b], block);
+
+                            if (errorA < errorB)
+                                return true;
+                            if (errorA > errorB)
+                                return false;
+
+                            return a < b;
+                        }
+                    };
+
+                    SortPredicate sp;
+                    sp.diffErrors = drs.diffErrors[sector];
+                    sp.block = block;
+
+                    std::sort<uint16_t*, const SortPredicate&>(drs.attemptSortIndexes[sector], drs.attemptSortIndexes[sector] + numSortIndexes[sector], sp);
+                }
+                // Scan sector 0 candidates in ascending error order, pairing each with the first legal sector 1 candidate.
+                int scannedElements = 0; // NOTE(review): incremented below but never read in this function
+                for (unsigned int i = 0; i < numSortIndexes[0]; i++)
+                {
+                    unsigned int attemptIndex0 = drs.attemptSortIndexes[0][i];
+                    float error0 = ParallelMath::Extract(drs.diffErrors[0][attemptIndex0], block);
+
+                    scannedElements++;
+
+                    if (error0 >= blockBestTotalError)
+                        break;
+
+                    float maxError1 = ParallelMath::Extract(bestTotalError, block) - error0;
+                    uint16_t diffColor0 = ParallelMath::Extract(drs.diffColors[0][attemptIndex0], block);
+                    // Even the lowest sector 1 error exceeds the remaining error budget, so no pairing can win.
+                    if (maxError1 < bestDiffErrors[1])
+                        break;
+
+                    for (unsigned int j = 0; j < numSortIndexes[1]; j++)
+                    {
+                        unsigned int attemptIndex1 = drs.attemptSortIndexes[1][j];
+                        float error1 = ParallelMath::Extract(drs.diffErrors[1][attemptIndex1], block);
+
+                        scannedElements++;
+                        // Sector 1 candidates are sorted, so once one reaches the budget the remaining ones do too.
+                        if (error1 >= maxError1)
+                            break;
+
+                        uint16_t diffColor1 = ParallelMath::Extract(drs.diffColors[1][attemptIndex1], block);
+
+                        if (ETCDifferentialIsLegalScalar(diffColor0, diffColor1))
+                        {
+                            blockBestTotalError = error0 + error1;
+
+                            ParallelMath::PutBoolInt16(bestIsThisMode, block, true);
+                            ParallelMath::PutFloat(bestTotalError, block, blockBestTotalError);
+                            ParallelMath::PutUInt15(bestFlip, block, flip);
+                            ParallelMath::PutUInt15(bestD, block, d);
+                            ParallelMath::PutUInt15(bestColors[0], block, diffColor0);
+                            ParallelMath::PutUInt15(bestColors[1], block, diffColor1);
+                            ParallelMath::PutUInt16(bestSelectors[0], block, ParallelMath::Extract(drs.diffSelectors[0][attemptIndex0], block));
+                            ParallelMath::PutUInt16(bestSelectors[1], block, ParallelMath::Extract(drs.diffSelectors[1][attemptIndex1], block));
+                            ParallelMath::PutUInt15(bestTables[0], block, ParallelMath::Extract(drs.diffTables[0][attemptIndex0], block));
+                            ParallelMath::PutUInt15(bestTables[1], block, ParallelMath::Extract(drs.diffTables[1][attemptIndex1], block));
+                            break; // the first legal match is the lowest-error partner for this sector 0 candidate
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+cvtt::ParallelMath::Int16CompFlag cvtt::Internal::ETCComputer::ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b)
+{
+    // Accept only when the signed difference satisfies -5 < (b - a) < 4.
+    MSInt16 delta = ParallelMath::LosslessCast<MSInt16>::Cast(b) - ParallelMath::LosslessCast<MSInt16>::Cast(a);
+    return ParallelMath::Less(delta, ParallelMath::MakeSInt16(4)) & ParallelMath::Less(ParallelMath::MakeSInt16(-5), delta);
+}
+
+cvtt::ParallelMath::Int16CompFlag cvtt::Internal::ETCComputer::ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b)
+{
+    // Check the three packed fields (bit offsets 10, 5 and 0) independently and AND the per-field results.
+    MUInt15 fieldMask = ParallelMath::MakeUInt15(31);
+    ParallelMath::Int16CompFlag highLegal = ETCDifferentialIsLegalForChannel(ParallelMath::RightShift(a, 10), ParallelMath::RightShift(b, 10));
+    ParallelMath::Int16CompFlag midLegal = ETCDifferentialIsLegalForChannel(ParallelMath::RightShift(a, 5) & fieldMask, ParallelMath::RightShift(b, 5) & fieldMask);
+    return highLegal & midLegal & ETCDifferentialIsLegalForChannel(a & fieldMask, b & fieldMask);
+}
+
+bool cvtt::Internal::ETCComputer::ETCDifferentialIsLegalForChannelScalar(const uint16_t &a, const uint16_t &b)
+{
+    // The signed difference b - a must lie in the inclusive range [-4, 3].
+    const int16_t delta = static_cast<int16_t>(static_cast<int16_t>(b) - static_cast<int16_t>(a));
+    return !(delta < -4 || delta > 3);
+}
+
+bool cvtt::Internal::ETCComputer::ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b)
+{
+    // a and b are packed colors with one 5-bit field per channel at bit offsets 0, 5 and 10.
+    // The pair is legal only when every field passes ETCDifferentialIsLegalForChannelScalar.
+    return ETCDifferentialIsLegalForChannelScalar((a >> 10), (b >> 10))
+        && ETCDifferentialIsLegalForChannelScalar((a >> 5) & 31, (b >> 5) & 31)
+        && ETCDifferentialIsLegalForChannelScalar(a & 31, b & 31);
+}
+
+void cvtt::Internal::ETCComputer::EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options)
+{
+    bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0);
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+
+    ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false);
+
+    MUInt15 isolatedTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) };
+    MUInt15 lineTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) };
+
+    MUInt15 numPixelsIsolated = ParallelMath::MakeUInt15(0);
+
+    // To speed this up, we compute line total as the sum, then subtract out isolated
+    for (unsigned int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            isolatedTotal[ch] = isolatedTotal[ch] + ParallelMath::SelectOrZero(isIsolated[px], pixels[px][ch]);
+            lineTotal[ch] = lineTotal[ch] + pixels[px][ch];
+        }
+        numPixelsIsolated = numPixelsIsolated + ParallelMath::SelectOrZero(isIsolated[px], ParallelMath::MakeUInt15(1));
+    }
+
+    for (int ch = 0; ch < 3; ch++)
+        lineTotal[ch] = lineTotal[ch] - isolatedTotal[ch];
+
+    MUInt15 numPixelsLine = ParallelMath::MakeUInt15(16) - numPixelsIsolated;
+
+    MUInt15 isolatedAverageQuantized[3];
+    MUInt15 isolatedAverageTargets[3];
+    {
+        int divisors[ParallelMath::ParallelSize];
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            divisors[block] = ParallelMath::Extract(numPixelsIsolated, block) * 34;
+
+        MUInt15 addend = (numPixelsIsolated << 4) | numPixelsIsolated;
+        for (int ch = 0; ch < 3; ch++)
+        {
+            // isolatedAverageQuantized[ch] = (isolatedTotal[ch] * 2 + numPixelsIsolated * 17) / (numPixelsIsolated * 34);
+
+            MUInt15 numerator = isolatedTotal[ch] + isolatedTotal[ch];
+            if (!isFakeBT709)
+                numerator = numerator + addend;
+
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                int divisor = divisors[block];
+                if (divisor == 0)
+                    ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, 0);
+                else
+                    ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, ParallelMath::Extract(numerator, block) / divisor);
+            }
+
+            isolatedAverageTargets[ch] = numerator;
+        }
+    }
+
+    if (isFakeBT709)
+        ResolveTHFakeBT709Rounding(isolatedAverageQuantized, isolatedAverageTargets, numPixelsIsolated);
+
+    MUInt15 isolatedColor[3];
+    for (int ch = 0; ch < 3; ch++)
+        isolatedColor[ch] = (isolatedAverageQuantized[ch]) | (isolatedAverageQuantized[ch] << 4);
+
+    MFloat isolatedError[16];
+    for (int px = 0; px < 16; px++)
+    {
+        if (isFakeBT709)
+            isolatedError[px] = ComputeErrorFakeBT709(isolatedColor, preWeightedPixels[px]);
+        else if (isUniform)
+            isolatedError[px] = ComputeErrorUniform(pixels[px], isolatedColor);
+        else
+            isolatedError[px] = ComputeErrorWeighted(isolatedColor, preWeightedPixels[px], options);
+    }
+
+    MSInt32 bestSelectors = ParallelMath::MakeSInt32(0);
+    MUInt15 bestTable = ParallelMath::MakeUInt15(0);
+    MUInt15 bestLineColor = ParallelMath::MakeUInt15(0);
+
+    MSInt16 maxLine = ParallelMath::LosslessCast<MSInt16>::Cast(numPixelsLine);
+    MSInt16 minLine = ParallelMath::MakeSInt16(0) - maxLine;
+
+    int16_t clusterMaxLine = 0;
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        int16_t blockMaxLine = ParallelMath::Extract(maxLine, block);
+        if (blockMaxLine > clusterMaxLine)
+            clusterMaxLine = blockMaxLine;
+    }
+
+    int16_t clusterMinLine = -clusterMaxLine;
+
+    int lineDivisors[ParallelMath::ParallelSize];
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        lineDivisors[block] = ParallelMath::Extract(numPixelsLine, block) * 34;
+
+    MUInt15 lineAddend = (numPixelsLine << 4) | numPixelsLine;
+
+    for (int table = 0; table < 8; table++)
+    {
+        int numUniqueColors[ParallelMath::ParallelSize];
+        MUInt15 uniqueQuantizedColors[31];
+
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            numUniqueColors[block] = 0;
+
+        MUInt15 modifier = ParallelMath::MakeUInt15(cvtt::Tables::ETC2::g_thModifierTable[table]);
+        MUInt15 modifierOffset = (modifier + modifier);
+
+        for (int16_t offsetPremultiplier = clusterMinLine; offsetPremultiplier <= clusterMaxLine; offsetPremultiplier++)
+        {
+            MSInt16 clampedOffsetPremultiplier = ParallelMath::Max(minLine, ParallelMath::Min(maxLine, ParallelMath::MakeSInt16(offsetPremultiplier)));
+            MSInt16 modifierAddend = ParallelMath::CompactMultiply(clampedOffsetPremultiplier, modifierOffset);
+
+            MUInt15 quantized[3];
+            if (isFakeBT709)
+            {
+                MUInt15 targets[3];
+                for (int ch = 0; ch < 3; ch++)
+                {
+                    //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34));
+                    MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch]) + modifierAddend));
+                    MUInt15 divided = ParallelMath::MakeUInt15(0);
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        int divisor = lineDivisors[block];
+                        if (divisor == 0)
+                            ParallelMath::PutUInt15(divided, block, 0);
+                        else
+                            ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor);
+                    }
+                    quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided);
+                    targets[ch] = numerator;
+                }
+
+                ResolveTHFakeBT709Rounding(quantized, targets, numPixelsLine);
+            }
+            else
+            {
+                for (int ch = 0; ch < 3; ch++)
+                {
+                    //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + numDAIILine * 17 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34));
+                    MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch] + lineAddend) + modifierAddend));
+                    MUInt15 divided = ParallelMath::MakeUInt15(0);
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        int divisor = lineDivisors[block];
+                        if (divisor == 0)
+                            ParallelMath::PutUInt15(divided, block, 0);
+                        else
+                            ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor);
+                    }
+                    quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided);
+                }
+            }
+
+            MUInt15 packedColor = quantized[0] | (quantized[1] << 5) | (quantized[2] << 10);
+
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                uint16_t blockPackedColor = ParallelMath::Extract(packedColor, block);
+                if (numUniqueColors[block] == 0 || blockPackedColor != ParallelMath::Extract(uniqueQuantizedColors[numUniqueColors[block] - 1], block))
+                    ParallelMath::PutUInt15(uniqueQuantizedColors[numUniqueColors[block]++], block, blockPackedColor);
+            }
+        }
+
+        // Stripe unfilled unique colors
+        int maxUniqueColors = 0;
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            if (numUniqueColors[block] > maxUniqueColors)
+                maxUniqueColors = numUniqueColors[block];
+        }
+
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            uint16_t fillColor = ParallelMath::Extract(uniqueQuantizedColors[0], block);
+
+            int numUnique = numUniqueColors[block];
+            for (int fill = numUnique + 1; fill < maxUniqueColors; fill++)
+                ParallelMath::PutUInt15(uniqueQuantizedColors[fill], block, fillColor);
+        }
+
+        for (int ci = 0; ci < maxUniqueColors; ci++)
+        {
+            MUInt15 lineColors[3][3];
+            for (int ch = 0; ch < 3; ch++)
+            {
+                MUInt15 quantizedChannel = (ParallelMath::RightShift(uniqueQuantizedColors[ci], (ch * 5)) & ParallelMath::MakeUInt15(15));
+
+                MUInt15 unquantizedColor = (quantizedChannel << 4) | quantizedChannel;
+                lineColors[0][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantizedColor + modifier);
+                lineColors[1][ch] = unquantizedColor;
+                lineColors[2][ch] = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedColor) - ParallelMath::LosslessCast<MSInt16>::Cast(modifier)));
+            }
+
+            MSInt32 selectors = ParallelMath::MakeSInt32(0);
+            MFloat error = ParallelMath::MakeFloatZero();
+            for (int px = 0; px < 16; px++)
+            {
+                MFloat pixelError = isolatedError[px];
+
+                MUInt15 pixelBestSelector = ParallelMath::MakeUInt15(0);
+                for (int i = 0; i < 3; i++)
+                {
+                    MFloat error = isUniform ? ComputeErrorUniform(lineColors[i], pixels[px]) : ComputeErrorWeighted(lineColors[i], preWeightedPixels[px], options);
+                    ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, pixelError);
+                    pixelError = ParallelMath::Min(error, pixelError);
+                    pixelBestSelector = ParallelMath::Select(ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(i + 1), pixelBestSelector);
+                }
+
+                error = error + pixelError;
+                selectors = selectors | (ParallelMath::ToInt32(pixelBestSelector) << (px * 2));
+            }
+
+            ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestError));
+            bestError = ParallelMath::Min(error, bestError);
+
+            if (ParallelMath::AnySet(errorBetter))
+            {
+                ParallelMath::ConditionalSet(bestLineColor, errorBetter, uniqueQuantizedColors[ci]);
+                ParallelMath::ConditionalSet(bestSelectors, errorBetter, selectors);
+                ParallelMath::ConditionalSet(bestTable, errorBetter, ParallelMath::MakeUInt15(table));
+                bestIsThisMode = bestIsThisMode | errorBetter;
+            }
+        }
+    }
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        if (ParallelMath::Extract(bestIsThisMode, block))
+        {
+            uint32_t lowBits = 0;
+            uint32_t highBits = 0;
+
+            uint16_t blockBestLineColor = ParallelMath::Extract(bestLineColor, block);
+            ParallelMath::ScalarUInt16 blockIsolatedAverageQuantized[3];
+
+            for (int ch = 0; ch < 3; ch++)
+                blockIsolatedAverageQuantized[ch] = ParallelMath::Extract(isolatedAverageQuantized[ch], block);
+
+            uint16_t blockBestTable = ParallelMath::Extract(bestTable, block);
+            int32_t blockBestSelectors = ParallelMath::Extract(bestSelectors, block);
+
+            ParallelMath::ScalarUInt16 lineColor[3];
+            for (int ch = 0; ch < 3; ch++)
+                lineColor[ch] = (blockBestLineColor >> (ch * 5)) & 15;
+
+            EmitTModeBlock(outputBuffer + block * 8, lineColor, blockIsolatedAverageQuantized, blockBestSelectors, blockBestTable, true);
+        }
+    }
+}
+
+// Evaluates H-mode encodings for every block of the parallel group. Pixels flagged in groupings[]
+// form sector 1 and the remaining pixels form sector 0. bestError is in/out: where a candidate's summed
+// error is lower, bestError is lowered and EmitHModeBlock is called with outputBuffer + block * 8.
+// he provides storage for the candidate colors and their cached per-pixel errors and sign bits.
+void cvtt::Internal::ETCComputer::EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options)
+{
+    bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0);
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+
+    MUInt15 zero15 = ParallelMath::MakeUInt15(0);
+    ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false);
+
+    // Per-sector pixel counts and per-channel sums; index 1 is the sector selected by groupings[].
+    MUInt15 counts[2] = { zero15, zero15 };
+    MUInt15 totals[2][3] =
+    {
+        { zero15, zero15, zero15 },
+        { zero15, zero15, zero15 }
+    };
+
+    for (unsigned int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            totals[0][ch] = totals[0][ch] + pixels[px][ch]; // all pixels for now, corrected after the loop
+            totals[1][ch] = totals[1][ch] + ParallelMath::SelectOrZero(groupings[px], pixels[px][ch]);
+        }
+        counts[1] = counts[1] + ParallelMath::SelectOrZero(groupings[px], ParallelMath::MakeUInt15(1));
+    }
+    // totals[0] currently covers all 16 pixels; remove sector 1's share to leave sector 0.
+    for (int ch = 0; ch < 3; ch++)
+        totals[0][ch] = totals[0][ch] - totals[1][ch];
+    counts[0] = ParallelMath::MakeUInt15(16) - counts[1];
+
+    MUInt16 bestSectorBits = ParallelMath::MakeUInt16(0);
+    MUInt16 bestSignBits = ParallelMath::MakeUInt16(0);
+    MUInt15 bestColors[2] = { zero15, zero15 };
+    MUInt15 bestTable = ParallelMath::MakeUInt15(0);
+
+    for (int table = 0; table < 8; table++)
+    {
+        int modifier = cvtt::Tables::ETC1::g_thModifierTable[table];
+
+        // Collect each sector's candidate colors: sweep an offset over the sector sum and keep every quantized result that differs from the previous one.
+        for (int sector = 0; sector < 2; sector++)
+        {
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                const int maxSectorUniqueColors = 31; // capacity of blockUniqueQuantizedColors; also bounds the assert below
+                int blockNumUniqueColors = 0;
+                uint16_t blockUniqueQuantizedColors[maxSectorUniqueColors];
+
+                int maxOffsetMultiplier = ParallelMath::Extract(counts[sector], block);
+                int minOffsetMultiplier = -maxOffsetMultiplier;
+                int modifierOffset = modifier * 2;
+                int blockSectorCounts = ParallelMath::Extract(counts[sector], block);
+                int blockSectorTotals[3];
+                for (int ch = 0; ch < 3; ch++)
+                    blockSectorTotals[ch] = ParallelMath::Extract(totals[sector][ch], block);
+
+                for (int offsetPremultiplier = minOffsetMultiplier; offsetPremultiplier <= maxOffsetMultiplier; offsetPremultiplier++)
+                {
+                    // TODO: This isn't ideal for FakeBT709
+                    int16_t quantized[3];
+                    for (int ch = 0; ch < 3; ch++)
+                    {
+                        if (blockSectorCounts == 0)
+                            quantized[ch] = 0;
+                        else
+                            quantized[ch] = std::min<int16_t>(15, std::max<int16_t>(0, (blockSectorTotals[ch] * 2 + blockSectorCounts * 17 + modifierOffset * offsetPremultiplier)) / (blockSectorCounts * 34));
+                    }
+
+                    uint16_t packedColor = (quantized[0] << 10) | (quantized[1] << 5) | quantized[2]; // channels at bit offsets 10 / 5 / 0
+                    if (blockNumUniqueColors == 0 || packedColor != blockUniqueQuantizedColors[blockNumUniqueColors - 1])
+                    {
+                        assert(blockNumUniqueColors < maxSectorUniqueColors);
+                        blockUniqueQuantizedColors[blockNumUniqueColors++] = packedColor;
+                    }
+                }
+
+                ParallelMath::PutUInt15(he.numUniqueColors[sector], block, blockNumUniqueColors);
+
+                int baseIndex = 0; // sector 1's colors are stored directly after this block's sector 0 colors
+                if (sector == 1)
+                    baseIndex = ParallelMath::Extract(he.numUniqueColors[0], block);
+
+                for (int i = 0; i < blockNumUniqueColors; i++)
+                    ParallelMath::PutUInt15(he.uniqueQuantizedColors[baseIndex + i], block, blockUniqueQuantizedColors[i]);
+            }
+        }
+
+        MUInt15 totalColors = he.numUniqueColors[0] + he.numUniqueColors[1];
+        int maxErrorColors = 0;
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            maxErrorColors = std::max<int>(maxErrorColors, ParallelMath::Extract(totalColors, block));
+        // Blocks with fewer candidates than the widest block get their unused slots filled with their first color.
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            int lastColor = ParallelMath::Extract(totalColors, block);
+            uint16_t stripeColor = ParallelMath::Extract(he.uniqueQuantizedColors[0], block);
+            for (int i = lastColor; i < maxErrorColors; i++)
+                ParallelMath::PutUInt15(he.uniqueQuantizedColors[i], block, stripeColor);
+        }
+        // For every candidate slot, cache per pixel the smaller error of (color + modifier, color - modifier) and which one won.
+        for (int ci = 0; ci < maxErrorColors; ci++)
+        {
+            MUInt15 fifteen = ParallelMath::MakeUInt15(15);
+            MUInt15 twoFiftyFive = ParallelMath::MakeUInt15(255);
+            MSInt16 zeroS16 = ParallelMath::MakeSInt16(0);
+
+            MUInt15 colors[2][3];
+            for (int ch = 0; ch < 3; ch++)
+            {
+                MUInt15 quantizedChannel = ParallelMath::RightShift(he.uniqueQuantizedColors[ci], ((2 - ch) * 5)) & fifteen;
+                MUInt15 unquantizedColor = (quantizedChannel << 4) | quantizedChannel;
+                colors[0][ch] = ParallelMath::Min(twoFiftyFive, unquantizedColor + modifier);
+                colors[1][ch] = ParallelMath::ToUInt15(ParallelMath::Max(zeroS16, ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedColor) - ParallelMath::MakeSInt16(modifier)));
+            }
+
+            MUInt16 signBits = ParallelMath::MakeUInt16(0);
+            for (int px = 0; px < 16; px++)
+            {
+                MFloat errors[2];
+                for (int i = 0; i < 2; i++)
+                {
+                    if (isFakeBT709)
+                        errors[i] = ComputeErrorFakeBT709(colors[i], preWeightedPixels[px]);
+                    else if (isUniform)
+                        errors[i] = ComputeErrorUniform(colors[i], pixels[px]);
+                    else
+                        errors[i] = ComputeErrorWeighted(colors[i], preWeightedPixels[px], options);
+                }
+
+                ParallelMath::Int16CompFlag errorOneLess = ParallelMath::FloatFlagToInt16(ParallelMath::Less(errors[1], errors[0]));
+                he.errors[ci][px] = ParallelMath::Min(errors[0], errors[1]);
+                signBits = signBits | ParallelMath::SelectOrZero(errorOneLess, ParallelMath::MakeUInt16(1 << px));
+            }
+            he.signBits[ci] = signBits;
+        }
+        // Try pairs of (sector 0 candidate, sector 1 candidate); the block with the most pairs sets the iteration count.
+        int maxUniqueColorCombos = 0;
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            int numUniqueColorCombos = ParallelMath::Extract(he.numUniqueColors[0], block) * ParallelMath::Extract(he.numUniqueColors[1], block);
+            if (numUniqueColorCombos > maxUniqueColorCombos)
+                maxUniqueColorCombos = numUniqueColorCombos;
+        }
+
+        MUInt15 indexes[2] = { zero15, zero15 };
+        MUInt15 maxIndex[2] = { he.numUniqueColors[0] - ParallelMath::MakeUInt15(1), he.numUniqueColors[1] - ParallelMath::MakeUInt15(1) };
+
+        int block1Starts[ParallelMath::ParallelSize];
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            block1Starts[block] = ParallelMath::Extract(he.numUniqueColors[0], block);
+
+        for (int combo = 0; combo < maxUniqueColorCombos; combo++)
+        {
+            // Use the current index pair, then step to the next one; stepping before use could skip pair (0, 0) entirely.
+            MUInt15 index0 = indexes[0];
+            MUInt15 index1 = indexes[1];
+            indexes[0] = index0 + ParallelMath::MakeUInt15(1);
+            ParallelMath::Int16CompFlag index0Overflow = ParallelMath::Less(maxIndex[0], indexes[0]);
+            ParallelMath::ConditionalSet(indexes[0], index0Overflow, ParallelMath::MakeUInt15(0)); // index 0 wraps and carries into index 1
+            indexes[1] = ParallelMath::Min(maxIndex[1], index1 + ParallelMath::SelectOrZero(index0Overflow, ParallelMath::MakeUInt15(1))); // index 1 saturates at maxIndex[1]
+
+            int ci0[ParallelMath::ParallelSize];
+            int ci1[ParallelMath::ParallelSize];
+            MUInt15 color0;
+            MUInt15 color1;
+
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                ci0[block] = ParallelMath::Extract(index0, block);
+                ci1[block] = ParallelMath::Extract(index1, block) + block1Starts[block];
+                ParallelMath::PutUInt15(color0, block, ParallelMath::Extract(he.uniqueQuantizedColors[ci0[block]], block));
+                ParallelMath::PutUInt15(color1, block, ParallelMath::Extract(he.uniqueQuantizedColors[ci1[block]], block));
+            }
+
+            MFloat totalError = ParallelMath::MakeFloatZero();
+            MUInt16 sectorBits = ParallelMath::MakeUInt16(0);
+            MUInt16 signBits = ParallelMath::MakeUInt16(0);
+            for (int px = 0; px < 16; px++)
+            {
+                MFloat errorCI0;
+                MFloat errorCI1;
+                MUInt16 signBits0;
+                MUInt16 signBits1;
+
+                for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                {
+                    ParallelMath::PutFloat(errorCI0, block, ParallelMath::Extract(he.errors[ci0[block]][px], block));
+                    ParallelMath::PutFloat(errorCI1, block, ParallelMath::Extract(he.errors[ci1[block]][px], block));
+                    ParallelMath::PutUInt16(signBits0, block, ParallelMath::Extract(he.signBits[ci0[block]], block));
+                    ParallelMath::PutUInt16(signBits1, block, ParallelMath::Extract(he.signBits[ci1[block]], block));
+                }
+                // Each pixel takes whichever of the two colors has the lower cached error; ties keep color 0.
+                totalError = totalError + ParallelMath::Min(errorCI0, errorCI1);
+
+                MUInt16 bitPosition = ParallelMath::MakeUInt16(1 << px);
+
+                ParallelMath::Int16CompFlag error1Better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(errorCI1, errorCI0));
+
+                sectorBits = sectorBits | ParallelMath::SelectOrZero(error1Better, bitPosition);
+                signBits = signBits | (bitPosition & ParallelMath::Select(error1Better, signBits1, signBits0));
+            }
+
+            ParallelMath::FloatCompFlag totalErrorBetter = ParallelMath::Less(totalError, bestError);
+            ParallelMath::Int16CompFlag totalErrorBetter16 = ParallelMath::FloatFlagToInt16(totalErrorBetter);
+            if (ParallelMath::AnySet(totalErrorBetter16))
+            {
+                bestIsThisMode = bestIsThisMode | totalErrorBetter16;
+                ParallelMath::ConditionalSet(bestTable, totalErrorBetter16, ParallelMath::MakeUInt15(table));
+                ParallelMath::ConditionalSet(bestColors[0], totalErrorBetter16, color0);
+                ParallelMath::ConditionalSet(bestColors[1], totalErrorBetter16, color1);
+                ParallelMath::ConditionalSet(bestSectorBits, totalErrorBetter16, sectorBits);
+                ParallelMath::ConditionalSet(bestSignBits, totalErrorBetter16, signBits);
+                bestError = ParallelMath::Min(totalError, bestError);
+            }
+        }
+    }
+
+    if (ParallelMath::AnySet(bestIsThisMode))
+    {
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            if (!ParallelMath::Extract(bestIsThisMode, block))
+                continue;
+
+            ParallelMath::ScalarUInt16 blockBestColors[2] = { ParallelMath::Extract(bestColors[0], block), ParallelMath::Extract(bestColors[1], block) };
+            ParallelMath::ScalarUInt16 blockBestSectorBits = ParallelMath::Extract(bestSectorBits, block);
+            ParallelMath::ScalarUInt16 blockBestSignBits = ParallelMath::Extract(bestSignBits, block);
+            ParallelMath::ScalarUInt16 blockBestTable = ParallelMath::Extract(bestTable, block);
+
+            EmitHModeBlock(outputBuffer + block * 8, blockBestColors, blockBestSectorBits, blockBestSignBits, blockBestTable, true);
+        }
+    }
+}
+
+void cvtt::Internal::ETCComputer::EncodeVirtualTModePunchthrough(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolatedBase[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options)
+{
+    // We treat T and H mode as the same mode ("Virtual T mode") with punchthrough, because of how the colors work:
+    //
+    // T mode: C1, C2+M, Transparent, C2-M
+    // H mode: C1+M, C1-M, Transparent, C2-M
+    //
+    // So in either case, we have 2 colors +/- a modifier, and a third unique color, which is basically T mode except without the middle color.
+    // The only thing that matters is whether it's better to store the isolated color as T mode color 1, or store it offset in H mode color 2.
+    //
+    // Sometimes it won't even be possible to store it in H mode color 2 because the table low bit derives from a numeric comparison of the colors,
+    // but unlike opaque blocks, we can't flip them.
+    bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0);
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+
+    ParallelMath::FloatCompFlag isTransparentF[16];
+    for (int px = 0; px < 16; px++)
+        isTransparentF[px] = ParallelMath::Int16FlagToFloat(isTransparent[px]);
+
+    ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false);
+    ParallelMath::Int16CompFlag bestIsHMode = ParallelMath::MakeBoolInt16(false);
+
+    MUInt15 isolatedTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) };
+    MUInt15 lineTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) };
+
+    MUInt15 numPixelsIsolated = ParallelMath::MakeUInt15(0);
+    MUInt15 numPixelsLine = ParallelMath::MakeUInt15(0);
+
+    ParallelMath::Int16CompFlag isIsolated[16];
+    ParallelMath::Int16CompFlag isLine[16];
+
+    for (unsigned int px = 0; px < 16; px++)
+    {
+        ParallelMath::Int16CompFlag isOpaque = ParallelMath::Not(isTransparent[px]);
+        isIsolated[px] = isIsolatedBase[px] & isOpaque;
+        isLine[px] = ParallelMath::Not(isIsolatedBase[px]) & isOpaque;
+    }
+
+    for (unsigned int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            isolatedTotal[ch] = isolatedTotal[ch] + ParallelMath::SelectOrZero(isIsolated[px], pixels[px][ch]);
+            lineTotal[ch] = lineTotal[ch] + ParallelMath::SelectOrZero(isLine[px], pixels[px][ch]);
+        }
+        numPixelsIsolated = numPixelsIsolated + ParallelMath::SelectOrZero(isIsolated[px], ParallelMath::MakeUInt15(1));
+        numPixelsLine = numPixelsLine + ParallelMath::SelectOrZero(isLine[px], ParallelMath::MakeUInt15(1));
+    }
+
+    MUInt15 isolatedAverageQuantized[3];
+    MUInt15 hModeIsolatedQuantized[8][3];
+    MUInt15 isolatedAverageTargets[3];
+    {
+        int divisors[ParallelMath::ParallelSize];
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            divisors[block] = ParallelMath::Extract(numPixelsIsolated, block) * 34;
+
+        MUInt15 addend = (numPixelsIsolated << 4) | numPixelsIsolated;
+        for (int ch = 0; ch < 3; ch++)
+        {
+            // isolatedAverageQuantized[ch] = (isolatedTotal[ch] * 2 + numPixelsIsolated * 17) / (numPixelsIsolated * 34);
+
+            MUInt15 numerator = isolatedTotal[ch] + isolatedTotal[ch];
+            if (!isFakeBT709)
+                numerator = numerator + addend;
+
+            MUInt15 hModeIsolatedNumerators[8];
+            for (int table = 0; table < 8; table++)
+            {
+                // FIXME: Handle fake BT.709 correctly
+                MUInt15 offsetTotal = isolatedTotal[ch] + ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(ParallelMath::MakeUInt15(cvtt::Tables::ETC2::g_thModifierTable[table]), numPixelsIsolated));
+
+                hModeIsolatedNumerators[table] = (offsetTotal + offsetTotal) + addend;
+            }
+
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                int divisor = divisors[block];
+                if (divisor == 0)
+                {
+                    ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, 0);
+                    for (int table = 0; table < 8; table++)
+                        ParallelMath::PutUInt15(hModeIsolatedQuantized[table][ch], block, 0);
+                }
+                else
+                {
+                    ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, ParallelMath::Extract(numerator, block) / divisor);
+                    for (int table = 0; table < 8; table++)
+                        ParallelMath::PutUInt15(hModeIsolatedQuantized[table][ch], block, ParallelMath::Extract(hModeIsolatedNumerators[table], block) / divisor);
+                }
+            }
+
+            isolatedAverageTargets[ch] = numerator;
+        }
+    }
+
+    if (isFakeBT709)
+        ResolveTHFakeBT709Rounding(isolatedAverageQuantized, isolatedAverageTargets, numPixelsIsolated);
+
+    for (int table = 0; table < 8; table++)
+        for (int ch = 0; ch < 3; ch++)
+            hModeIsolatedQuantized[table][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), hModeIsolatedQuantized[table][ch]);
+
+    MUInt15 isolatedColor[3];
+    for (int ch = 0; ch < 3; ch++)
+        isolatedColor[ch] = (isolatedAverageQuantized[ch]) | (isolatedAverageQuantized[ch] << 4);
+
+    MFloat isolatedError[16];
+    for (int px = 0; px < 16; px++)
+    {
+        if (isFakeBT709)
+            isolatedError[px] = ComputeErrorFakeBT709(isolatedColor, preWeightedPixels[px]);
+        else if (isUniform)
+            isolatedError[px] = ComputeErrorUniform(pixels[px], isolatedColor);
+        else
+            isolatedError[px] = ComputeErrorWeighted(isolatedColor, preWeightedPixels[px], options);
+
+        ParallelMath::ConditionalSet(isolatedError[px], isTransparentF[px], ParallelMath::MakeFloatZero());
+    }
+
+    MSInt32 bestSelectors = ParallelMath::MakeSInt32(0);
+    MUInt15 bestTable = ParallelMath::MakeUInt15(0);
+    MUInt15 bestLineColor = ParallelMath::MakeUInt15(0);
+    MUInt15 bestIsolatedColor = ParallelMath::MakeUInt15(0);
+    MUInt15 bestHModeColor2 = ParallelMath::MakeUInt15(0);
+    ParallelMath::Int16CompFlag bestUseHMode = ParallelMath::MakeBoolInt16(false);
+
+    MSInt16 maxLine = ParallelMath::LosslessCast<MSInt16>::Cast(numPixelsLine);
+    MSInt16 minLine = ParallelMath::MakeSInt16(0) - maxLine;
+
+    int16_t clusterMaxLine = 0;
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        int16_t blockMaxLine = ParallelMath::Extract(maxLine, block);
+        if (blockMaxLine > clusterMaxLine)
+            clusterMaxLine = blockMaxLine;
+    }
+
+    int16_t clusterMinLine = -clusterMaxLine;
+
+    int lineDivisors[ParallelMath::ParallelSize];
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        lineDivisors[block] = ParallelMath::Extract(numPixelsLine, block) * 34;
+
+    MUInt15 lineAddend = (numPixelsLine << 4) | numPixelsLine;
+
+    for (int table = 0; table < 8; table++)
+    {
+        int numUniqueColors[ParallelMath::ParallelSize];
+        MUInt15 uniqueQuantizedColors[31];
+
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            numUniqueColors[block] = 0;
+
+        MUInt15 modifier = ParallelMath::MakeUInt15(cvtt::Tables::ETC2::g_thModifierTable[table]);
+        MUInt15 modifierOffset = (modifier + modifier);
+
+        for (int16_t offsetPremultiplier = clusterMinLine; offsetPremultiplier <= clusterMaxLine; offsetPremultiplier += 2)
+        {
+            MSInt16 clampedOffsetPremultiplier = ParallelMath::Max(minLine, ParallelMath::Min(maxLine, ParallelMath::MakeSInt16(offsetPremultiplier)));
+            MSInt16 modifierAddend = ParallelMath::CompactMultiply(clampedOffsetPremultiplier, modifierOffset);
+
+            MUInt15 quantized[3];
+            if (isFakeBT709)
+            {
+                MUInt15 targets[3];
+                for (int ch = 0; ch < 3; ch++)
+                {
+                    //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34));
+                    MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch]) + modifierAddend));
+                    MUInt15 divided = ParallelMath::MakeUInt15(0);
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        int divisor = lineDivisors[block];
+                        if (divisor == 0)
+                            ParallelMath::PutUInt15(divided, block, 0);
+                        else
+                            ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor);
+                    }
+                    quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided);
+                    targets[ch] = numerator;
+                }
+
+                ResolveTHFakeBT709Rounding(quantized, targets, numPixelsLine);
+            }
+            else
+            {
+                for (int ch = 0; ch < 3; ch++)
+                {
+                    //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + numDAIILine * 17 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34));
+                    MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch] + lineAddend) + modifierAddend));
+                    MUInt15 divided = ParallelMath::MakeUInt15(0);
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        int divisor = lineDivisors[block];
+                        if (divisor == 0)
+                            ParallelMath::PutUInt15(divided, block, 0);
+                        else
+                            ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor);
+                    }
+                    quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided);
+                }
+            }
+
+            MUInt15 packedColor = (quantized[0] << 10) | (quantized[1] << 5) | quantized[2];
+
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                uint16_t blockPackedColor = ParallelMath::Extract(packedColor, block);
+                if (numUniqueColors[block] == 0 || blockPackedColor != ParallelMath::Extract(uniqueQuantizedColors[numUniqueColors[block] - 1], block))
+                    ParallelMath::PutUInt15(uniqueQuantizedColors[numUniqueColors[block]++], block, blockPackedColor);
+            }
+        }
+
+        // Stripe unfilled unique colors
+        int maxUniqueColors = 0;
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            if (numUniqueColors[block] > maxUniqueColors)
+                maxUniqueColors = numUniqueColors[block];
+        }
+
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            uint16_t fillColor = ParallelMath::Extract(uniqueQuantizedColors[0], block);
+
+            int numUnique = numUniqueColors[block];
+            for (int fill = numUnique + 1; fill < maxUniqueColors; fill++)
+                ParallelMath::PutUInt15(uniqueQuantizedColors[fill], block, fillColor);
+        }
+
+        MFloat hModeErrors[16];
+        MUInt15 hModeUnquantizedColor[3];
+        for (int ch = 0; ch < 3; ch++)
+        {
+            MUInt15 quantizedChannel = hModeIsolatedQuantized[table][ch];
+
+            MUInt15 unquantizedCh = (quantizedChannel << 4) | quantizedChannel;
+            hModeUnquantizedColor[ch] = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedCh) - ParallelMath::LosslessCast<MSInt16>::Cast(modifier)));
+        }
+
+        for (int px = 0; px < 16; px++)
+        {
+            hModeErrors[px] = isUniform ? ComputeErrorUniform(hModeUnquantizedColor, pixels[px]) : ComputeErrorWeighted(hModeUnquantizedColor, preWeightedPixels[px], options);
+            ParallelMath::ConditionalSet(hModeErrors[px], isTransparentF[px], ParallelMath::MakeFloatZero());
+        }
+
+        MUInt15 packedHModeColor2 = (hModeIsolatedQuantized[table][0] << 10) | (hModeIsolatedQuantized[table][1] << 5) | hModeIsolatedQuantized[table][2];
+        ParallelMath::Int16CompFlag tableLowBitIsZero = ((table & 1) == 0) ? ParallelMath::MakeBoolInt16(true) : ParallelMath::MakeBoolInt16(false);
+
+        for (int ci = 0; ci < maxUniqueColors; ci++)
+        {
+            MUInt15 lineColors[2][3];
+            for (int ch = 0; ch < 3; ch++)
+            {
+                MUInt15 quantizedChannel = (ParallelMath::RightShift(uniqueQuantizedColors[ci], 10 - (ch * 5)) & ParallelMath::MakeUInt15(15));
+
+                MUInt15 unquantizedColor = (quantizedChannel << 4) | quantizedChannel;
+                lineColors[0][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantizedColor + modifier);
+                lineColors[1][ch] = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedColor) - ParallelMath::LosslessCast<MSInt16>::Cast(modifier)));
+            }
+
+            MUInt15 bestLineSelector[16];
+            MFloat bestLineError[16];
+            for (int px = 0; px < 16; px++)
+            {
+                MFloat lineErrors[2];
+                for (int i = 0; i < 2; i++)
+                    lineErrors[i] = isUniform ? ComputeErrorUniform(lineColors[i], pixels[px]) : ComputeErrorWeighted(lineColors[i], preWeightedPixels[px], options);
+
+                ParallelMath::Int16CompFlag firstIsBetter = ParallelMath::FloatFlagToInt16(ParallelMath::LessOrEqual(lineErrors[0], lineErrors[1]));
+                bestLineSelector[px] = ParallelMath::Select(firstIsBetter, ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(3));
+                bestLineError[px] = ParallelMath::Min(lineErrors[0], lineErrors[1]);
+
+                ParallelMath::ConditionalSet(bestLineError[px], isTransparentF[px], ParallelMath::MakeFloatZero());
+            }
+
+            // One case considered here was if it was possible to force H mode to be valid when the line color is unused.
+            // That case isn't actually useful because it's equivalent to the isolated color being unused at maximum offset,
+            // which is always checked after a swap.
+            MFloat tModeError = ParallelMath::MakeFloatZero();
+            MFloat hModeError = ParallelMath::MakeFloatZero();
+            for (int px = 0; px < 16; px++)
+            {
+                tModeError = tModeError + ParallelMath::Min(bestLineError[px], isolatedError[px]);
+                hModeError = hModeError + ParallelMath::Min(bestLineError[px], hModeErrors[px]);
+            }
+
+            ParallelMath::FloatCompFlag hLessError = ParallelMath::Less(hModeError, tModeError);
+
+            MUInt15 packedHModeColor1 = uniqueQuantizedColors[ci];
+
+            ParallelMath::Int16CompFlag hModeTableLowBitMustBeZero = ParallelMath::Less(packedHModeColor1, packedHModeColor2);
+
+            ParallelMath::Int16CompFlag hModeIsLegal = ParallelMath::Equal(hModeTableLowBitMustBeZero, tableLowBitIsZero);
+            ParallelMath::Int16CompFlag useHMode = ParallelMath::FloatFlagToInt16(hLessError) & hModeIsLegal;
+
+            MFloat roundBestError = tModeError;
+            ParallelMath::ConditionalSet(roundBestError, ParallelMath::Int16FlagToFloat(useHMode), hModeError);
+
+            ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(roundBestError, bestError));
+            ParallelMath::FloatCompFlag useHModeF = ParallelMath::Int16FlagToFloat(useHMode);
+
+            if (ParallelMath::AnySet(errorBetter))
+            {
+                MSInt32 selectors = ParallelMath::MakeSInt32(0);
+                for (int px = 0; px < 16; px++)
+                {
+                    MUInt15 selector = bestLineSelector[px];
+
+                    MFloat isolatedPixelError = ParallelMath::Select(useHModeF, hModeErrors[px], isolatedError[px]);
+                    ParallelMath::Int16CompFlag isolatedBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(isolatedPixelError, bestLineError[px]));
+
+                    ParallelMath::ConditionalSet(selector, isolatedBetter, ParallelMath::MakeUInt15(0));
+                    ParallelMath::ConditionalSet(selector, isTransparent[px], ParallelMath::MakeUInt15(2));
+                    selectors = selectors | (ParallelMath::ToInt32(selector) << (px * 2));
+                }
+
+                bestError = ParallelMath::Min(bestError, roundBestError);
+                ParallelMath::ConditionalSet(bestLineColor, errorBetter, uniqueQuantizedColors[ci]);
+                ParallelMath::ConditionalSet(bestSelectors, errorBetter, selectors);
+                ParallelMath::ConditionalSet(bestTable, errorBetter, ParallelMath::MakeUInt15(table));
+                ParallelMath::ConditionalSet(bestIsHMode, errorBetter, useHMode);
+                ParallelMath::ConditionalSet(bestHModeColor2, errorBetter, packedHModeColor2);
+                
+                bestIsThisMode = bestIsThisMode | errorBetter;
+            }
+        }
+    }
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        if (ParallelMath::Extract(bestIsThisMode, block))
+        {
+            uint32_t lowBits = 0;
+            uint32_t highBits = 0;
+
+            uint16_t blockBestLineColor = ParallelMath::Extract(bestLineColor, block);
+            ParallelMath::ScalarUInt16 blockIsolatedAverageQuantized[3];
+
+            for (int ch = 0; ch < 3; ch++)
+                blockIsolatedAverageQuantized[ch] = ParallelMath::Extract(isolatedAverageQuantized[ch], block);
+
+            uint16_t blockBestTable = ParallelMath::Extract(bestTable, block);
+            int32_t blockBestSelectors = ParallelMath::Extract(bestSelectors, block);
+
+            ParallelMath::ScalarUInt16 lineColor[3];
+            for (int ch = 0; ch < 3; ch++)
+                lineColor[ch] = (blockBestLineColor >> (10 - (ch * 5))) & 15;
+
+            if (ParallelMath::Extract(bestIsHMode, block))
+            {
+                // T mode: C1, C2+M, Transparent, C2-M
+                // H mode: C1+M, C1-M, Transparent, C2-M
+                static const ParallelMath::ScalarUInt16 selectorRemapSector[4] = { 1, 0, 1, 0 };
+                static const ParallelMath::ScalarUInt16 selectorRemapSign[4] = { 1, 0, 0, 1 };
+
+                // Remap selectors
+                ParallelMath::ScalarUInt16 signBits = 0;
+                ParallelMath::ScalarUInt16 sectorBits = 0;
+                int32_t blockBestSelectors = ParallelMath::Extract(bestSelectors, block);
+                for (int px = 0; px < 16; px++)
+                {
+                    int32_t selector = (blockBestSelectors >> (px * 2)) & 3;
+                    sectorBits |= (selectorRemapSector[selector] << px);
+                    signBits |= (selectorRemapSign[selector] << px);
+                }
+
+                ParallelMath::ScalarUInt16 blockColors[2] = { blockBestLineColor, ParallelMath::Extract(bestHModeColor2, block) };
+
+                EmitHModeBlock(outputBuffer + block * 8, blockColors, sectorBits, signBits, blockBestTable, false);
+            }
+            else
+                EmitTModeBlock(outputBuffer + block * 8, lineColor, blockIsolatedAverageQuantized, blockBestSelectors, blockBestTable, false);
+        }
+    }
+}
+
+
+// Expands a quantized planar coefficient by bit replication: the value is shifted left and the
+// vacated low bits are refilled from its own high bits.
+// For ch == 1 the shifts are 1 and 6 (the pattern for a 7-bit input); for any other channel they
+// are 2 and 4 (the pattern for a 6-bit input).
+cvtt::ParallelMath::UInt15 cvtt::Internal::ETCComputer::DecodePlanarCoeff(const MUInt15 &coeff, int ch)
+{
+    const bool isWideChannel = (ch == 1);
+    const int expandShift = isWideChannel ? 1 : 2;
+    const int replicateShift = isWideChannel ? 6 : 4;
+
+    return (coeff << expandShift) | (ParallelMath::RightShift(coeff, replicateShift));
+}
+
+// Attempts a planar encoding (three coefficients O, H, V per channel) of each block and writes
+// it out for every lane where it beats the error recorded so far.
+//
+// outputBuffer      - Receives 8 bytes per block at offset (block * 8). Only lanes whose planar
+//                     error is strictly lower than the incoming bestError are written.
+// bestError         - In/out per-lane best error; lowered to the planar error where planar wins.
+// pixels            - Integer colors, indexed [px][channel] with px = y * 4 + x.
+// preWeightedPixels - Float colors, indexed the same way. They are the fit target and the error
+//                     reference when the ETC_UseFakeBT709 flag is set.
+// options           - Supplies the flags (ETC_UseFakeBT709, Uniform) and the channel weights.
+//
+// Steps:
+//   1. Per channel, solve the 3x3 least-squares system for the plane x*H + y*V + O.
+//   2. Quantize the coefficients (7 bits for channel 1, 6 bits otherwise). With fake BT.709 they
+//      are converted back to RGB and rounded to nearest; otherwise all 8 round-down/round-up
+//      combinations per channel are tried and the lowest squared error wins.
+//   3. Pack the winning coefficients into the 64-bit block for the lanes that improved.
+void cvtt::Internal::ETCComputer::EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options)
+{
+    // NOTE: If it's desired to do this in another color space, the best way to do it would probably be
+    // to do everything in that color space and then transform it back to RGB.
+
+    // We compute H = (H-O)/4 and V= (V-O)/4 to simplify the math
+
+    // error = (x*H + y*V + O - C)^2
+    MFloat h[3] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() };
+    MFloat v[3] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() };
+    MFloat o[3] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() };
+
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+    bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0);
+
+    MFloat totalError = ParallelMath::MakeFloatZero();
+    MUInt15 bestCoeffs[3][3]; // [Channel][Coeff], coefficient order is O, H, V
+    for (int ch = 0; ch < 3; ch++)
+    {
+        float fhh = 0.f;
+        float fho = 0.f;
+        float fhv = 0.f;
+        float foo = 0.f;
+        float fov = 0.f;
+        float fvv = 0.f;
+        MFloat fc = ParallelMath::MakeFloatZero();
+        MFloat fh = ParallelMath::MakeFloatZero();
+        MFloat fv = ParallelMath::MakeFloatZero();
+        MFloat fo = ParallelMath::MakeFloatZero();
+
+        // The mixed terms are symmetric, so the mirrored names alias the same accumulators.
+        // Each mixed product is therefore added twice per pixel, giving the factor of 2 from
+        // expanding the square.
+        float &foh = fho;
+        float &fvh = fhv;
+        float &fvo = fov;
+
+        for (int px = 0; px < 16; px++)
+        {
+            float x = static_cast<float>(px % 4);
+            float y = static_cast<float>(px / 4);
+            MFloat c = isFakeBT709 ? preWeightedPixels[px][ch] : ParallelMath::ToFloat(pixels[px][ch]);
+
+            // (x*H + y*V + O - C)^2
+            fhh += x * x;
+            fhv += x * y;
+            fho += x;
+            fh = fh - c * x;
+
+            fvh += y * x;
+            fvv += y * y;
+            fvo += y;
+            fv = fv - c * y;
+
+            foh += x;
+            fov += y;
+            foo += 1;
+            fo = fo - c;
+
+            // Second half of the -2*C*(x*H + y*V + O) cross terms
+            fh = fh - c * x;
+            fv = fv - c * y;
+            fo = fo - c;
+            fc = fc + c * c;
+        }
+
+        //float totalError = fhh * h * h + fho * h*o + fhv * h*v + foo * o * o + fov * o*v + fvv * v * v + fh * h + fv * v + fo * o + fc;
+
+        // error = fhh*h^2 + fho*h*o + fhv*h*v + foo*o^2 + fov*o*v + fvv*v^2 + fh*h + fv*v + fo*o + fc
+        // derror/dh = 2*fhh*h + fho*o + fhv*v + fh
+        // derror/dv = fhv*h + fov*o + 2*fvv*v + fv
+        // derror/do = fho*h + 2*foo*o + fov*v + fo
+
+        // Solve system of equations
+        // h o v 1 = 0
+        // -------
+        // d e f g  R0
+        // i j k l  R1
+        // m n p q  R2
+
+        float d = 2.0f * fhh;
+        float e = fho;
+        float f = fhv;
+        MFloat gD = fh;
+
+        float i = fhv;
+        float j = fov;
+        float k = 2.0f * fvv;
+        MFloat lD = fv;
+
+        float m = fho;
+        float n = 2.0f * foo;
+        float p = fov;
+        MFloat qD = fo;
+
+        {
+            // Factor out first column from R1 and R2
+            float r0to1 = -i / d;
+            float r0to2 = -m / d;
+
+            // 0 j1 k1 l1D
+            float j1 = j + r0to1 * e;
+            float k1 = k + r0to1 * f;
+            MFloat l1D = lD + gD * r0to1;
+
+            // 0 n1 p1 q1D
+            float n1 = n + r0to2 * e;
+            float p1 = p + r0to2 * f;
+            MFloat q1D = qD + gD * r0to2;
+
+            // Factor out third column from R2
+            float r1to2 = -p1 / k1;
+
+            // 0 n2 0 q2D
+            float n2 = n1 + r1to2 * j1;
+            MFloat q2D = q1D + l1D * r1to2;
+
+            o[ch] = -q2D / n2;
+
+            // Factor out second column from R1
+            // 0 n2 0 q2D
+
+            float r2to1 = -j1 / n2;
+
+            // 0 0 k1 l2D
+            // 0 n2 0 q2D
+            MFloat l2D = l1D + q2D * r2to1;
+
+            float elim2 = -f / k1;
+            float elim1 = -e / n2;
+
+            // d 0 0 g2D
+            MFloat g2D = gD + l2D * elim2 + q2D * elim1;
+
+            // n2*o + q2 = 0
+            // o = -q2 / n2
+            h[ch] = -g2D / d;
+            v[ch] = -l2D / k1;
+        }
+
+        // Undo the local transformation
+        h[ch] = h[ch] * 4.0f + o[ch];
+        v[ch] = v[ch] * 4.0f + o[ch];
+    }
+
+    if (isFakeBT709)
+    {
+        MFloat oRGB[3];
+        MFloat hRGB[3];
+        MFloat vRGB[3];
+
+        ConvertFromFakeBT709(oRGB, o);
+        ConvertFromFakeBT709(hRGB, h);
+        ConvertFromFakeBT709(vRGB, v);
+
+        // Twiddling in fake BT.709 is a mess, just round off for now (the precision is pretty good anyway)
+        {
+            ParallelMath::RoundTowardNearestForScope rtn;
+
+            for (int ch = 0; ch < 3; ch++)
+            {
+                MFloat fcoeffs[3] = { oRGB[ch], hRGB[ch], vRGB[ch] };
+
+                // Clamp to [0, max] and rescale from the 0..255 range to the coefficient range
+                for (int c = 0; c < 3; c++)
+                {
+                    MFloat coeff = ParallelMath::Max(ParallelMath::MakeFloatZero(), fcoeffs[c]);
+                    if (ch == 1)
+                        coeff = ParallelMath::Min(ParallelMath::MakeFloat(127.0f), coeff * (127.0f / 255.0f));
+                    else
+                        coeff = ParallelMath::Min(ParallelMath::MakeFloat(63.0f), coeff * (63.0f / 255.0f));
+                    fcoeffs[c] = coeff;
+                }
+
+                for (int c = 0; c < 3; c++)
+                    bestCoeffs[ch][c] = ParallelMath::RoundAndConvertToU15(fcoeffs[c], &rtn);
+            }
+        }
+
+        // Decode the quantized plane again so the error is measured on what a decoder would produce
+        MUInt15 reconstructed[16][3];
+        for (int ch = 0; ch < 3; ch++)
+        {
+            MUInt15 dO = DecodePlanarCoeff(bestCoeffs[ch][0], ch);
+            MUInt15 dH = DecodePlanarCoeff(bestCoeffs[ch][1], ch);
+            MUInt15 dV = DecodePlanarCoeff(bestCoeffs[ch][2], ch);
+
+            MSInt16 hMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dH) - ParallelMath::LosslessCast<MSInt16>::Cast(dO);
+            MSInt16 vMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dV) - ParallelMath::LosslessCast<MSInt16>::Cast(dO);
+
+            // 4*O plus the rounding bias for the final shift right by 2
+            MSInt16 addend = ParallelMath::LosslessCast<MSInt16>::Cast(dO << 2) + 2;
+
+            for (int px = 0; px < 16; px++)
+            {
+                MUInt15 pxv = ParallelMath::MakeUInt15(px);
+                MSInt16 x = ParallelMath::LosslessCast<MSInt16>::Cast(pxv & ParallelMath::MakeUInt15(3));
+                MSInt16 y = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RightShift(pxv, 2));
+
+                MSInt16 interpolated = ParallelMath::RightShift(ParallelMath::CompactMultiply(x, hMinusO) + ParallelMath::CompactMultiply(y, vMinusO) + addend, 2);
+                MUInt15 clampedLow = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), interpolated));
+                reconstructed[px][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(255), clampedLow);
+            }
+        }
+
+        totalError = ParallelMath::MakeFloatZero();
+        for (int px = 0; px < 16; px++)
+            totalError = totalError + ComputeErrorFakeBT709(reconstructed[px], preWeightedPixels[px]);
+    }
+    else
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            MFloat fcoeffs[3] = { o[ch], h[ch], v[ch] };
+            MUInt15 coeffRanges[3][2]; // [Coeff][0 = rounded down, 1 = rounded up]
+
+            // Clamp to [0, max] and rescale from the 0..255 range to the coefficient range
+            for (int c = 0; c < 3; c++)
+            {
+                MFloat coeff = ParallelMath::Max(ParallelMath::MakeFloatZero(), fcoeffs[c]);
+                if (ch == 1)
+                    coeff = ParallelMath::Min(ParallelMath::MakeFloat(127.0f), coeff * (127.0f / 255.0f));
+                else
+                    coeff = ParallelMath::Min(ParallelMath::MakeFloat(63.0f), coeff * (63.0f / 255.0f));
+                fcoeffs[c] = coeff;
+            }
+
+            {
+                ParallelMath::RoundDownForScope rd;
+                for (int c = 0; c < 3; c++)
+                    coeffRanges[c][0] = ParallelMath::RoundAndConvertToU15(fcoeffs[c], &rd);
+            }
+
+            {
+                ParallelMath::RoundUpForScope ru;
+                for (int c = 0; c < 3; c++)
+                    coeffRanges[c][1] = ParallelMath::RoundAndConvertToU15(fcoeffs[c], &ru);
+            }
+
+            // Try every combination of rounded-down / rounded-up O, H and V for this channel
+            MFloat bestChannelError = ParallelMath::MakeFloat(FLT_MAX);
+            for (int io = 0; io < 2; io++)
+            {
+                MUInt15 dO = DecodePlanarCoeff(coeffRanges[0][io], ch);
+
+                for (int ih = 0; ih < 2; ih++)
+                {
+                    MUInt15 dH = DecodePlanarCoeff(coeffRanges[1][ih], ch);
+                    MSInt16 hMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dH) - ParallelMath::LosslessCast<MSInt16>::Cast(dO);
+
+                    for (int iv = 0; iv < 2; iv++)
+                    {
+                        MUInt15 dV = DecodePlanarCoeff(coeffRanges[2][iv], ch);
+                        MSInt16 vMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dV) - ParallelMath::LosslessCast<MSInt16>::Cast(dO);
+
+                        MFloat error = ParallelMath::MakeFloatZero();
+
+                        // 4*O plus the rounding bias for the final shift right by 2
+                        MSInt16 addend = ParallelMath::LosslessCast<MSInt16>::Cast(dO << 2) + 2;
+
+                        for (int px = 0; px < 16; px++)
+                        {
+                            MUInt15 pxv = ParallelMath::MakeUInt15(px);
+                            MSInt16 x = ParallelMath::LosslessCast<MSInt16>::Cast(pxv & ParallelMath::MakeUInt15(3));
+                            MSInt16 y = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RightShift(pxv, 2));
+
+                            MSInt16 interpolated = ParallelMath::RightShift(ParallelMath::CompactMultiply(x, hMinusO) + ParallelMath::CompactMultiply(y, vMinusO) + addend, 2);
+                            MUInt15 clampedLow = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), interpolated));
+                            MUInt15 dec = ParallelMath::Min(ParallelMath::MakeUInt15(255), clampedLow);
+
+                            MSInt16 delta = ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(dec);
+
+                            MFloat deltaF = ParallelMath::ToFloat(delta);
+                            error = error + deltaF * deltaF;
+                        }
+
+                        ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestChannelError));
+                        if (ParallelMath::AnySet(errorBetter))
+                        {
+                            bestChannelError = ParallelMath::Min(error, bestChannelError);
+                            ParallelMath::ConditionalSet(bestCoeffs[ch][0], errorBetter, coeffRanges[0][io]);
+                            ParallelMath::ConditionalSet(bestCoeffs[ch][1], errorBetter, coeffRanges[1][ih]);
+                            ParallelMath::ConditionalSet(bestCoeffs[ch][2], errorBetter, coeffRanges[2][iv]);
+                        }
+                    }
+                }
+            }
+
+            // The per-channel error is an unweighted squared error; apply the squared channel weight
+            if (!isUniform)
+            {
+                switch (ch)
+                {
+                case 0:
+                    bestChannelError = bestChannelError * (options.redWeight * options.redWeight);
+                    break;
+                case 1:
+                    bestChannelError = bestChannelError * (options.greenWeight * options.greenWeight);
+                    break;
+                case 2:
+                    bestChannelError = bestChannelError * (options.blueWeight * options.blueWeight);
+                    break;
+                default:
+                    break;
+                }
+            }
+
+            totalError = totalError + bestChannelError;
+        }
+    }
+
+    ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(totalError, bestError));
+    if (ParallelMath::AnySet(errorBetter))
+    {
+        bestError = ParallelMath::Min(bestError, totalError);
+
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            if (!ParallelMath::Extract(errorBetter, block))
+                continue;
+
+            // All packing is done in uint32_t: several fields reach bit 31 (the R guard bit and
+            // the 7-bit gh field shifted by 25), which must not be produced by shifting a signed int.
+            const uint32_t one = 1;
+            const uint32_t seven = 7;
+
+            const uint32_t ro = ParallelMath::Extract(bestCoeffs[0][0], block);
+            const uint32_t rh = ParallelMath::Extract(bestCoeffs[0][1], block);
+            const uint32_t rv = ParallelMath::Extract(bestCoeffs[0][2], block);
+
+            const uint32_t go = ParallelMath::Extract(bestCoeffs[1][0], block);
+            const uint32_t gh = ParallelMath::Extract(bestCoeffs[1][1], block);
+            const uint32_t gv = ParallelMath::Extract(bestCoeffs[1][2], block);
+
+            const uint32_t bo = ParallelMath::Extract(bestCoeffs[2][0], block);
+            const uint32_t bh = ParallelMath::Extract(bestCoeffs[2][1], block);
+            const uint32_t bv = ParallelMath::Extract(bestCoeffs[2][2], block);
+
+            // Some coefficients are split into several fields in the upper word
+            const uint32_t go1 = go >> 6;
+            const uint32_t go2 = go & 63u;
+
+            const uint32_t bo1 = bo >> 5;
+            const uint32_t bo2 = (bo >> 3) & 3u;
+            const uint32_t bo3 = bo & 7u;
+
+            const uint32_t rh1 = (rh >> 1);
+            const uint32_t rh2 = rh & 1u;
+
+            // The same bits viewed as a base value plus a 3-bit delta per channel. The spare bits
+            // are chosen from these so that R and G do not overflow while B does.
+            const uint32_t fakeR = ro >> 2;
+            const uint32_t fakeDR = go1 | ((ro & 3u) << 1);
+
+            const uint32_t fakeG = (go2 >> 2);
+            const uint32_t fakeDG = ((go2 & 3u) << 1) | bo1;
+
+            const uint32_t fakeB = bo2;
+            const uint32_t fakeDB = bo3 >> 1;
+
+            uint32_t highBits = 0;
+            uint32_t lowBits = 0;
+
+            // Avoid overflowing R
+            if ((fakeDR & 4u) != 0u && fakeR + fakeDR < 8u)
+                highBits |= one << (63 - 32);
+
+            // Avoid overflowing G
+            if ((fakeDG & 4u) != 0u && fakeG + fakeDG < 8u)
+                highBits |= one << (55 - 32);
+
+            // Overflow B
+            if (fakeB + fakeDB < 4u)
+            {
+                // Overflow low
+                highBits |= one << (42 - 32);
+            }
+            else
+            {
+                // Overflow high
+                highBits |= seven << (45 - 32);
+            }
+
+            highBits |= ro << (57 - 32);
+            highBits |= go1 << (56 - 32);
+            highBits |= go2 << (49 - 32);
+            highBits |= bo1 << (48 - 32);
+            highBits |= bo2 << (43 - 32);
+            highBits |= bo3 << (39 - 32);
+            highBits |= rh1 << (34 - 32);
+            highBits |= one << (33 - 32); // bit 33 is always set for this mode
+            highBits |= rh2 << (32 - 32);
+
+            lowBits |= gh << 25;
+            lowBits |= bh << 19;
+            lowBits |= rv << 13;
+            lowBits |= gv << 6;
+            lowBits |= bv << 0;
+
+            // Store both words most-significant byte first
+            for (int i = 0; i < 4; i++)
+                outputBuffer[block * 8 + i] = (highBits >> (24 - i * 8)) & 0xff;
+            for (int i = 0; i < 4; i++)
+                outputBuffer[block * 8 + i + 4] = (lowBits >> (24 - i * 8)) & 0xff;
+        }
+    }
+}
+
+// Compresses ParallelSize blocks of 4x4 pixels, optionally with punchthrough alpha, by running
+// every candidate encoder against a shared per-lane best error.
+//
+// outputBuffer      - Output storage handed to each encoder.
+// pixelBlocks       - ParallelSize input blocks; component 3 of each pixel is read as alpha.
+// compressionData   - Must really be an ETC2CompressionDataInternal (it is static_cast to one);
+//                     supplies the chroma side axes and the m_h / m_drs scratch data.
+// options           - Flags, channel weights and the alpha threshold.
+// punchthroughAlpha - When true, pixels whose alpha falls below the threshold are transparent.
+//
+// Order of work:
+//   1. Classify pixels as transparent and zero the colors of transparent ones.
+//   2. Try planar mode (skipped only when every lane is fully transparent).
+//   3. Split the pixels into two sectors along the principal axis of their chroma distribution.
+//   4. Try T mode with both sector polarities, then H mode and ETC1.
+//   5. If any lane has transparency, reset those lanes' error and run the punchthrough encoders.
+void cvtt::Internal::ETCComputer::CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 *pixelBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha)
+{
+    ParallelMath::Int16CompFlag pixelIsTransparent[16];
+    ParallelMath::Int16CompFlag anyTransparent = ParallelMath::MakeBoolInt16(false);
+    ParallelMath::Int16CompFlag allTransparent = ParallelMath::MakeBoolInt16(false);
+
+    if (!punchthroughAlpha)
+    {
+        // Without punchthrough alpha nothing is transparent; both summary flags stay false.
+        for (int px = 0; px < 16; px++)
+            pixelIsTransparent[px] = ParallelMath::MakeBoolInt16(false);
+    }
+    else
+    {
+        const float clampedThreshold = std::max<float>(std::min<float>(1.0f, options.threshold), 0.0f);
+        const float fThreshold = clampedThreshold * 255.0f;
+
+        // +1.0f is intentional, we want to take the next valid integer (even if it's 256) since everything else lower is transparent
+        MUInt15 alphaCutoff = ParallelMath::MakeUInt15(static_cast<uint16_t>(std::floor(fThreshold + 1.0f)));
+
+        // "All transparent" starts out true and is AND-ed down pixel by pixel.
+        allTransparent = ParallelMath::MakeBoolInt16(true);
+
+        for (int px = 0; px < 16; px++)
+        {
+            MUInt15 alpha;
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                ParallelMath::PutUInt15(alpha, block, pixelBlocks[block].m_pixels[px][3]);
+
+            ParallelMath::Int16CompFlag belowCutoff = ParallelMath::Less(alpha, alphaCutoff);
+            pixelIsTransparent[px] = belowCutoff;
+            anyTransparent = (anyTransparent | belowCutoff);
+            allTransparent = (allTransparent & belowCutoff);
+        }
+    }
+
+    // Neither summary flag changes after this point, so the lane-wide tests are evaluated once.
+    const bool someLaneHasTransparency = ParallelMath::AnySet(anyTransparent);
+    const bool someLaneHasOpaquePixels = !ParallelMath::AllSet(allTransparent);
+
+    MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+
+    ETC2CompressionDataInternal* internalData = static_cast<ETC2CompressionDataInternal*>(compressionData);
+
+    MUInt15 pixels[16][3];
+    MFloat preWeightedPixels[16][3];
+    ExtractBlocks(pixels, preWeightedPixels, pixelBlocks, options);
+
+    // Transparent pixels get their color forced to zero in both representations.
+    if (someLaneHasTransparency)
+    {
+        for (int px = 0; px < 16; px++)
+        {
+            ParallelMath::Int16CompFlag transparentI = pixelIsTransparent[px];
+            ParallelMath::FloatCompFlag transparentF = ParallelMath::Int16FlagToFloat(transparentI);
+
+            for (int ch = 0; ch < 3; ch++)
+            {
+                ParallelMath::ConditionalSet(pixels[px][ch], transparentI, ParallelMath::MakeUInt15(0));
+                ParallelMath::ConditionalSet(preWeightedPixels[px][ch], transparentF, ParallelMath::MakeFloat(0.0f));
+            }
+        }
+    }
+
+    if (someLaneHasOpaquePixels)
+        EncodePlanar(outputBuffer, bestError, pixels, preWeightedPixels, options);
+
+    // Per-pixel chroma offset from the centroid, scaled by the pixel count to avoid a division.
+    MFloat chromaDelta[16][2];
+
+    MUInt15 numOpaque = ParallelMath::MakeUInt15(16);
+    for (int px = 0; px < 16; px++)
+        numOpaque = numOpaque - ParallelMath::SelectOrZero(pixelIsTransparent[px], ParallelMath::MakeUInt15(1));
+
+    if (options.flags & cvtt::Flags::Uniform)
+    {
+        // Integer chroma coordinates (R - B, R - 2G + B) and their per-axis sums.
+        MSInt16 chroma[16][2];
+        MSInt16 chromaSum[2] = { ParallelMath::MakeSInt16(0), ParallelMath::MakeSInt16(0) };
+        for (int px = 0; px < 16; px++)
+        {
+            chroma[px][0] = ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][0]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][2]);
+            chroma[px][1] = ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][0]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][1] << 1) + ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][2]);
+
+            for (int ch = 0; ch < 2; ch++)
+                chromaSum[ch] = chromaSum[ch] + chroma[px][ch];
+        }
+
+        const MFloat rcpSqrt3 = ParallelMath::MakeFloat(0.57735026918962576450914878050196f);
+
+        for (int px = 0; px < 16; px++)
+        {
+            for (int ch = 0; ch < 2; ch++)
+            {
+                if (punchthroughAlpha)
+                {
+                    // Scale by the opaque pixel count rather than by 16
+                    MUInt15 scaled = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(chroma[px][ch], numOpaque));
+                    MSInt16 centered = ParallelMath::LosslessCast<MSInt16>::Cast(scaled) - chromaSum[ch];
+                    chromaDelta[px][ch] = ParallelMath::ToFloat(centered);
+                }
+                else
+                    chromaDelta[px][ch] = ParallelMath::ToFloat((chroma[px][ch] << 4) - chromaSum[ch]);
+            }
+
+            // Second axis is rescaled by 1/sqrt(3)
+            chromaDelta[px][1] = chromaDelta[px][1] * rcpSqrt3;
+        }
+    }
+    else
+    {
+        // Project the pre-weighted colors onto the two chroma side axes from the compression data.
+        const float chromaAxes[2][3] =
+        {
+            { internalData->m_chromaSideAxis0[0], internalData->m_chromaSideAxis0[1], internalData->m_chromaSideAxis0[2] },
+            { internalData->m_chromaSideAxis1[0], internalData->m_chromaSideAxis1[1], internalData->m_chromaSideAxis1[2] }
+        };
+
+        MFloat chroma[16][2];
+        MFloat chromaSum[2] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() };
+        for (int px = 0; px < 16; px++)
+        {
+            const MFloat &c0 = preWeightedPixels[px][0];
+            const MFloat &c1 = preWeightedPixels[px][1];
+            const MFloat &c2 = preWeightedPixels[px][2];
+
+            for (int ch = 0; ch < 2; ch++)
+            {
+                chroma[px][ch] = c0 * chromaAxes[ch][0] + c1 * chromaAxes[ch][1] + c2 * chromaAxes[ch][2];
+                chromaSum[ch] = chromaSum[ch] + chroma[px][ch];
+            }
+        }
+
+        if (punchthroughAlpha)
+        {
+            const MFloat numOpaqueF = ParallelMath::ToFloat(numOpaque);
+            for (int px = 0; px < 16; px++)
+            {
+                for (int ch = 0; ch < 2; ch++)
+                {
+                    MFloat scaled = chroma[px][ch] * numOpaqueF;
+                    chromaDelta[px][ch] = scaled - chromaSum[ch];
+                }
+            }
+        }
+        else
+        {
+            for (int px = 0; px < 16; px++)
+            {
+                for (int ch = 0; ch < 2; ch++)
+                    chromaDelta[px][ch] = chroma[px][ch] * 16.0f - chromaSum[ch];
+            }
+        }
+    }
+
+    // 2x2 covariance of the chroma deltas
+    MFloat covXX = ParallelMath::MakeFloatZero();
+    MFloat covYY = ParallelMath::MakeFloatZero();
+    MFloat covXY = ParallelMath::MakeFloatZero();
+
+    for (int px = 0; px < 16; px++)
+    {
+        MFloat deltaX = chromaDelta[px][0];
+        MFloat deltaY = chromaDelta[px][1];
+
+        covXX = covXX + deltaX * deltaX;
+        covYY = covYY + deltaY * deltaY;
+        covXY = covXY + deltaX * deltaY;
+    }
+
+    // Larger eigenvalue of the covariance matrix: trace/2 + sqrt((trace/2)^2 - det)
+    MFloat halfTrace = (covXX + covYY) * 0.5f;
+    MFloat det = covXX * covYY - covXY * covXY;
+
+    MFloat discriminantRoot = ParallelMath::Sqrt(ParallelMath::Max(ParallelMath::MakeFloatZero(), halfTrace * halfTrace - det));
+
+    MFloat ev = halfTrace + discriminantRoot;
+
+    // Direction whose sign test splits the pixels into the two sectors
+    MFloat dx = (covYY - ev + covXY);
+    MFloat dy = -(covXX - ev + covXY);
+
+    // If evenly distributed, pick an arbitrary plane
+    ParallelMath::FloatCompFlag dxIsZero = ParallelMath::Equal(dx, ParallelMath::MakeFloatZero());
+    ParallelMath::FloatCompFlag dyIsZero = ParallelMath::Equal(dy, ParallelMath::MakeFloatZero());
+    ParallelMath::FloatCompFlag allZero = dxIsZero & dyIsZero;
+    ParallelMath::ConditionalSet(dx, allZero, ParallelMath::MakeFloat(1.f));
+
+    ParallelMath::Int16CompFlag sectorAssignments[16];
+    for (int px = 0; px < 16; px++)
+    {
+        MFloat side = chromaDelta[px][0] * dx + chromaDelta[px][1] * dy;
+        sectorAssignments[px] = ParallelMath::FloatFlagToInt16(ParallelMath::Less(side, ParallelMath::MakeFloatZero()));
+    }
+
+    if (someLaneHasOpaquePixels)
+    {
+        // T mode with the computed polarity, then with the polarity flipped
+        EncodeTMode(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, options);
+
+        for (int px = 0; px < 16; px++)
+            sectorAssignments[px] = ParallelMath::Not(sectorAssignments[px]);
+
+        EncodeTMode(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, options);
+
+        EncodeHMode(outputBuffer, bestError, sectorAssignments, pixels, internalData->m_h, preWeightedPixels, options);
+
+        CompressETC1BlockInternal(bestError, outputBuffer, pixels, preWeightedPixels, internalData->m_drs, options, true);
+    }
+
+    if (!someLaneHasTransparency)
+        return;
+
+    if (someLaneHasOpaquePixels)
+    {
+        // The opaque pass left the assignments flipped; restore the original polarity
+        for (int px = 0; px < 16; px++)
+            sectorAssignments[px] = ParallelMath::Not(sectorAssignments[px]);
+    }
+
+    // Reset the error of any transparent blocks to max and retry with punchthrough modes
+    ParallelMath::ConditionalSet(bestError, ParallelMath::Int16FlagToFloat(anyTransparent), ParallelMath::MakeFloat(FLT_MAX));
+
+    EncodeVirtualTModePunchthrough(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, pixelIsTransparent, anyTransparent, allTransparent, options);
+
+    // Second attempt with the opposite sector polarity
+    for (int px = 0; px < 16; px++)
+        sectorAssignments[px] = ParallelMath::Not(sectorAssignments[px]);
+
+    EncodeVirtualTModePunchthrough(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, pixelIsTransparent, anyTransparent, allTransparent, options);
+
+    CompressETC1PunchthroughBlockInternal(bestError, outputBuffer, pixels, preWeightedPixels, pixelIsTransparent, internalData->m_drs, options);
+}
+
+// Collects component 3 (alpha) of all 16 pixels from each of the ParallelSize input blocks into
+// per-pixel lanes, then encodes them through CompressETC2AlphaBlockInternal with
+// is11Bit = false and isSigned = false.
+void cvtt::Internal::ETCComputer::CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *pixelBlocks, const Options &options)
+{
+    MUInt15 alphaValues[16];
+
+    // Each (pixel, block) pair fills exactly one lane, so the blocks can be walked in the outer loop.
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        const PixelBlockU8 &source = pixelBlocks[block];
+
+        for (int px = 0; px < 16; px++)
+            ParallelMath::PutUInt15(alphaValues[px], block, source.m_pixels[px][3]);
+    }
+
+    CompressETC2AlphaBlockInternal(outputBuffer, alphaValues, false, false, options);
+}
+
+// Encodes ParallelSize blocks of one 16-pixel channel into 8-byte ETC2 alpha / EAC blocks.
+//
+// outputBuffer: receives 8 bytes per parallel block (block i is written at outputBuffer + i * 8).
+// pixels:       the 16 channel values of every block.  The search is seeded for values in 0..255
+//               when is11Bit is false and 0..2047 when it is true.
+// is11Bit:      selects the EAC path.  During the search the base value and the multiplier are
+//               kept in the 11-bit domain (i.e. scaled by 8) and reduced to codewords at the end.
+// isSigned:     selects the signed base codeword encoding (only consulted on the 11-bit path).
+// options:      not used by this routine.
+void cvtt::Internal::ETCComputer::CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options)
+{
+    MUInt15 minAlpha = ParallelMath::MakeUInt15(is11Bit ? 2047 : 255);
+    MUInt15 maxAlpha = ParallelMath::MakeUInt15(0);
+
+    for (int px = 0; px < 16; px++)
+    {
+        minAlpha = ParallelMath::Min(minAlpha, pixels[px]);
+        maxAlpha = ParallelMath::Max(maxAlpha, pixels[px]);
+    }
+
+    MUInt15 alphaSpan = maxAlpha - minAlpha;
+    MUInt15 alphaSpanMidpointTimes2 = maxAlpha + minAlpha;
+
+    MUInt31 bestTotalError = ParallelMath::MakeUInt31(0x7fffffff);
+    MUInt15 bestTableIndex = ParallelMath::MakeUInt15(0);
+    MUInt15 bestBaseCodeword = ParallelMath::MakeUInt15(0);
+    MUInt15 bestMultiplier = ParallelMath::MakeUInt15(0);
+    MUInt15 bestIndexes[16];
+
+    for (int px = 0; px < 16; px++)
+        bestIndexes[px] = ParallelMath::MakeUInt15(0);
+
+    // Ranges 0..9 enumerate the (max offset, min offset) pairs tried per table.
+    // 10 is the last count for which the table lookups below stay at index >= 0.
+    const int numAlphaRanges = 10;
+    for (uint16_t tableIndex = 0; tableIndex < 16; tableIndex++)
+    {
+        for (int r = 0; r < numAlphaRanges; r++)
+        {
+            int subrange = r % 3;
+            int mainRange = r / 3;
+
+            int16_t maxOffset = Tables::ETC2::g_alphaModifierTablePositive[tableIndex][3 - mainRange - (subrange & 1)];
+            int16_t minOffset = -Tables::ETC2::g_alphaModifierTablePositive[tableIndex][3 - mainRange - ((subrange >> 1) & 1)] - 1;
+            // Always >= 1 because of the "- 1" in minOffset, so the division below is safe
+            uint16_t offsetSpan = static_cast<uint16_t>(maxOffset - minOffset);
+
+            MSInt16 vminOffset = ParallelMath::MakeSInt16(minOffset);
+            MUInt15 vmaxOffset = ParallelMath::MakeUInt15(maxOffset);
+            MUInt15 voffsetSpan = ParallelMath::MakeUInt15(offsetSpan);
+
+            MUInt15 minMultiplier = ParallelMath::MakeUInt15(0);
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+            {
+                uint16_t singleAlphaSpan = ParallelMath::Extract(alphaSpan, block);
+
+                uint16_t lowMultiplier = singleAlphaSpan / offsetSpan;
+                ParallelMath::PutUInt15(minMultiplier, block, lowMultiplier);
+            }
+
+            if (is11Bit)
+            {
+                // Clamps this to valid multipliers under 15 and rounds down to nearest multiple of 8
+                minMultiplier = ParallelMath::Min(minMultiplier, ParallelMath::MakeUInt15(112)) & ParallelMath::MakeUInt15(120);
+            }
+            else
+            {
+                // We cap at 1 and 14 so both multipliers are valid and dividable
+                // Cases where offset span is 0 should be caught by multiplier 1 of table 13
+                minMultiplier = ParallelMath::Max(ParallelMath::Min(minMultiplier, ParallelMath::MakeUInt15(14)), ParallelMath::MakeUInt15(1));
+            }
+
+            // Try the rounded-down multiplier and the next one up
+            for (uint16_t multiplierOffset = 0; multiplierOffset < 2; multiplierOffset++)
+            {
+                MUInt15 multiplier = minMultiplier;
+
+                if (is11Bit)
+                {
+                    if (multiplierOffset == 1)
+                        multiplier = multiplier + ParallelMath::MakeUInt15(8);
+                    else
+                        multiplier = ParallelMath::Max(multiplier, ParallelMath::MakeUInt15(1));
+                }
+                else
+                {
+                    if (multiplierOffset == 1)
+                        multiplier = multiplier + ParallelMath::MakeUInt15(1);
+                }
+
+                MSInt16 multipliedMinOffset = ParallelMath::CompactMultiply(ParallelMath::LosslessCast<MSInt16>::Cast(multiplier), vminOffset);
+                MUInt15 multipliedMaxOffset = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(multiplier, vmaxOffset));
+
+                // codeword = (maxOffset + minOffset + minAlpha + maxAlpha) / 2
+                MSInt16 unclampedBaseAlphaTimes2 = ParallelMath::LosslessCast<MSInt16>::Cast(alphaSpanMidpointTimes2) - ParallelMath::LosslessCast<MSInt16>::Cast(multipliedMaxOffset) - multipliedMinOffset;
+
+                MUInt15 baseAlpha;
+                if (is11Bit)
+                {
+                    // In unsigned, 4 is added to the unquantized alpha, so compensating for that cancels the 4 we have to add to do rounding.
+                    if (isSigned)
+                        unclampedBaseAlphaTimes2 = unclampedBaseAlphaTimes2 + ParallelMath::MakeSInt16(8);
+
+                    // -128 is illegal for some reason
+                    MSInt16 minBaseAlphaTimes2 = isSigned ? ParallelMath::MakeSInt16(16) : ParallelMath::MakeSInt16(0);
+
+                    MUInt15 clampedBaseAlphaTimes2 = ParallelMath::Min(ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(unclampedBaseAlphaTimes2, minBaseAlphaTimes2)), ParallelMath::MakeUInt15(4095));
+                    // Snap to a multiple of 8: the base is kept as codeword * 8 while searching
+                    baseAlpha = ParallelMath::RightShift(clampedBaseAlphaTimes2, 1) & ParallelMath::MakeUInt15(2040);
+
+                    if (!isSigned)
+                        baseAlpha = baseAlpha + ParallelMath::MakeUInt15(4);
+                }
+                else
+                {
+                    MUInt15 clampedBaseAlphaTimes2 = ParallelMath::Min(ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(unclampedBaseAlphaTimes2, ParallelMath::MakeSInt16(0))), ParallelMath::MakeUInt15(510));
+                    baseAlpha = ParallelMath::RightShift(clampedBaseAlphaTimes2 + ParallelMath::MakeUInt15(1), 1);
+                }
+
+                MUInt15 indexes[16];
+                MUInt31 totalError = ParallelMath::MakeUInt31(0);
+                for (int px = 0; px < 16; px++)
+                {
+                    MUInt15 quantizedValues;
+                    QuantizeETC2Alpha(tableIndex, pixels[px], baseAlpha, multiplier, is11Bit, isSigned, indexes[px], quantizedValues);
+
+                    if (is11Bit)
+                    {
+                        MSInt16 delta = ParallelMath::LosslessCast<MSInt16>::Cast(quantizedValues) - ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px]);
+                        MSInt32 deltaSq = ParallelMath::XMultiply(delta, delta);
+                        totalError = totalError + ParallelMath::LosslessCast<MUInt31>::Cast(deltaSq);
+                    }
+                    else
+                        totalError = totalError + ParallelMath::ToUInt31(ParallelMath::SqDiffUInt8(quantizedValues, pixels[px]));
+                }
+
+                ParallelMath::Int16CompFlag isBetter = ParallelMath::Int32FlagToInt16(ParallelMath::Less(totalError, bestTotalError));
+                if (ParallelMath::AnySet(isBetter))
+                {
+                    ParallelMath::ConditionalSet(bestTotalError, isBetter, totalError);
+                    ParallelMath::ConditionalSet(bestTableIndex, isBetter, ParallelMath::MakeUInt15(tableIndex));
+                    ParallelMath::ConditionalSet(bestBaseCodeword, isBetter, baseAlpha);
+                    ParallelMath::ConditionalSet(bestMultiplier, isBetter, multiplier);
+
+                    for (int px = 0; px < 16; px++)
+                        ParallelMath::ConditionalSet(bestIndexes[px], isBetter, indexes[px]);
+                }
+
+                // TODO: Do one refine pass
+            }
+        }
+    }
+
+    if (is11Bit)
+    {
+        // Both values were searched in the 11-bit domain (scaled by 8); reduce them to the
+        // 4-bit multiplier and 8-bit base codeword that are actually stored.  A searched
+        // multiplier of 1 becomes the stored multiplier 0.
+        bestMultiplier = ParallelMath::RightShift(bestMultiplier, 3);
+        bestBaseCodeword = ParallelMath::RightShift(bestBaseCodeword, 3);
+
+        // The signed base was biased by +128 codewords; flipping the top bit of the 8-bit
+        // codeword converts it to its two's complement representation.
+        if (isSigned)
+            bestBaseCodeword = bestBaseCodeword ^ ParallelMath::MakeUInt15(0x80);
+    }
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        uint8_t *output = outputBuffer + block * 8;
+
+        output[0] = static_cast<uint8_t>(ParallelMath::Extract(bestBaseCodeword, block));
+
+        ParallelMath::ScalarUInt16 multiplier = ParallelMath::Extract(bestMultiplier, block);
+        ParallelMath::ScalarUInt16 tableIndex = ParallelMath::Extract(bestTableIndex, block);
+
+        output[1] = static_cast<uint8_t>((multiplier << 4) | tableIndex);
+
+        // Selectors are stored transposed relative to the order of the pixels array
+        static const int pixelSelectorOrder[16] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+        ParallelMath::ScalarUInt16 indexes[16];
+        for (int px = 0; px < 16; px++)
+            indexes[pixelSelectorOrder[px]] = ParallelMath::Extract(bestIndexes[px], block);
+
+        // Pack sixteen 3-bit selectors MSB-first into the remaining 6 bytes
+        int outputOffset = 2;
+        int outputBits = 0;
+        int numOutputBits = 0;
+        for (int s = 0; s < 16; s++)
+        {
+            outputBits = (outputBits << 3) | indexes[s];
+            numOutputBits += 3;
+
+            if (numOutputBits >= 8)
+            {
+                output[outputOffset++] = static_cast<uint8_t>(outputBits >> (numOutputBits - 8));
+                numOutputBits -= 8;
+
+                outputBits &= ((1 << numOutputBits) - 1);
+            }
+        }
+
+        assert(outputOffset == 8 && numOutputBits == 0);
+    }
+}
+
+// Compresses ParallelSize single-channel EAC blocks from signed 16-bit samples.
+// Each sample is clamped (and, for signed data, biased by 1024) into an unsigned working
+// range, then the shared alpha/EAC encoder is run in 11-bit mode.
+void cvtt::Internal::ETCComputer::CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options)
+{
+    MUInt15 biasedPixels[16];
+
+    for (int pixelIndex = 0; pixelIndex < 16; pixelIndex++)
+    {
+        // Gather this pixel from every block that is processed in parallel
+        MSInt16 sample;
+        for (int lane = 0; lane < ParallelMath::ParallelSize; lane++)
+            ParallelMath::PutSInt16(sample, lane, inputBlocks[lane].m_pixels[pixelIndex]);
+
+        // The working range is shifted slightly so the unquantized base value fits in a UInt15:
+        // signed input ends up in 1..2047 and unsigned input in 0..2047.
+        if (!isSigned)
+        {
+            sample = ParallelMath::Min(sample, ParallelMath::MakeSInt16(2047));
+            sample = ParallelMath::Max(ParallelMath::MakeSInt16(0), sample);
+        }
+        else
+        {
+            MSInt16 biased = ParallelMath::Min(sample, ParallelMath::MakeSInt16(1023)) + ParallelMath::MakeSInt16(1024);
+            sample = ParallelMath::Max(ParallelMath::MakeSInt16(1), biased);
+        }
+
+        biasedPixels[pixelIndex] = ParallelMath::LosslessCast<MUInt15>::Cast(sample);
+    }
+
+    CompressETC2AlphaBlockInternal(outputBuffer, biasedPixels, true, isSigned, options);
+}
+
+// Public ETC1 entry point: unpacks the input blocks and runs the ETC1 search without
+// punchthrough, using the scratch storage carried by compressionData.
+void cvtt::Internal::ETCComputer::CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options)
+{
+    MUInt15 blockPixels[16][3];
+    MFloat weightedPixels[16][3];
+    ExtractBlocks(blockPixels, weightedPixels, inputBlocks, options);
+
+    // The opaque handle is really the internal type that owns the resolve storage
+    ETC1CompressionDataInternal *internalData = static_cast<ETC1CompressionDataInternal*>(compressionData);
+
+    // Start from the worst possible error so the first candidate is always accepted
+    MFloat totalError = ParallelMath::MakeFloat(FLT_MAX);
+
+    CompressETC1BlockInternal(totalError, outputBuffer, blockPixels, weightedPixels, internalData->m_drs, options, false);
+}
+
+// Transposes the RGB channels of ParallelSize input blocks into per-pixel parallel values
+// and computes the error-metric representation of every pixel: fake BT.709 YUV, plain
+// float RGB (uniform flag), or RGB scaled by the per-channel weights from options.
+void cvtt::Internal::ETCComputer::ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options)
+{
+    const bool useFakeBT709 = (options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0;
+    const bool useUniformWeights = (options.flags & cvtt::Flags::Uniform) != 0;
+
+    for (int pixelIndex = 0; pixelIndex < 16; pixelIndex++)
+    {
+        MUInt15 *pixel = pixels[pixelIndex];
+        MFloat *weighted = preWeightedPixels[pixelIndex];
+
+        for (int channel = 0; channel < 3; channel++)
+        {
+            for (int lane = 0; lane < ParallelMath::ParallelSize; lane++)
+                ParallelMath::PutUInt15(pixel[channel], lane, inputBlocks[lane].m_pixels[pixelIndex][channel]);
+        }
+
+        // The fake BT.709 flag takes priority over the uniform flag
+        if (useFakeBT709)
+        {
+            ConvertToFakeBT709(weighted, pixel);
+            continue;
+        }
+
+        if (useUniformWeights)
+        {
+            for (int channel = 0; channel < 3; channel++)
+                weighted[channel] = ParallelMath::ToFloat(pixel[channel]);
+            continue;
+        }
+
+        weighted[0] = ParallelMath::ToFloat(pixel[0]) * options.redWeight;
+        weighted[1] = ParallelMath::ToFloat(pixel[1]) * options.greenWeight;
+        weighted[2] = ParallelMath::ToFloat(pixel[2]) * options.blueWeight;
+    }
+}
+
+// Quantizes a half-block color sum to 5 bits per channel (isDifferential) or 4 bits per
+// channel, choosing per channel between rounding down and rounding up so that the result
+// is closest to the target in fake BT.709 space.
+//
+// quantized:        receives the quantized color.
+// sectorCumulative: sum of the 8 pixel values of the half-block per channel.
+// isDifferential:   true for 5-bit quantization, false for 4-bit quantization.
+void cvtt::Internal::ETCComputer::ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential)
+{
+    // Start from the rounded-down quantization of every channel
+    for (int ch = 0; ch < 3; ch++)
+    {
+        const MUInt15& cu15 = sectorCumulative[ch];
+
+        if (isDifferential)
+        {
+            //quantized[ch] = (cu * 31 + (cu >> 3)) >> 11;
+            quantized[ch] = ParallelMath::ToUInt15(
+                ParallelMath::RightShift(
+                (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3))
+                    , 11)
+            );
+        }
+        else
+        {
+            //quantized[ch] = (cu * 30 + (cu >> 3)) >> 12;
+            quantized[ch] = ParallelMath::ToUInt15(
+                ParallelMath::RightShift(
+                (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15 << 1) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3))
+                    , 12)
+            );
+        }
+    }
+
+    // Reconstructed value of the rounded-down and rounded-up candidate per channel,
+    // multiplied by 8 so it is comparable with the 8-pixel sum
+    MFloat lowOctantRGBFloat[3];
+    MFloat highOctantRGBFloat[3];
+
+    for (int ch = 0; ch < 3; ch++)
+    {
+        MUInt15 unquantized;
+        MUInt15 unquantizedNext;
+        if (isDifferential)
+        {
+            unquantized = (quantized[ch] << 3) | ParallelMath::RightShift(quantized[ch], 2);
+            MUInt15 quantizedNext = ParallelMath::Min(ParallelMath::MakeUInt15(31), quantized[ch] + ParallelMath::MakeUInt15(1));
+            unquantizedNext = (quantizedNext << 3) | ParallelMath::RightShift(quantizedNext, 2);
+        }
+        else
+        {
+            unquantized = (quantized[ch] << 4) | quantized[ch];
+            unquantizedNext = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantized + ParallelMath::MakeUInt15(17));
+        }
+        lowOctantRGBFloat[ch] = ParallelMath::ToFloat(unquantized << 3);
+        highOctantRGBFloat[ch] = ParallelMath::ToFloat(unquantizedNext << 3);
+    }
+
+    MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+    MUInt15 bestOctant = ParallelMath::MakeUInt15(0);
+
+    MFloat cumulativeYUV[3];
+    ConvertToFakeBT709(cumulativeYUV, sectorCumulative);
+
+    // Bit ch of the octant index selects the rounded-up candidate for channel ch.
+    // The comparison is strict, so ties keep the lowest octant index.
+    for (uint16_t octant = 0; octant < 8; octant++)
+    {
+        const MFloat &r = (octant & 1) ? highOctantRGBFloat[0] : lowOctantRGBFloat[0];
+        const MFloat &g = (octant & 2) ? highOctantRGBFloat[1] : lowOctantRGBFloat[1];
+        const MFloat &b = (octant & 4) ? highOctantRGBFloat[2] : lowOctantRGBFloat[2];
+
+        MFloat octantYUV[3];
+        ConvertToFakeBT709(octantYUV, r, g, b);
+
+        MFloat delta[3];
+        for (int ch = 0; ch < 3; ch++)
+            delta[ch] = octantYUV[ch] - cumulativeYUV[ch];
+
+        // Sum of squared differences over all three channels
+        MFloat error = delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
+        ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestError));
+        ParallelMath::ConditionalSet(bestOctant, errorBetter, ParallelMath::MakeUInt15(octant));
+        bestError = ParallelMath::Min(error, bestError);
+    }
+
+    // Apply the chosen per-channel round-up bits
+    for (int ch = 0; ch < 3; ch++)
+        quantized[ch] = quantized[ch] + (ParallelMath::RightShift(bestOctant, ch) & ParallelMath::MakeUInt15(1));
+}
+
+// Table-driven variant of the fake BT.709 rounding: quantizes a half-block color sum to
+// 5 bits per channel (isDifferential) or 4 bits per channel and looks up which channels
+// to round up from the top four fractional bits of each channel.
+//
+// quantized:        receives the quantized color, clamped to the largest representable value.
+// sectorCumulative: sum of the 8 pixel values of the half-block per channel.
+// isDifferential:   true for 5-bit quantization, false for 4-bit quantization.
+void cvtt::Internal::ETCComputer::ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential)
+{
+    // sectorCumulative range is 0..2040 (11 bits)
+    MUInt15 sectorCumulativeFillIn[3];
+    for (int ch = 0; ch < 3; ch++)
+        sectorCumulativeFillIn[ch] = sectorCumulative[ch] + ParallelMath::RightShift(sectorCumulative[ch], 8);
+
+    // The quantized value is the top 5 (differential) or 4 bits of the 11-bit fill-in value.
+    // The four bits directly below it are the fraction used to index the rounding table.
+    const int fractionShift = isDifferential ? 2 : 3;
+    const int quantizeShift = fractionShift + 4;
+    const MUInt15 upperBound = ParallelMath::MakeUInt15(isDifferential ? 31 : 15);
+    const MUInt15 fractionMask = ParallelMath::MakeUInt15(0x00f);
+
+    MUInt15 fraction[3];
+    MUInt15 quantizedBase[3];
+    for (int ch = 0; ch < 3; ch++)
+    {
+        fraction[ch] = ParallelMath::RightShift(sectorCumulativeFillIn[ch], fractionShift) & fractionMask;
+        quantizedBase[ch] = ParallelMath::RightShift(sectorCumulativeFillIn[ch], quantizeShift);
+    }
+
+    // Table index layout: red fraction in bits 8..11, green in bits 4..7, blue in bits 0..3
+    MUInt15 lookupIndex = ((fraction[0] << 8) | (fraction[1] << 4) | fraction[2]);
+
+    MUInt15 octant;
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        ParallelMath::PutUInt15(octant, block, Tables::FakeBT709::g_rounding16[ParallelMath::Extract(lookupIndex, block)]);
+
+    // Bit ch of the table entry says whether channel ch is rounded up
+    for (int ch = 0; ch < 3; ch++)
+    {
+        quantizedBase[ch] = quantizedBase[ch] + (ParallelMath::RightShift(octant, ch) & ParallelMath::MakeUInt15(1));
+        quantized[ch] = ParallelMath::Min(quantizedBase[ch], upperBound);
+    }
+}
+
+// Adjusts an already rounded-down 4-bit-per-channel color for T/H mode by choosing, per
+// channel, between keeping it and rounding it up, so that the result is closest to the
+// target in fake BT.709 space.
+//
+// quantized:   in/out; on entry the rounded-down color, on exit the selected color.
+// targets:     the color to approximate, in the same scale as the candidates below.
+// granularity: scale applied to the reconstructed candidates; they are compared against
+//              targets as reconstructed * granularity * 2.
+void cvtt::Internal::ETCComputer::ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 targets[3], const MUInt15 &granularity)
+{
+    MFloat lowOctantRGBFloat[3];
+    MFloat highOctantRGBFloat[3];
+
+    for (int ch = 0; ch < 3; ch++)
+    {
+        // Expand 4 bits to 8 by replication; the next quantization step is 17 higher
+        MUInt15 unquantized = (quantized[ch] << 4) | quantized[ch];
+        MUInt15 unquantizedNext = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantized + ParallelMath::MakeUInt15(17));
+
+        lowOctantRGBFloat[ch] = ParallelMath::ToFloat(ParallelMath::CompactMultiply(unquantized, granularity) << 1);
+        highOctantRGBFloat[ch] = ParallelMath::ToFloat(ParallelMath::CompactMultiply(unquantizedNext, granularity) << 1);
+    }
+
+    MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+    MUInt15 bestOctant = ParallelMath::MakeUInt15(0);
+
+    MFloat cumulativeYUV[3];
+    ConvertToFakeBT709(cumulativeYUV, ParallelMath::ToFloat(targets[0]), ParallelMath::ToFloat(targets[1]), ParallelMath::ToFloat(targets[2]));
+
+    // Bit ch of the octant index selects the rounded-up candidate for channel ch.
+    // The comparison is strict, so ties keep the lowest octant index.
+    for (uint16_t octant = 0; octant < 8; octant++)
+    {
+        const MFloat &r = (octant & 1) ? highOctantRGBFloat[0] : lowOctantRGBFloat[0];
+        const MFloat &g = (octant & 2) ? highOctantRGBFloat[1] : lowOctantRGBFloat[1];
+        const MFloat &b = (octant & 4) ? highOctantRGBFloat[2] : lowOctantRGBFloat[2];
+
+        MFloat octantYUV[3];
+        ConvertToFakeBT709(octantYUV, r, g, b);
+
+        MFloat delta[3];
+        for (int ch = 0; ch < 3; ch++)
+            delta[ch] = octantYUV[ch] - cumulativeYUV[ch];
+
+        // Sum of squared differences over all three channels
+        MFloat error = delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
+        ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestError));
+        ParallelMath::ConditionalSet(bestOctant, errorBetter, ParallelMath::MakeUInt15(octant));
+        bestError = ParallelMath::Min(error, bestError);
+    }
+
+    // Apply the chosen per-channel round-up bits
+    for (int ch = 0; ch < 3; ch++)
+        quantized[ch] = quantized[ch] + (ParallelMath::RightShift(bestOctant, ch) & ParallelMath::MakeUInt15(1));
+}
+
+// Integer-input overload: converts each channel to float and forwards to the
+// per-component conversion.
+void cvtt::Internal::ETCComputer::ConvertToFakeBT709(MFloat yuv[3], const MUInt15 color[3])
+{
+    ConvertToFakeBT709(yuv, ParallelMath::ToFloat(color[0]), ParallelMath::ToFloat(color[1]), ParallelMath::ToFloat(color[2]));
+}
+
+// Array-input overload: unpacks the RGB triple and forwards to the per-component conversion.
+void cvtt::Internal::ETCComputer::ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3])
+{
+    const MFloat &red = color[0];
+    const MFloat &green = color[1];
+    const MFloat &blue = color[2];
+
+    ConvertToFakeBT709(yuv, red, green, blue);
+}
+
+// Applies the fixed 3x3 "fake BT.709" matrix to an RGB triple and stores the result in yuv.
+void cvtt::Internal::ETCComputer::ConvertToFakeBT709(MFloat yuv[3], const MFloat &pr, const MFloat &pg, const MFloat &pb)
+{
+    // Work on copies so the inputs are fully read before any output element is written
+    // (the references could refer to elements of yuv).
+    MFloat red = pr;
+    MFloat green = pg;
+    MFloat blue = pb;
+
+    // Row 0
+    const float kYFromR = 0.368233989135369f;
+    const float kYFromG = 1.23876274963149f;
+    const float kYFromB = 0.125054068802017f;
+    // Row 1 (green and blue terms are subtracted)
+    const float kUFromR = 0.5f;
+    const float kUFromG = 0.4541529f;
+    const float kUFromB = 0.04584709f;
+    // Row 2 (green term is subtracted)
+    const float kVFromR = -0.081014709086133f;
+    const float kVFromG = 0.272538676238785f;
+    const float kVFromB = 0.353553390593274f;
+
+    yuv[0] = red * kYFromR + green * kYFromG + blue * kYFromB;
+    yuv[1] = red * kUFromR - green * kUFromG - blue * kUFromB;
+    yuv[2] = red * kVFromR - green * kVFromG + blue * kVFromB;
+}
+
+// Converts a fake BT.709 triple back to RGB using fixed coefficients.
+void cvtt::Internal::ETCComputer::ConvertFromFakeBT709(MFloat rgb[3], const MFloat yuv[3])
+{
+    const float kLumaScale = 0.57735026466774571071f;
+    const float kRedFromU = 1.5748000207960953486f;
+    const float kGreenFromU = 0.46812425854364753669f;
+    const float kGreenFromV = 0.26491652528157560861f;
+    const float kBlueFromV = 2.6242146882856944069f;
+
+    // Read every input before writing any output
+    MFloat luma = yuv[0] * kLumaScale;
+    MFloat chromaU = yuv[1];
+    MFloat chromaV = yuv[2];
+
+    rgb[0] = luma + chromaU * kRedFromU;
+    rgb[1] = luma - chromaU * kGreenFromU - chromaV * kGreenFromV;
+    rgb[2] = luma + chromaV * kBlueFromV;
+}
+
+
+// Picks, for one pixel of every parallel block, the selector of the given ETC2 alpha
+// modifier table that best reproduces value from baseValue and multiplier.
+//
+// outIndexes receives the 3-bit selector and outQuantizedValues the reconstructed value,
+// clamped to 0..255 (8-bit), 0..2047 (unsigned 11-bit) or 1..2047 (signed 11-bit).
+// multiplier must be non-zero in every lane because it is used as a divisor.
+void cvtt::Internal::ETCComputer::QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues)
+{
+    MSInt16 signedBase = ParallelMath::LosslessCast<MSInt16>::Cast(baseValue);
+    MSInt16 distance = ParallelMath::LosslessCast<MSInt16>::Cast(value) - signedBase;
+    MSInt16 distanceTimes2 = distance + distance;
+
+    // ETC2's offset tables all have a reflect about 0.5*multiplier, so measure the
+    // (doubled) distance from that reflection point
+    MSInt16 reflectedTimes2 = distanceTimes2 + ParallelMath::LosslessCast<MSInt16>::Cast(multiplier);
+
+    MUInt15 reflectedMagnitudeTimes2 = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Abs(reflectedTimes2));
+    MUInt15 reflectedMagnitude = ParallelMath::RightShift(reflectedMagnitudeTimes2, 1);
+
+    MUInt15 magnitudeSelector;
+    MUInt15 magnitudeModifier;
+    for (int lane = 0; lane < ParallelMath::ParallelSize; lane++)
+    {
+        // Distance in multiplier units, limited to the last column of the rounding table
+        uint16_t tableColumn = ParallelMath::Extract(reflectedMagnitude, lane) / ParallelMath::Extract(multiplier, lane);
+        if (tableColumn >= Tables::ETC2::g_alphaRoundingTableWidth)
+            tableColumn = Tables::ETC2::g_alphaRoundingTableWidth - 1;
+
+        uint16_t selector = Tables::ETC2::g_alphaRoundingTables[tableIndex][tableColumn];
+        ParallelMath::PutUInt15(magnitudeSelector, lane, selector);
+        ParallelMath::PutUInt15(magnitudeModifier, lane, Tables::ETC2::g_alphaModifierTablePositive[tableIndex][selector]);
+
+        // TODO: This is suboptimal when the offset is capped.  We should detect 0 and 255 values and always map them to the maximum offsets.
+        // Doing that will also affect refinement though.
+    }
+
+    // All ones in lanes where the reflected distance is negative, zero elsewhere
+    MSInt16 negativeMask = ParallelMath::RightShift(reflectedTimes2, 15);
+
+    // XOR with the mask turns a positive modifier m into -m - 1 in the negative lanes
+    MSInt16 signedModifier = ParallelMath::LosslessCast<MSInt16>::Cast(magnitudeModifier) ^ negativeMask;
+    MSInt16 scaledModifier = ParallelMath::CompactMultiply(signedModifier, multiplier);
+
+    MSInt16 reconstructed = ParallelMath::LosslessCast<MSInt16>::Cast(baseValue) + scaledModifier;
+
+    // Only the limits differ between the three output formats
+    const int16_t lowerLimit = (is11Bit && isSigned) ? 1 : 0;
+    const uint16_t upperLimit = is11Bit ? 2047 : 255;
+    outQuantizedValues = ParallelMath::Min(ParallelMath::MakeUInt15(upperLimit), ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(lowerLimit), reconstructed)));
+
+    // Positive offsets use selectors 4..7, negative offsets selectors 0..3
+    MUInt15 negativeAdjust = ParallelMath::LosslessCast<MUInt15>::Cast(negativeMask) & ParallelMath::MakeUInt15(4);
+
+    outIndexes = magnitudeSelector + ParallelMath::MakeUInt15(4) - negativeAdjust;
+}
+
+
+// Packs one ETC2 T-mode block into the 8 bytes at outputBuffer (big-endian, color word first).
+//
+// lineColor / isolatedColor: the two base colors, one value per channel.
+// packedSelectors:           2 bits per pixel, pixel px at bits 2*px and 2*px+1.
+// table:                     3-bit distance table index.
+// opaque:                    sets bit 33 of the block when true.
+void cvtt::Internal::ETCComputer::EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque)
+{
+    // Selector bits are written in transposed pixel order
+    static const int transposedOrder[] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+    // The red channel of the isolated color is split into two 2-bit halves
+    const int redHigh = ((isolatedColor[0] >> 2) & 3);
+    const int redLow = (isolatedColor[0] & 3);
+
+    // Fill the spare red bits so that the red field overflows, low or high depending on
+    // which direction is reachable with these two halves
+    uint32_t colorWord = 0;
+    if (redHigh + redLow >= 4)
+        colorWord |= 7u << (61 - 32);   // Overflow high
+    else
+        colorWord |= 1u << (58 - 32);   // Overflow low
+
+    // Bit positions are given relative to the full 64-bit block
+    colorWord |= redHigh << (59 - 32);
+    colorWord |= redLow << (56 - 32);
+    colorWord |= isolatedColor[1] << (52 - 32);
+    colorWord |= isolatedColor[2] << (48 - 32);
+    colorWord |= lineColor[0] << (44 - 32);
+    colorWord |= lineColor[1] << (40 - 32);
+    colorWord |= lineColor[2] << (36 - 32);
+    colorWord |= ((table >> 1) & 3) << (34 - 32);
+    colorWord |= (opaque ? 1u : 0u) << (33 - 32);
+    colorWord |= (table & 1) << (32 - 32);
+
+    // Low selector bits go to bits 0..15, high selector bits to bits 16..31
+    uint32_t selectorWord = 0;
+    for (int bit = 0; bit < 16; bit++)
+    {
+        const int selector = (packedSelectors >> (2 * transposedOrder[bit])) & 3;
+        selectorWord |= static_cast<uint32_t>(selector & 1) << bit;
+        selectorWord |= static_cast<uint32_t>((selector >> 1) & 1) << (16 + bit);
+    }
+
+    const uint32_t words[2] = { colorWord, selectorWord };
+    for (int word = 0; word < 2; word++)
+    {
+        for (int byteIndex = 0; byteIndex < 4; byteIndex++)
+            outputBuffer[word * 4 + byteIndex] = (words[word] >> (24 - byteIndex * 8)) & 0xff;
+    }
+}
+
+// Packs one ETC2 H-mode block into the 8 bytes at outputBuffer (big-endian, color word first).
+//
+// blockColors: the two base colors, each packed with red at bit 10, green at bit 5 and
+//              blue at bit 0.
+// sectorBits:  bit px selects which base color pixel px uses.
+// signBits:    bit px is the sign selector of pixel px.
+// table:       3-bit distance table index; its low bit is encoded through the color order.
+// opaque:      sets bit 33 of the block when true.
+void cvtt::Internal::ETCComputer::EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque)
+{
+    if (blockColors[0] == blockColors[1])
+    {
+        // Identical base colors cannot be ordered, so swapping them cannot express the low
+        // bit of the table index.  Emit the block as T mode instead, with both colors equal
+        // and every selector placed on the line.
+        ParallelMath::ScalarUInt16 sharedColor[3];
+        for (int ch = 0; ch < 3; ch++)
+            sharedColor[ch] = (blockColors[0] >> ((2 - ch) * 5)) & 0x1f;
+
+        // Low selector bit is always set; the high selector bit carries the sign
+        int32_t tModeSelectors = 0x55555555;
+        for (int px = 0; px < 16; px++)
+            tModeSelectors |= ((signBits >> px) & 1) << ((px * 2) + 1);
+
+        EmitTModeBlock(outputBuffer, sharedColor, sharedColor, tModeSelectors, table, opaque);
+        return;
+    }
+
+    // Selector bits are written in transposed pixel order
+    static const int transposedOrder[] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+    // The low table bit is encoded by which color comes first, so swap the colors (and
+    // with them the meaning of the sector bits) when the current order says otherwise
+    const bool tableLowBitSet = ((table & 1) == 1);
+    const bool firstColorIsLarger = (blockColors[0] > blockColors[1]);
+    const bool swapColors = (tableLowBitSet != firstColorIsLarger);
+
+    const int firstSector = swapColors ? 1 : 0;
+    const int secondSector = 1 - firstSector;
+    if (swapColors)
+        sectorBits ^= 0xffff;
+
+    int firstColor[3];
+    int secondColor[3];
+    for (int ch = 0; ch < 3; ch++)
+    {
+        const int shift = (2 - ch) * 5;
+        firstColor[ch] = (blockColors[firstSector] >> shift) & 15;
+        secondColor[ch] = (blockColors[secondSector] >> shift) & 15;
+    }
+
+    // The first color is scattered over several fields
+    const int r1 = firstColor[0];
+    const int g1a = firstColor[1] >> 1;
+    const int g1b = (firstColor[1] & 1);
+    const int b1a = firstColor[2] >> 3;
+    const int b1b = firstColor[2] & 7;
+    const int r2 = secondColor[0];
+    const int g2 = secondColor[1];
+    const int b2 = secondColor[2];
+
+    uint32_t colorWord = 0;
+
+    // Avoid overflowing R
+    if ((g1a & 4) != 0 && r1 + g1a < 8)
+        colorWord |= 1u << (63 - 32);
+
+    // Fill the spare green bits so that the green field overflows, low or high depending
+    // on which direction is reachable with the bits that overlap it
+    const int fakeDG = b1b >> 1;
+    const int fakeG = b1a | (g1b << 1);
+    if (fakeG + fakeDG >= 4)
+        colorWord |= 7u << (53 - 32);   // Overflow high
+    else
+        colorWord |= 1u << (50 - 32);   // Overflow low
+
+    const int da = (table >> 2) & 1;
+    const int db = (table >> 1) & 1;
+
+    // Bit positions are given relative to the full 64-bit block
+    colorWord |= r1 << (59 - 32);
+    colorWord |= g1a << (56 - 32);
+    colorWord |= g1b << (52 - 32);
+    colorWord |= b1a << (51 - 32);
+    colorWord |= b1b << (47 - 32);
+    colorWord |= r2 << (43 - 32);
+    colorWord |= g2 << (39 - 32);
+    colorWord |= b2 << (35 - 32);
+    colorWord |= da << (34 - 32);
+    colorWord |= (opaque ? 1u : 0u) << (33 - 32);
+    colorWord |= db << (32 - 32);
+
+    // Sign bits go to bits 0..15, sector bits to bits 16..31
+    uint32_t selectorWord = 0;
+    for (int bit = 0; bit < 16; bit++)
+    {
+        const int sourcePixel = transposedOrder[bit];
+        const uint32_t sectorBit = (sectorBits >> sourcePixel) & 1;
+        const uint32_t signBit = (signBits >> sourcePixel) & 1;
+
+        selectorWord |= signBit << bit;
+        selectorWord |= sectorBit << (16 + bit);
+    }
+
+    const uint32_t words[2] = { colorWord, selectorWord };
+    for (int word = 0; word < 2; word++)
+    {
+        for (int byteIndex = 0; byteIndex < 4; byteIndex++)
+            outputBuffer[word * 4 + byteIndex] = (words[word] >> (24 - byteIndex * 8)) & 0xff;
+    }
+}
+
+// Packs one ETC1-style block (individual or differential sub-block colors) into the
+// 8 bytes at outputBuffer (big-endian, color word first).
+//
+// blockBestFlip:      flip bit; also selects the pixel layout of the two sectors.
+// blockBestD:         0 for individual 4-bit colors, non-zero for 5-bit base + 3-bit delta.
+// blockBestColors:    quantized color of each sector, one value per channel.
+// blockBestTables:    modifier table index of each sector.
+// blockBestSelectors: 2 bits per pixel for each sector, pixel px at bits 2*px and 2*px+1.
+// transparent:        when true the diff bit is left clear even though blockBestD is set.
+void cvtt::Internal::ETCComputer::EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent)
+{
+    uint32_t highBits = 0;
+    uint32_t lowBits = 0;
+
+    if (blockBestD == 0)
+    {
+        // Individual mode: two 4-bit values per channel
+        highBits |= blockBestColors[0][0] << 28;
+        highBits |= blockBestColors[1][0] << 24;
+        highBits |= blockBestColors[0][1] << 20;
+        highBits |= blockBestColors[1][1] << 16;
+        highBits |= blockBestColors[0][2] << 12;
+        highBits |= blockBestColors[1][2] << 8;
+    }
+    else
+    {
+        // Differential mode: 5-bit base plus 3-bit two's complement delta per channel
+        highBits |= blockBestColors[0][0] << 27;
+        highBits |= ((blockBestColors[1][0] - blockBestColors[0][0]) & 7) << 24;
+        highBits |= blockBestColors[0][1] << 19;
+        highBits |= ((blockBestColors[1][1] - blockBestColors[0][1]) & 7) << 16;
+        highBits |= blockBestColors[0][2] << 11;
+        highBits |= ((blockBestColors[1][2] - blockBestColors[0][2]) & 7) << 8;
+    }
+
+    highBits |= (blockBestTables[0] << 5);
+    highBits |= (blockBestTables[1] << 2);
+    if (!transparent)
+        highBits |= (blockBestD << 1);
+    highBits |= blockBestFlip;
+
+    // Maps a selector (position in the sorted modifier table) to the code stored in the block
+    static const uint8_t modifierCodes[4] = { 3, 2, 0, 1 };
+
+    // Scatter the per-sector selectors back to block pixel positions
+    uint8_t unpackedSelectorCodes[16];
+    for (int sector = 0; sector < 2; sector++)
+    {
+        int blockSectorBestSelectors = blockBestSelectors[sector];
+
+        for (int px = 0; px < 8; px++)
+        {
+            int selector = (blockSectorBestSelectors >> (2 * px)) & 3;
+            unpackedSelectorCodes[g_flipTables[blockBestFlip][sector][px]] = modifierCodes[selector];
+        }
+    }
+
+    // Selector bits are written in transposed pixel order
+    static const int pixelSelectorOrder[16] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+
+    // Bit 0 of every code goes to bits 0..15, bit 1 to bits 16..31
+    for (int sb = 0; sb < 2; sb++)
+        for (int px = 0; px < 16; px++)
+            lowBits |= ((unpackedSelectorCodes[pixelSelectorOrder[px]] >> sb) & 1) << (px + sb * 16);
+
+    for (int i = 0; i < 4; i++)
+        outputBuffer[i] = (highBits >> (24 - i * 8)) & 0xff;
+    for (int i = 0; i < 4; i++)
+        outputBuffer[i + 4] = (lowBits >> (24 - i * 8)) & 0xff;
+}
+
+void cvtt::Internal::ETCComputer::CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage &drs, const Options &options, bool punchthrough)
+{
+	int numTries = 0;
+
+    MUInt15 zeroU15 = ParallelMath::MakeUInt15(0);
+    MUInt16 zeroU16 = ParallelMath::MakeUInt16(0);
+
+    MUInt15 bestColors[2] = { zeroU15, zeroU15 };
+    MUInt16 bestSelectors[2] = { zeroU16, zeroU16 };
+    MUInt15 bestTables[2] = { zeroU15, zeroU15 };
+    MUInt15 bestFlip = zeroU15;
+    MUInt15 bestD = zeroU15;
+
+    MUInt15 sectorPixels[2][2][8][3];
+    MFloat sectorPreWeightedPixels[2][2][8][3];
+    MUInt15 sectorCumulative[2][2][3];
+
+    ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false);
+
+    for (int flip = 0; flip < 2; flip++)
+	{
+		for (int sector = 0; sector < 2; sector++)
+		{
+			for (int ch = 0; ch < 3; ch++)
+				sectorCumulative[flip][sector][ch] = zeroU15;
+
+			for (int px = 0; px < 8; px++)
+			{
+				for (int ch = 0; ch < 3; ch++)
+				{
+					MUInt15 pixelChannelValue = pixels[g_flipTables[flip][sector][px]][ch];
+					sectorPixels[flip][sector][px][ch] = pixelChannelValue;
+                    sectorPreWeightedPixels[flip][sector][px][ch] = preWeightedPixels[g_flipTables[flip][sector][px]][ch];
+					sectorCumulative[flip][sector][ch] = sectorCumulative[flip][sector][ch] + pixelChannelValue;
+				}
+			}
+		}
+	}
+
+	static const MSInt16 modifierTables[8][4] =
+	{
+		{ ParallelMath::MakeSInt16(-8), ParallelMath::MakeSInt16(-2), ParallelMath::MakeSInt16(2), ParallelMath::MakeSInt16(8) },
+		{ ParallelMath::MakeSInt16(-17), ParallelMath::MakeSInt16(-5), ParallelMath::MakeSInt16(5), ParallelMath::MakeSInt16(17) },
+		{ ParallelMath::MakeSInt16(-29), ParallelMath::MakeSInt16(-9), ParallelMath::MakeSInt16(9), ParallelMath::MakeSInt16(29) },
+		{ ParallelMath::MakeSInt16(-42), ParallelMath::MakeSInt16(-13), ParallelMath::MakeSInt16(13), ParallelMath::MakeSInt16(42) },
+		{ ParallelMath::MakeSInt16(-60), ParallelMath::MakeSInt16(-18), ParallelMath::MakeSInt16(18), ParallelMath::MakeSInt16(60) },
+		{ ParallelMath::MakeSInt16(-80), ParallelMath::MakeSInt16(-24), ParallelMath::MakeSInt16(24), ParallelMath::MakeSInt16(80) },
+		{ ParallelMath::MakeSInt16(-106), ParallelMath::MakeSInt16(-33), ParallelMath::MakeSInt16(33), ParallelMath::MakeSInt16(106) },
+		{ ParallelMath::MakeSInt16(-183), ParallelMath::MakeSInt16(-47), ParallelMath::MakeSInt16(47), ParallelMath::MakeSInt16(183) },
+	};
+
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0);
+
+    int minD = punchthrough ? 1 : 0;
+
+	for (int flip = 0; flip < 2; flip++)
+	{
+		drs.diffNumAttempts[0] = drs.diffNumAttempts[1] = zeroU15;
+
+		MFloat bestIndError[2] = { ParallelMath::MakeFloat(FLT_MAX), ParallelMath::MakeFloat(FLT_MAX) };
+		MUInt16 bestIndSelectors[2] = { ParallelMath::MakeUInt16(0), ParallelMath::MakeUInt16(0) };
+		MUInt15 bestIndColors[2] = { zeroU15, zeroU15 };
+		MUInt15 bestIndTable[2] = { zeroU15, zeroU15 };
+
+		for (int d = minD; d < 2; d++)
+		{
+			for (int sector = 0; sector < 2; sector++)
+			{
+				const int16_t *potentialOffsets = cvtt::Tables::ETC1::g_potentialOffsets4;
+
+				for (int table = 0; table < 8; table++)
+				{
+					int16_t numOffsets = *potentialOffsets++;
+
+					MUInt15 possibleColors[cvtt::Tables::ETC1::g_maxPotentialOffsets];
+
+                    MUInt15 quantized[3];
+                    for (int oi = 0; oi < numOffsets; oi++)
+                    {
+                        if (!isFakeBT709)
+                        {
+						    for (int ch = 0; ch < 3; ch++)
+						    {
+                                // cu is in range 0..2040
+                                MUInt15 cu15 = ParallelMath::Min(
+                                    ParallelMath::MakeUInt15(2040),
+                                    ParallelMath::ToUInt15(
+                                        ParallelMath::Max(
+                                            ParallelMath::MakeSInt16(0),
+                                            ParallelMath::LosslessCast<MSInt16>::Cast(sectorCumulative[flip][sector][ch]) + ParallelMath::MakeSInt16(potentialOffsets[oi])
+                                        )
+                                    )
+                                );
+
+                                if (d == 1)
+                                {
+                                    //quantized[ch] = (cu * 31 + (cu >> 3) + 1024) >> 11;
+                                    quantized[ch] = ParallelMath::ToUInt15(
+                                        ParallelMath::RightShift(
+                                            (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3)) + ParallelMath::MakeUInt16(1024)
+                                            , 11)
+                                        );
+                                }
+                                else
+                                {
+                                    //quantized[ch] = (cu * 30 + (cu >> 3) + 2048) >> 12;
+                                    quantized[ch] = ParallelMath::ToUInt15(
+                                        ParallelMath::RightShift(
+                                        (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15 << 1) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3)) + ParallelMath::MakeUInt16(2048)
+                                            , 12)
+                                    );
+                                }
+						    }
+                        }
+                        else
+                        {
+                            MUInt15 offsetCumulative[3];
+						    for (int ch = 0; ch < 3; ch++)
+						    {
+                                // cu is in range 0..2040
+                                MUInt15 cu15 = ParallelMath::Min(
+                                    ParallelMath::MakeUInt15(2040),
+                                    ParallelMath::ToUInt15(
+                                        ParallelMath::Max(
+                                            ParallelMath::MakeSInt16(0),
+                                            ParallelMath::LosslessCast<MSInt16>::Cast(sectorCumulative[flip][sector][ch]) + ParallelMath::MakeSInt16(potentialOffsets[oi])
+                                        )
+                                    )
+                                );
+
+                                offsetCumulative[ch] = cu15;
+						    }
+
+                            if ((options.flags & cvtt::Flags::ETC_FakeBT709Accurate) != 0)
+                                ResolveHalfBlockFakeBT709RoundingAccurate(quantized, offsetCumulative, d == 1);
+                            else
+                                ResolveHalfBlockFakeBT709RoundingFast(quantized, offsetCumulative, d == 1);
+                        }
+
+						possibleColors[oi] = quantized[0] | (quantized[1] << 5) | (quantized[2] << 10);
+					}
+
+					potentialOffsets += numOffsets;
+
+                    ParallelMath::UInt15 numUniqueColors;
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        uint16_t blockNumUniqueColors = 1;
+                        for (int i = 1; i < numOffsets; i++)
+                        {
+                            uint16_t color = ParallelMath::Extract(possibleColors[i], block);
+                            if (color != ParallelMath::Extract(possibleColors[blockNumUniqueColors - 1], block))
+                                ParallelMath::PutUInt15(possibleColors[blockNumUniqueColors++], block, color);
+                        }
+
+                        ParallelMath::PutUInt15(numUniqueColors, block, blockNumUniqueColors);
+                    }
+
+                    int maxUniqueColors = ParallelMath::Extract(numUniqueColors, 0);
+                    for (int block = 1; block < ParallelMath::ParallelSize; block++)
+                        maxUniqueColors = std::max<int>(maxUniqueColors, ParallelMath::Extract(numUniqueColors, block));
+
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        uint16_t fillColor = ParallelMath::Extract(possibleColors[0], block);
+                        for (int i = ParallelMath::Extract(numUniqueColors, block); i < maxUniqueColors; i++)
+                            ParallelMath::PutUInt15(possibleColors[i], block, fillColor);
+                    }
+
+					for (int i = 0; i < maxUniqueColors; i++)
+					{
+						MFloat error = ParallelMath::MakeFloatZero();
+						MUInt16 selectors = ParallelMath::MakeUInt16(0);
+                        MUInt15 quantized = possibleColors[i];
+						TestHalfBlock(error, selectors, quantized, sectorPixels[flip][sector], sectorPreWeightedPixels[flip][sector], modifierTables[table], d == 1, options);
+
+						if (d == 0)
+						{
+                            ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestIndError[sector]));
+							if (ParallelMath::AnySet(errorBetter))
+							{
+								bestIndError[sector] = ParallelMath::Min(error, bestIndError[sector]);
+								ParallelMath::ConditionalSet(bestIndSelectors[sector], errorBetter, selectors);
+                                ParallelMath::ConditionalSet(bestIndColors[sector], errorBetter, quantized);
+                                ParallelMath::ConditionalSet(bestIndTable[sector], errorBetter, ParallelMath::MakeUInt15(table));
+							}
+						}
+						else
+						{
+                            ParallelMath::Int16CompFlag isInBounds = ParallelMath::Less(ParallelMath::MakeUInt15(i), numUniqueColors);
+
+							MUInt15 storageIndexes = drs.diffNumAttempts[sector];
+                            drs.diffNumAttempts[sector] = drs.diffNumAttempts[sector] + ParallelMath::SelectOrZero(isInBounds, ParallelMath::MakeUInt15(1));
+
+                            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                            {
+                                int storageIndex = ParallelMath::Extract(storageIndexes, block);
+
+                                ParallelMath::PutFloat(drs.diffErrors[sector][storageIndex], block, ParallelMath::Extract(error, block));
+                                ParallelMath::PutUInt16(drs.diffSelectors[sector][storageIndex], block, ParallelMath::Extract(selectors, block));
+                                ParallelMath::PutUInt15(drs.diffColors[sector][storageIndex], block, ParallelMath::Extract(quantized, block));
+                                ParallelMath::PutUInt15(drs.diffTables[sector][storageIndex], block, table);
+                            }
+						}
+					}
+				}
+			}
+
+			if (d == 0)
+			{
+				MFloat bestIndErrorTotal = bestIndError[0] + bestIndError[1];
+                ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(bestIndErrorTotal, bestTotalError));
+				if (ParallelMath::AnySet(errorBetter))
+				{
+                    bestIsThisMode = bestIsThisMode | errorBetter;
+
+					bestTotalError = ParallelMath::Min(bestTotalError, bestIndErrorTotal);
+					ParallelMath::ConditionalSet(bestFlip, errorBetter, ParallelMath::MakeUInt15(flip));
+                    ParallelMath::ConditionalSet(bestD, errorBetter, ParallelMath::MakeUInt15(d));
+					for (int sector = 0; sector < 2; sector++)
+					{
+                        ParallelMath::ConditionalSet(bestColors[sector], errorBetter, bestIndColors[sector]);
+                        ParallelMath::ConditionalSet(bestSelectors[sector], errorBetter, bestIndSelectors[sector]);
+                        ParallelMath::ConditionalSet(bestTables[sector], errorBetter, bestIndTable[sector]);
+					}
+				}
+			}
+			else
+			{
+                ParallelMath::Int16CompFlag canIgnoreSector[2] = { ParallelMath::MakeBoolInt16(false), ParallelMath::MakeBoolInt16(false) };
+                FindBestDifferentialCombination(flip, d, canIgnoreSector, bestIsThisMode, bestTotalError, bestFlip, bestD, bestColors, bestSelectors, bestTables, drs);
+			}
+		}
+	}
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        if (!ParallelMath::Extract(bestIsThisMode, block))
+            continue;
+
+        uint32_t highBits = 0;
+        uint32_t lowBits = 0;
+
+        int blockBestFlip = ParallelMath::Extract(bestFlip, block);
+        int blockBestD = ParallelMath::Extract(bestD, block);
+        int blockBestTables[2] = { ParallelMath::Extract(bestTables[0], block), ParallelMath::Extract(bestTables[1], block) };
+        ParallelMath::ScalarUInt16 blockBestSelectors[2] = { ParallelMath::Extract(bestSelectors[0], block), ParallelMath::Extract(bestSelectors[1], block) };
+
+        int colors[2][3];
+        for (int sector = 0; sector < 2; sector++)
+        {
+            int sectorColor = ParallelMath::Extract(bestColors[sector], block);
+            for (int ch = 0; ch < 3; ch++)
+                colors[sector][ch] = (sectorColor >> (ch * 5)) & 31;
+        }
+
+        EmitETC1Block(outputBuffer + block * 8, blockBestFlip, blockBestD, colors, blockBestTables, blockBestSelectors, false);
+    }
+}
+// Punchthrough search over both flips and both sectors: candidate base colors are scored with TestHalfBlockPunchthrough and stored in drs,
+// FindBestDifferentialCombination then picks a pair per flip, and a block is emitted for every lane flagged in bestIsThisMode.
+void cvtt::Internal::ETCComputer::CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage &drs, const Options &options)
+{
+	int numTries = 0; // not referenced again in this function
+
+    MUInt15 zeroU15 = ParallelMath::MakeUInt15(0);
+    MUInt16 zeroU16 = ParallelMath::MakeUInt16(0);
+
+    MUInt15 bestColors[2] = { zeroU15, zeroU15 };
+    MUInt16 bestSelectors[2] = { zeroU16, zeroU16 };
+    MUInt15 bestTables[2] = { zeroU15, zeroU15 };
+    MUInt15 bestFlip = zeroU15;
+
+    MUInt15 sectorPixels[2][2][8][3];
+    ParallelMath::Int16CompFlag sectorTransparent[2][2][8];
+    MFloat sectorPreWeightedPixels[2][2][8][3];
+    MUInt15 sectorCumulative[2][2][3];
+
+    ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false);
+    // Gather pixels, pre-weighted pixels, transparency flags and per-channel sums for both sectors of both flip orientations.
+    for (int flip = 0; flip < 2; flip++)
+	{
+		for (int sector = 0; sector < 2; sector++)
+		{
+			for (int ch = 0; ch < 3; ch++)
+				sectorCumulative[flip][sector][ch] = zeroU15;
+
+			for (int px = 0; px < 8; px++)
+			{
+				for (int ch = 0; ch < 3; ch++)
+				{
+					MUInt15 pixelChannelValue = pixels[g_flipTables[flip][sector][px]][ch];
+					sectorPixels[flip][sector][px][ch] = pixelChannelValue;
+                    sectorPreWeightedPixels[flip][sector][px][ch] = preWeightedPixels[g_flipTables[flip][sector][px]][ch];
+					sectorCumulative[flip][sector][ch] = sectorCumulative[flip][sector][ch] + pixelChannelValue;
+				}
+
+                sectorTransparent[flip][sector][px] = isTransparent[g_flipTables[flip][sector][px]];
+			}
+		}
+	}
+	// One modifier per table index; it scales the candidate offsets below and is passed to TestHalfBlockPunchthrough.
+	static const MUInt15 modifiers[8] =
+	{
+		ParallelMath::MakeUInt15(8),
+		ParallelMath::MakeUInt15(17),
+		ParallelMath::MakeUInt15(29),
+		ParallelMath::MakeUInt15(42),
+		ParallelMath::MakeUInt15(60),
+		ParallelMath::MakeUInt15(80),
+		ParallelMath::MakeUInt15(106),
+		ParallelMath::MakeUInt15(183),
+	};
+
+    bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); // not read anywhere below in this function
+
+    const int maxSectorCumulativeOffsets = 17; // 2 * 8 + 1: one slot per om in [-8, 8], 8 being the pixel count of a sector
+
+	for (int flip = 0; flip < 2; flip++)
+	{
+        ParallelMath::Int16CompFlag canIgnoreSector[2] = { ParallelMath::MakeBoolInt16(true), ParallelMath::MakeBoolInt16(false) }; // NOTE(review): sector 1 starts false, so the AND-reduction below presumably can never set it; confirm the asymmetry is intended
+        // A sector's flag survives only if every one of its 8 pixels carries the transparent flag.
+        for (int sector = 0; sector < 2; sector++)
+            for (int px = 0; px < 8; px++)
+                canIgnoreSector[sector] = canIgnoreSector[sector] & sectorTransparent[flip][sector][px];
+
+		drs.diffNumAttempts[0] = drs.diffNumAttempts[1] = zeroU15; // restart the per-sector attempt lists for this flip
+
+		for (int sector = 0; sector < 2; sector++)
+		{
+            MUInt15 sectorNumOpaque = ParallelMath::MakeUInt15(0);
+            for (int px = 0; px < 8; px++)
+                sectorNumOpaque = sectorNumOpaque + ParallelMath::SelectOrZero(sectorTransparent[flip][sector][px], ParallelMath::MakeUInt15(1)); // NOTE(review): the selecting flag is sectorTransparent although the sum is named "opaque"; confirm the polarity
+            // Largest count over all lanes; it bounds the om loop that every lane shares.
+            int sectorMaxOpaque = 0;
+            for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                sectorMaxOpaque = std::max<int>(sectorMaxOpaque, ParallelMath::Extract(sectorNumOpaque, block));
+
+            int sectorNumOpaqueMultipliers = sectorMaxOpaque * 2 + 1; // number of om values in [-sectorMaxOpaque, sectorMaxOpaque]
+            // Denominator (count * 256, with the count floored at 1) and rounding addend (count * 128) of the quantization division below.
+            MUInt15 sectorNumOpaqueDenominator = ParallelMath::Max(ParallelMath::MakeUInt15(1), sectorNumOpaque) << 8;
+            MUInt15 sectorNumOpaqueAddend = sectorNumOpaque << 7;
+
+            MSInt16 sectorNumOpaqueSigned = ParallelMath::LosslessCast<MSInt16>::Cast(sectorNumOpaque);
+            MSInt16 negSectorNumOpaqueSigned = ParallelMath::MakeSInt16(0) - sectorNumOpaqueSigned;
+            // Upper clamp for the offset channel sums: 255 per counted pixel.
+            MUInt15 sectorCumulativeMax = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(ParallelMath::MakeUInt15(255), sectorNumOpaque));
+
+			for (int table = 0; table < 8; table++)
+			{
+				MUInt15 possibleColors[maxSectorCumulativeOffsets];
+
+                MUInt15 quantized[3];
+                for (int om = -sectorMaxOpaque; om <= sectorMaxOpaque; om++)
+                {
+                    MSInt16 clampedOffsetMult = ParallelMath::Max(ParallelMath::Min(ParallelMath::MakeSInt16(om), sectorNumOpaqueSigned), negSectorNumOpaqueSigned); // clamp om to this lane's own [-count, count]
+                    MSInt16 offset = ParallelMath::CompactMultiply(clampedOffsetMult, modifiers[table]);
+
+                    for (int ch = 0; ch < 3; ch++)
+                    {
+                        // cu is in range 0..255*numOpaque (at most 0..2040)
+                        MUInt15 cu15 = ParallelMath::Min(
+                            sectorCumulativeMax,
+                            ParallelMath::ToUInt15(
+                                ParallelMath::Max(
+                                    ParallelMath::MakeSInt16(0),
+                                    ParallelMath::LosslessCast<MSInt16>::Cast(sectorCumulative[flip][sector][ch]) + offset
+                                )
+                            )
+                        );
+
+                        //quantized[ch] = (cu * 31 + (cu >> 3) + (numOpaque * 128)) / (numOpaque * 256)
+                        MUInt16 cuTimes31 = (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15);
+                        MUInt15 cuDiv8 = ParallelMath::RightShift(cu15, 3);
+                        MUInt16 numerator = cuTimes31 + ParallelMath::LosslessCast<MUInt16>::Cast(cuDiv8 + sectorNumOpaqueAddend);
+                        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                            ParallelMath::PutUInt15(quantized[ch], block, ParallelMath::Extract(numerator, block) / ParallelMath::Extract(sectorNumOpaqueDenominator, block));
+                    }
+
+					possibleColors[om + sectorMaxOpaque] = quantized[0] | (quantized[1] << 5) | (quantized[2] << 10);
+				}
+                // Per lane, collapse runs of equal adjacent candidates in place and record how many remain.
+                ParallelMath::UInt15 numUniqueColors;
+                for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                {
+                    uint16_t blockNumUniqueColors = 1;
+                    for (int i = 1; i < sectorNumOpaqueMultipliers; i++)
+                    {
+                        uint16_t color = ParallelMath::Extract(possibleColors[i], block);
+                        if (color != ParallelMath::Extract(possibleColors[blockNumUniqueColors - 1], block))
+                            ParallelMath::PutUInt15(possibleColors[blockNumUniqueColors++], block, color);
+                    }
+
+                    ParallelMath::PutUInt15(numUniqueColors, block, blockNumUniqueColors);
+                }
+                // The lane with the most distinct candidates decides how many slots are tested.
+                int maxUniqueColors = ParallelMath::Extract(numUniqueColors, 0);
+                for (int block = 1; block < ParallelMath::ParallelSize; block++)
+                    maxUniqueColors = std::max<int>(maxUniqueColors, ParallelMath::Extract(numUniqueColors, block));
+                // Pad shorter lanes with their first candidate so every slot below maxUniqueColors holds a defined color.
+                for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                {
+                    uint16_t fillColor = ParallelMath::Extract(possibleColors[0], block);
+                    for (int i = ParallelMath::Extract(numUniqueColors, block); i < maxUniqueColors; i++)
+                        ParallelMath::PutUInt15(possibleColors[i], block, fillColor);
+                }
+
+				for (int i = 0; i < maxUniqueColors; i++)
+				{
+					MFloat error = ParallelMath::MakeFloatZero();
+					MUInt16 selectors = ParallelMath::MakeUInt16(0);
+                    MUInt15 quantized = possibleColors[i];
+					TestHalfBlockPunchthrough(error, selectors, quantized, sectorPixels[flip][sector], sectorPreWeightedPixels[flip][sector], sectorTransparent[flip][sector], modifiers[table], options);
+                    // Padding slots are out of bounds for their lane: the attempt counter does not advance, so the next write reuses the same index.
+                    ParallelMath::Int16CompFlag isInBounds = ParallelMath::Less(ParallelMath::MakeUInt15(i), numUniqueColors);
+
+					MUInt15 storageIndexes = drs.diffNumAttempts[sector];
+                    drs.diffNumAttempts[sector] = drs.diffNumAttempts[sector] + ParallelMath::SelectOrZero(isInBounds, ParallelMath::MakeUInt15(1));
+
+                    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+                    {
+                        int storageIndex = ParallelMath::Extract(storageIndexes, block);
+
+                        ParallelMath::PutFloat(drs.diffErrors[sector][storageIndex], block, ParallelMath::Extract(error, block));
+                        ParallelMath::PutUInt16(drs.diffSelectors[sector][storageIndex], block, ParallelMath::Extract(selectors, block));
+                        ParallelMath::PutUInt15(drs.diffColors[sector][storageIndex], block, ParallelMath::Extract(quantized, block));
+                        ParallelMath::PutUInt15(drs.diffTables[sector][storageIndex], block, table);
+                    }
+                }
+            }
+        }
+        // The bestD output is never read afterwards (the emit below always passes 1), hence the throwaway variable.
+        MUInt15 bestDDummy = ParallelMath::MakeUInt15(0);
+        FindBestDifferentialCombination(flip, 1, canIgnoreSector, bestIsThisMode, bestTotalError, bestFlip, bestDDummy, bestColors, bestSelectors, bestTables, drs);
+	}
+    // Write out only the lanes flagged in bestIsThisMode; lane N writes at outputBuffer + N * 8.
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        if (!ParallelMath::Extract(bestIsThisMode, block))
+            continue;
+
+        int blockBestColors[2][3];
+        int blockBestTables[2];
+        ParallelMath::ScalarUInt16 blockBestSelectors[2];
+        for (int sector = 0; sector < 2; sector++)
+        {
+            int sectorColor = ParallelMath::Extract(bestColors[sector], block);
+            for (int ch = 0; ch < 3; ch++)
+                blockBestColors[sector][ch] = (sectorColor >> (ch * 5)) & 31; // unpack one 5-bit field per channel
+
+            blockBestTables[sector] = ParallelMath::Extract(bestTables[sector], block);
+            blockBestSelectors[sector] = ParallelMath::Extract(bestSelectors[sector], block);
+        }
+
+        EmitETC1Block(outputBuffer + block * 8, ParallelMath::Extract(bestFlip, block), 1, blockBestColors, blockBestTables, blockBestSelectors, true);
+    }
+}
+
+
+// Obtains storage from allocFunc and constructs the ETC1 scratch object in it; yields NULL when allocFunc returns NULL.
+cvtt::ETC1CompressionData *cvtt::Internal::ETCComputer::AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context)
+{
+    void *const storage = allocFunc(context, sizeof(ETC1CompressionDataInternal));
+    if (storage != NULL)
+        new (storage) ETC1CompressionDataInternal(context);
+    return static_cast<ETC1CompressionData*>(storage);
+}
+
+void cvtt::Internal::ETCComputer::ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc)
+{
+    ETC1CompressionDataInternal *const scratch = static_cast<ETC1CompressionDataInternal*>(compressionData);
+    void *const allocContext = scratch->m_context; // read first: the object is destroyed on the next line
+    scratch->~ETC1CompressionDataInternal(); // in-place destruction; the storage itself goes back through freeFunc
+    freeFunc(allocContext, compressionData, sizeof(ETC1CompressionDataInternal));
+}
+
+// Obtains storage from allocFunc and constructs the ETC2 scratch object in it from options; yields NULL when allocFunc returns NULL.
+cvtt::ETC2CompressionData *cvtt::Internal::ETCComputer::AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options)
+{
+    void *const storage = allocFunc(context, sizeof(ETC2CompressionDataInternal));
+    if (storage != NULL)
+        new (storage) ETC2CompressionDataInternal(context, options);
+    return static_cast<ETC2CompressionData*>(storage);
+}
+
+void cvtt::Internal::ETCComputer::ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc)
+{
+    ETC2CompressionDataInternal *const scratch = static_cast<ETC2CompressionDataInternal*>(compressionData);
+    void *const allocContext = scratch->m_context; // read first: the object is destroyed on the next line
+    scratch->~ETC2CompressionDataInternal(); // in-place destruction; the storage itself goes back through freeFunc
+    freeFunc(allocContext, compressionData, sizeof(ETC2CompressionDataInternal));
+}
+
+cvtt::Internal::ETCComputer::ETC2CompressionDataInternal::ETC2CompressionDataInternal(void *context, const cvtt::Options &options)
+    : m_context(context)
+{
+    // Derives the two chroma side axes from the channel weights.
+    // w is the weight vector (r, g, b); cyc is the same vector cycled to (g, b, r).
+    const float w[3] = { options.redWeight, options.greenWeight, options.blueWeight };
+    const float cyc[3] = { w[1], w[2], w[0] };
+
+    // Multiple of w to add to cyc so that the sum has no component along w.
+    const float proj = -(cyc[0] * w[0] + cyc[1] * w[1] + cyc[2] * w[2]) / (w[0] * w[0] + w[1] * w[1] + w[2] * w[2]);
+
+    // First side axis: cyc with its w-component removed.
+    for (int ch = 0; ch < 3; ch++)
+        m_chromaSideAxis0[ch] = cyc[ch] + w[ch] * proj;
+
+    // Cross product (axis0 x w), written component-wise with cyclic indices.
+    float crossed[3];
+    for (int ch = 0; ch < 3; ch++)
+        crossed[ch] = m_chromaSideAxis0[(ch + 1) % 3] * w[(ch + 2) % 3] - m_chromaSideAxis0[(ch + 2) % 3] * w[(ch + 1) % 3];
+
+    // Ratio that rescales the cross product to the length of the first side axis.
+    const float axis0LenSq = (m_chromaSideAxis0[0] * m_chromaSideAxis0[0] + m_chromaSideAxis0[1] * m_chromaSideAxis0[1] + m_chromaSideAxis0[2] * m_chromaSideAxis0[2]);
+    const float crossedLenSq = (crossed[0] * crossed[0] + crossed[1] * crossed[1] + crossed[2] * crossed[2]);
+    const float rescale = static_cast<float>(std::sqrt(axis0LenSq / crossedLenSq));
+
+    // Second side axis: the rescaled cross product.
+    for (int ch = 0; ch < 3; ch++)
+        m_chromaSideAxis1[ch] = crossed[ch] * rescale;
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_ETC.h b/thirdparty/cvtt/ConvectionKernels_ETC.h
new file mode 100644
index 0000000000..5e3c4d74fd
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_ETC.h
@@ -0,0 +1,126 @@
+#pragma once
+#ifndef __CVTT_CONVECTIONKERNELS_ETC_H__
+#define __CVTT_CONVECTIONKERNELS_ETC_H__
+
+#include "ConvectionKernels.h"
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    struct Options;
+
+    namespace Internal
+    {
+        class ETCComputer // all of this class's own member functions are static
+        {
+        public:
+            static void CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options);
+            static void CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha);
+            static void CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, const Options &options);
+            static void CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options);
+            // Scratch-object lifetime: Alloc* constructs inside memory obtained from allocFunc; Release* destroys it and hands the memory to freeFunc.
+            static ETC2CompressionData *AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options);
+            static void ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc);
+
+            static ETC1CompressionData *AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context);
+            static void ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc);
+
+        private:
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+            typedef ParallelMath::UInt31 MUInt31;
+
+            struct DifferentialResolveStorage // per-sector lists of tested candidates (error, selectors, color, table) written by the block compressors
+            {
+                static const unsigned int MaxAttemptsPerSector = 57 + 81 + 81 + 81 + 81 + 81 + 81 + 81; // NOTE(review): looks like the sum of the leading counts of the g_potentialOffsets4 rows — confirm
+
+                MUInt15 diffNumAttempts[2]; // per lane: next write index into the arrays below, for each sector
+                MFloat diffErrors[2][MaxAttemptsPerSector];
+                MUInt16 diffSelectors[2][MaxAttemptsPerSector];
+                MUInt15 diffColors[2][MaxAttemptsPerSector];
+                MUInt15 diffTables[2][MaxAttemptsPerSector];
+
+                uint16_t attemptSortIndexes[2][MaxAttemptsPerSector];
+            };
+
+            struct HModeEval
+            {
+                MFloat errors[62][16];
+                MUInt16 signBits[62];
+                MUInt15 uniqueQuantizedColors[62];
+                MUInt15 numUniqueColors[2];
+            };
+
+            struct ETC1CompressionDataInternal : public cvtt::ETC1CompressionData
+            {
+                explicit ETC1CompressionDataInternal(void *context)
+                    : m_context(context)
+                {
+                }
+
+                DifferentialResolveStorage m_drs;
+                void *m_context; // allocator context; ReleaseETC1Data passes it to freeFunc
+            };
+
+            struct ETC2CompressionDataInternal : public cvtt::ETC2CompressionData
+            {
+                explicit ETC2CompressionDataInternal(void *context, const cvtt::Options &options);
+
+                HModeEval m_h;
+                DifferentialResolveStorage m_drs;
+
+                void *m_context; // allocator context; ReleaseETC2Data passes it to freeFunc
+                float m_chromaSideAxis0[3]; // filled by the constructor from the options' red/green/blue weights
+                float m_chromaSideAxis1[3]; // filled by the constructor; cross product of axis 0 and the weights, rescaled to axis 0's length
+            };
+
+            static MFloat ComputeErrorUniform(const MUInt15 pixelA[3], const MUInt15 pixelB[3]);
+            static MFloat ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat pixelB[3], const Options options); // NOTE(review): options is taken by value here, by reference in the sibling declarations
+            static MFloat ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat pixelB[3]);
+
+            static void TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options);
+            static void TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options);
+            static void FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs);
+
+            static ParallelMath::Int16CompFlag ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b);
+            static ParallelMath::Int16CompFlag ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b);
+            static bool ETCDifferentialIsLegalForChannelScalar(const uint16_t &a, const uint16_t &b);
+            static bool ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b);
+
+            static void EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options);
+            static void EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options);
+
+            static void EncodeVirtualTModePunchthrough(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options);
+
+            static MUInt15 DecodePlanarCoeff(const MUInt15 &coeff, int ch);
+            static void EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options);
+            // In the two ETC1 declarations below the storage parameter is named compressionData; the definitions name the same parameter drs.
+            static void CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage& compressionData, const Options &options, bool punchthrough);
+            static void CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage& compressionData, const Options &options);
+            static void CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options);
+
+            static void ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options);
+
+            static void ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential);
+            static void ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential);
+            static void ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 target[3], const MUInt15 &granularity);
+            static void ConvertToFakeBT709(MFloat yuv[3], const MUInt15 color[3]);
+            static void ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3]);
+            static void ConvertToFakeBT709(MFloat yuv[3], const MFloat &r, const MFloat &g, const MFloat &b);
+            static void ConvertFromFakeBT709(MFloat rgb[3], const MFloat yuv[3]);
+
+            static void QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues);
+
+            static void EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque);
+            static void EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque);
+            static void EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent);
+
+            static const int g_flipTables[2][2][8]; // [flip][sector][px] -> index into the 16-entry pixel arrays
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_ETC1.h b/thirdparty/cvtt/ConvectionKernels_ETC1.h
new file mode 100644
index 0000000000..775e41669f
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_ETC1.h
@@ -0,0 +1,29 @@
+#pragma once // guards the namespace-scope table definitions below against double inclusion
+#include <stdint.h>
+namespace cvtt
+{
+    namespace Tables
+    {
+        namespace ETC1
+        {
+            const int16_t g_potentialOffsets4[] =
+            {
+                57, -64, -58, -54, -52, -48, -46, -44, -42, -40, -38, -36, -34, -32, -30, -28, -26, -24, -22, -20, -18, -16, -14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 52, 54, 58, 64,
+                81, -136, -124, -114, -112, -102, -100, -92, -90, -88, -80, -78, -76, -70, -68, -66, -64, -58, -56, -54, -52, -48, -46, -44, -42, -40, -36, -34, -32, -30, -26, -24, -22, -20, -18, -14, -12, -10, -8, -4, -2, 0, 2, 4, 8, 10, 12, 14, 18, 20, 22, 24, 26, 30, 32, 34, 36, 40, 42, 44, 46, 48, 52, 54, 56, 58, 64, 66, 68, 70, 76, 78, 80, 88, 90, 92, 100, 102, 112, 114, 124, 136,
+                81, -232, -212, -194, -192, -174, -172, -156, -154, -152, -136, -134, -132, -118, -116, -114, -112, -98, -96, -94, -92, -80, -78, -76, -74, -72, -60, -58, -56, -54, -42, -40, -38, -36, -34, -22, -20, -18, -16, -4, -2, 0, 2, 4, 16, 18, 20, 22, 34, 36, 38, 40, 42, 54, 56, 58, 60, 72, 74, 76, 78, 80, 92, 94, 96, 98, 112, 114, 116, 118, 132, 134, 136, 152, 154, 156, 172, 174, 192, 194, 212, 232,
+                81, -336, -307, -281, -278, -252, -249, -226, -223, -220, -197, -194, -191, -171, -168, -165, -162, -142, -139, -136, -133, -116, -113, -110, -107, -104, -87, -84, -81, -78, -61, -58, -55, -52, -49, -32, -29, -26, -23, -6, -3, 0, 3, 6, 23, 26, 29, 32, 49, 52, 55, 58, 61, 78, 81, 84, 87, 104, 107, 110, 113, 116, 133, 136, 139, 142, 162, 165, 168, 171, 191, 194, 197, 220, 223, 226, 249, 252, 278, 281, 307, 336,
+                81, -480, -438, -402, -396, -360, -354, -324, -318, -312, -282, -276, -270, -246, -240, -234, -228, -204, -198, -192, -186, -168, -162, -156, -150, -144, -126, -120, -114, -108, -90, -84, -78, -72, -66, -48, -42, -36, -30, -12, -6, 0, 6, 12, 30, 36, 42, 48, 66, 72, 78, 84, 90, 108, 114, 120, 126, 144, 150, 156, 162, 168, 186, 192, 198, 204, 228, 234, 240, 246, 270, 276, 282, 312, 318, 324, 354, 360, 396, 402, 438, 480,
+                81, -640, -584, -536, -528, -480, -472, -432, -424, -416, -376, -368, -360, -328, -320, -312, -304, -272, -264, -256, -248, -224, -216, -208, -200, -192, -168, -160, -152, -144, -120, -112, -104, -96, -88, -64, -56, -48, -40, -16, -8, 0, 8, 16, 40, 48, 56, 64, 88, 96, 104, 112, 120, 144, 152, 160, 168, 192, 200, 208, 216, 224, 248, 256, 264, 272, 304, 312, 320, 328, 360, 368, 376, 416, 424, 432, 472, 480, 528, 536, 584, 640,
+                81, -848, -775, -709, -702, -636, -629, -570, -563, -556, -497, -490, -483, -431, -424, -417, -410, -358, -351, -344, -337, -292, -285, -278, -271, -264, -219, -212, -205, -198, -153, -146, -139, -132, -125, -80, -73, -66, -59, -14, -7, 0, 7, 14, 59, 66, 73, 80, 125, 132, 139, 146, 153, 198, 205, 212, 219, 264, 271, 278, 285, 292, 337, 344, 351, 358, 410, 417, 424, 431, 483, 490, 497, 556, 563, 570, 629, 636, 702, 709, 775, 848,
+                81, -1464, -1328, -1234, -1192, -1098, -1056, -1004, -962, -920, -868, -826, -784, -774, -732, -690, -648, -638, -596, -554, -544, -512, -502, -460, -418, -408, -376, -366, -324, -314, -282, -272, -230, -188, -178, -146, -136, -94, -84, -52, -42, 0, 42, 52, 84, 94, 136, 146, 178, 188, 230, 272, 282, 314, 324, 366, 376, 408, 418, 460, 502, 512, 544, 554, 596, 638, 648, 690, 732, 774, 784, 826, 868, 920, 962, 1004, 1056, 1098, 1192, 1234, 1328, 1464
+            };
+
+            const unsigned int g_maxPotentialOffsets = 81;
+
+            const int16_t g_thModifierTable[8] =
+            {
+                3, 6, 11, 16, 23, 32, 41, 64
+            };
+        }
+    }
+}
diff --git a/thirdparty/cvtt/ConvectionKernels_ETC2.h b/thirdparty/cvtt/ConvectionKernels_ETC2.h
new file mode 100644
index 0000000000..4befc8e8c2
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_ETC2.h
@@ -0,0 +1,35 @@
+#pragma once // guards the namespace-scope table definitions below against double inclusion
+#include <stdint.h>
+
+namespace cvtt
+{
+    namespace Tables
+    {
+        namespace ETC2
+        {
+            // Eight modifier values (presumably the T/H-mode distance table, going by the "th" prefix - confirm).
+            const int16_t g_thModifierTable[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
+
+            // Sixteen rows of four non-negative alpha modifiers, ascending within each row.
+            const int16_t g_alphaModifierTablePositive[16][4] =
+            {
+                { 2, 5, 8, 14, },
+                { 2, 6, 9, 12, },
+                { 1, 4, 7, 12, },
+                { 1, 3, 5, 12, },
+                { 2, 5, 7, 11, },
+                { 2, 6, 8, 10, },
+                { 3, 6, 7, 10, },
+                { 2, 4, 7, 10, },
+                { 1, 5, 7, 9, },
+                { 1, 4, 7, 9, },
+                { 1, 3, 7, 9, },
+                { 1, 4, 6, 9, },
+                { 2, 3, 6, 9, },
+                { 0, 1, 2, 9, },
+                { 3, 5, 7, 8, },
+                { 2, 4, 6, 8, },
+            };
+        }
+    }
+}
diff --git a/thirdparty/cvtt/ConvectionKernels_ETC2_Rounding.h b/thirdparty/cvtt/ConvectionKernels_ETC2_Rounding.h
new file mode 100644
index 0000000000..a4f5a3ddfa
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_ETC2_Rounding.h
@@ -0,0 +1,27 @@
+#pragma once
+#include <stdint.h>
+
+// This file is generated by the MakeTables app.  Do not edit this file manually.
+
+namespace cvtt { namespace Tables { namespace ETC2 {
+    const int g_alphaRoundingTableWidth = 13;
+    const uint8_t g_alphaRoundingTables[16][13] =
+    {
+        { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3 },
+        { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 },
+        { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3 },
+        { 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 },
+        { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 },
+        { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3 },
+        { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 },
+        { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
+        { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 },
+        { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
+        { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
+        { 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3 },
+        { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3 },
+        { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3 },
+        { 0, 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3 },
+        { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3 },
+    };
+}}}
diff --git a/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h b/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
new file mode 100644
index 0000000000..c1276553b2
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
@@ -0,0 +1,181 @@
+#pragma once
+#ifndef __CVTT_ENDPOINTREFINER_H__
+#define __CVTT_ENDPOINTREFINER_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        // Solve for a, b where v = a*t + b
+        // This allows endpoints to be mapped to where T=0 and T=1
+        // Least squares from totals:
+        // a = (tv - t*v/w)/(tt - t*t/w)
+        // b = (v - a*t)/w
+        template<int TVectorSize>
+        class EndpointRefiner
+        {
+        public:
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::AInt16 MAInt16;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+
+            MFloat m_tv[TVectorSize];
+            MFloat m_v[TVectorSize];
+            MFloat m_tt;
+            MFloat m_t;
+            MFloat m_w;
+            int m_wu;
+
+            float m_rcpMaxIndex;
+            float m_channelWeights[TVectorSize];
+            float m_rcpChannelWeights[TVectorSize];
+
+            void Init(int indexRange, const float channelWeights[TVectorSize])
+            {
+                // Clears all running totals and caches each channel weight together with
+                // its reciprocal (1 when the weight is 0, so no division by zero occurs).
+                m_tt = ParallelMath::MakeFloatZero();
+                m_t = ParallelMath::MakeFloatZero();
+                m_w = ParallelMath::MakeFloatZero();
+                m_wu = 0;
+
+                // Indexes are multiplied by this to form t, i.e. t = index / (indexRange - 1).
+                m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
+
+                for (int c = 0; c < TVectorSize; ++c)
+                {
+                    m_tv[c] = ParallelMath::MakeFloatZero();
+                    m_v[c] = ParallelMath::MakeFloatZero();
+
+                    // The reciprocal is what GetRefinedEndpoints multiplies its results by.
+                    const float weight = channelWeights[c];
+                    m_channelWeights[c] = weight;
+                    m_rcpChannelWeights[c] = (weight != 0.0f) ? (1.0f / weight) : 1.0f;
+                }
+            }
+
+            void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
+            {
+                // Adds one weighted sample to the totals; t is the index scaled by m_rcpMaxIndex.
+                const MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
+
+                m_w = m_w + weight;
+                m_t = m_t + weight * t;
+                m_tt = m_tt + weight * t * t;
+                for (int c = 0; c < TVectorSize; ++c)
+                {
+                    const MFloat sample = pwFloatPixel[c] * weight;
+                    m_v[c] = m_v[c] + sample;
+                    m_tv[c] = m_tv[c] + t * sample;
+                }
+            }
+
+            void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
+            {
+                // Adds one unit-weight sample (counted in m_wu); only the first numRealChannels channels are summed.
+                const MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
+                ++m_wu;
+                m_t = m_t + t;
+                m_tt = m_tt + t * t;
+                for (int c = 0; c < numRealChannels; ++c)
+                {
+                    const MFloat &sample = pwFloatPixel[c];
+                    m_v[c] = m_v[c] + sample;
+                    m_tv[c] = m_tv[c] + t * sample;
+                }
+            }
+
+            // Overload that accumulates all TVectorSize channels.
+            void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
+            {
+                ContributeUnweightedPW(floatPixel, index, TVectorSize);
+            }
+
+            void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
+            {
+                // Least-squares fit of v = a*t + b from the accumulated totals:
+                //   a = (tv - t*v/w) / (tt - t*t/w)
+                //   b = (v - a*t) / w
+                // with tv, v, tt and t read from m_tv, m_v, m_tt and m_t, and w = m_w + m_wu.
+                // endPoint[0] receives b (t = 0) and endPoint[1] receives a + b (t = 1),
+                // each scaled by m_rcpChannelWeights.
+
+                // Total weight: explicitly weighted samples plus the unweighted sample count.
+                MFloat totalWeight = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
+                ParallelMath::MakeSafeDenominator(totalWeight);
+                MFloat rcpWeight = ParallelMath::Reciprocal(totalWeight);
+
+                // Slope denominator, evaluated as (tt*w - t*t) / w.
+                MFloat slopeDenom = (m_tt * totalWeight - m_t * m_t) * rcpWeight;
+
+                // Where the denominator compares equal to zero the slope is undefined: remember
+                // that, and divide by 1 instead; the affected results are overwritten below.
+                ParallelMath::FloatCompFlag isDegenerate = ParallelMath::Equal(slopeDenom, ParallelMath::MakeFloatZero());
+                ParallelMath::ConditionalSet(slopeDenom, isDegenerate, ParallelMath::MakeFloat(1.0f));
+
+                for (int c = 0; c < TVectorSize; ++c)
+                {
+                    // Per-channel slope and intercept of the fitted line.
+                    MFloat slope = (m_tv[c] - m_t * m_v[c] * rcpWeight) / slopeDenom;
+                    MFloat intercept = (m_v[c] - slope * m_t) * rcpWeight;
+
+                    // Values of the line at t = 0 and t = 1.
+                    MFloat low = intercept;
+                    MFloat high = slope + intercept;
+
+                    // Degenerate case (zero slope denominator): both endpoints become
+                    // v / w instead.
+                    ParallelMath::ConditionalSet(low, isDegenerate, (m_v[c] * rcpWeight));
+                    ParallelMath::ConditionalSet(high, isDegenerate, low);
+
+                    // Unweight: scale by the cached reciprocal channel weight.
+                    const float unweight = m_rcpChannelWeights[c];
+
+                    endPoint[0][c] = low * unweight;
+                    endPoint[1][c] = high * unweight;
+                }
+            }
+
+            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                // Refined endpoints clamped to [0, 255] and rounded; numRealChannels is not read here.
+                MFloat refined[2][TVectorSize];
+                GetRefinedEndpoints(refined);
+                for (int e = 0; e < 2; ++e)
+                    for (int c = 0; c < TVectorSize; ++c)
+                        endPoint[e][c] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(refined[e][c], 0.0f, 255.0f), roundingMode);
+            }
+
+            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode); // numRealChannels = TVectorSize
+            }
+
+            void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                // Refined endpoints clamped to [-31743, 31743] when isSigned, otherwise to
+                // [0, 31743], then rounded and cast to MSInt16.
+                MFloat refined[2][TVectorSize];
+                GetRefinedEndpoints(refined);
+
+                const float lowerBound = isSigned ? -31743.0f : 0.0f;
+                for (int e = 0; e < 2; ++e)
+                    for (int c = 0; c < TVectorSize; ++c)
+                    {
+                        if (isSigned)
+                            endPoint[e][c] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(refined[e][c], lowerBound, 31743.0f), roundingMode));
+                        else
+                            endPoint[e][c] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(refined[e][c], lowerBound, 31743.0f), roundingMode));
+                    }
+            }
+        };
+    }
+}
+
+#endif
+
diff --git a/thirdparty/cvtt/ConvectionKernels_EndpointSelector.h b/thirdparty/cvtt/ConvectionKernels_EndpointSelector.h
new file mode 100644
index 0000000000..e09dfd248c
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_EndpointSelector.h
@@ -0,0 +1,153 @@
+#pragma once
+#ifndef __CVTT_ENDPOINTSELECTOR_H__
+#define __CVTT_ENDPOINTSELECTOR_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+#include "ConvectionKernels_UnfinishedEndpoints.h"
+#include "ConvectionKernels_PackedCovarianceMatrix.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        static const int NumEndpointSelectorPasses = 3;
+
+        template<int TVectorSize, int TIterationCount>
+        class EndpointSelector
+        {
+        public:
+            typedef ParallelMath::Float MFloat;
+
+            EndpointSelector()
+            {
+                m_weightTotal = ParallelMath::MakeFloatZero();
+                m_minDist = ParallelMath::MakeFloat(FLT_MAX);   // inverted range: the first Min/Max
+                m_maxDist = ParallelMath::MakeFloat(-FLT_MAX);  // update in ContributeMinMax replaces both
+                for (int c = 0; c < TVectorSize; ++c)
+                {
+                    m_direction[c] = ParallelMath::MakeFloatZero();
+                    m_centroid[c] = ParallelMath::MakeFloatZero();
+                }
+            }
+
+            void ContributePass(const MFloat *value, int pass, const MFloat &weight)
+            {
+                switch (pass) // pass numbers other than 0, 1 and 2 contribute nothing
+                {
+                case 0: ContributeCentroid(value, weight); break;
+                case 1: ContributeDirection(value, weight); break;
+                case 2: ContributeMinMax(value); break; // weight is not used by this pass
+                }
+            }
+
+            void FinishPass(int pass)
+            {
+                // Finalises the accumulation for `pass`. Only passes 0 and 1 have a
+                // finishing step; any other value does nothing.
+                if (pass == 0) { FinishCentroid(); return; }
+                if (pass == 1) { FinishDirection(); }
+            }
+
+            UnfinishedEndpoints<TVectorSize> GetEndpoints(const float channelWeights[TVectorSize]) const
+            {
+                // Converts the centroid/direction/distance fit into un-weighted endpoints:
+                // a base (the point at m_minDist) and an offset to the point at m_maxDist.
+                MFloat unweightedBase[TVectorSize];
+                MFloat unweightedOffset[TVectorSize];
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist;
+                    MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist;
+
+                    // A zero channel weight must not be used as a divisor: divide by 1 instead.
+                    // (safeWeight used to be computed here but the divisions ignored it.)
+                    const float safeWeight = (channelWeights[ch] == 0.f) ? 1.0f : channelWeights[ch];
+                    unweightedBase[ch] = min / safeWeight;
+                    unweightedOffset[ch] = (max - min) / safeWeight;
+                }
+                return UnfinishedEndpoints<TVectorSize>(unweightedBase, unweightedOffset);
+            }
+
+        private:
+            void ContributeCentroid(const MFloat *value, const MFloat &weight)
+            {
+                m_weightTotal = m_weightTotal + weight; // divided out later by FinishCentroid
+                for (int c = 0; c < TVectorSize; ++c)
+                    m_centroid[c] = m_centroid[c] + value[c] * weight;
+            }
+
+            void FinishCentroid()
+            {
+                // Divides the accumulated weighted sum by the (made-safe) total weight.
+                MFloat totalWeight = m_weightTotal;
+                ParallelMath::MakeSafeDenominator(totalWeight);
+                for (int c = 0; c < TVectorSize; ++c)
+                    m_centroid[c] = m_centroid[c] / totalWeight;
+            }
+
+            void ContributeDirection(const MFloat *value, const MFloat &weight)
+            {
+                // Adds the centroid-relative sample to m_covarianceMatrix with the given weight.
+                MFloat centered[TVectorSize];
+                for (int c = 0; c < TVectorSize; ++c)
+                    centered[c] = value[c] - m_centroid[c];
+                m_covarianceMatrix.Add(centered, weight);
+            }
+
+            void FinishDirection()
+            {
+                // Iteratively refines a direction estimate, starting from the all-ones vector:
+                // each round applies m_covarianceMatrix.Product and rescales the result.
+                MFloat estimate[TVectorSize];
+                for (int c = 0; c < TVectorSize; ++c)
+                    estimate[c] = ParallelMath::MakeFloat(1.0f);
+
+                for (int iter = 0; iter < TIterationCount; ++iter)
+                {
+                    MFloat multiplied[TVectorSize];
+                    m_covarianceMatrix.Product(multiplied, estimate);
+
+                    // Rescale by the largest component (made safe to divide by first).
+                    MFloat scale = multiplied[0];
+                    for (int c = 1; c < TVectorSize; ++c)
+                        scale = ParallelMath::Max(scale, multiplied[c]);
+                    ParallelMath::MakeSafeDenominator(scale);
+
+                    for (int c = 0; c < TVectorSize; ++c)
+                        estimate[c] = multiplied[c] / scale;
+                }
+
+                // Normalize: divide the estimate by its Euclidean length.
+                MFloat lengthSq = ParallelMath::MakeFloatZero();
+                for (int c = 0; c < TVectorSize; ++c)
+                    lengthSq = lengthSq + estimate[c] * estimate[c];
+
+                MFloat length = ParallelMath::Sqrt(lengthSq);
+                ParallelMath::MakeSafeDenominator(length);
+                for (int c = 0; c < TVectorSize; ++c)
+                    m_direction[c] = estimate[c] / length;
+            }
+
+            void ContributeMinMax(const MFloat *value)
+            {
+                // Dot product of m_direction with the centroid-relative sample, folded into the running range.
+                MFloat projection = ParallelMath::MakeFloatZero();
+                for (int c = 0; c < TVectorSize; ++c)
+                    projection = projection + m_direction[c] * (value[c] - m_centroid[c]);
+                m_maxDist = ParallelMath::Max(m_maxDist, projection);
+                m_minDist = ParallelMath::Min(m_minDist, projection);
+            }
+
+            ParallelMath::Float m_centroid[TVectorSize];
+            ParallelMath::Float m_direction[TVectorSize];
+            PackedCovarianceMatrix<TVectorSize> m_covarianceMatrix;
+            ParallelMath::Float m_weightTotal;
+
+            ParallelMath::Float m_minDist;
+            ParallelMath::Float m_maxDist;
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_FakeBT709_Rounding.h b/thirdparty/cvtt/ConvectionKernels_FakeBT709_Rounding.h
new file mode 100644
index 0000000000..1eb924befe
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_FakeBT709_Rounding.h
@@ -0,0 +1,282 @@
+#pragma once
+#include <stdint.h>
+
+// This file is generated by the MakeTables app.  Do not edit this file manually.
+
+namespace cvtt { namespace Tables { namespace FakeBT709 {
+    const uint8_t g_rounding16[] =
+    {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
+        0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
+        3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
+
+    };
+}}}
diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelector.cpp b/thirdparty/cvtt/ConvectionKernels_IndexSelector.cpp
new file mode 100644
index 0000000000..b3d1b5497e
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_IndexSelector.cpp
@@ -0,0 +1,66 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels_IndexSelector.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        const ParallelMath::UInt16 g_weightReciprocals[17] = // [r] = round(32768 / (r - 1)); IndexSelector indexes this table with its range r
+        {
+            ParallelMath::MakeUInt16(0),        // [0]:  divisor would be -1, left as 0
+            ParallelMath::MakeUInt16(0),        // [1]:  divisor would be 0, left as 0
+            ParallelMath::MakeUInt16(32768),    // [2]:  32768 / 1
+            ParallelMath::MakeUInt16(16384),    // [3]:  32768 / 2
+            ParallelMath::MakeUInt16(10923),    // [4]:  32768 / 3, rounded
+            ParallelMath::MakeUInt16(8192),     // [5]:  32768 / 4
+            ParallelMath::MakeUInt16(6554),     // [6]:  32768 / 5, rounded
+            ParallelMath::MakeUInt16(5461),     // [7]:  32768 / 6, rounded
+            ParallelMath::MakeUInt16(4681),     // [8]:  32768 / 7, rounded
+            ParallelMath::MakeUInt16(4096),     // [9]:  32768 / 8
+            ParallelMath::MakeUInt16(3641),     // [10]: 32768 / 9, rounded
+            ParallelMath::MakeUInt16(3277),     // [11]: 32768 / 10, rounded
+            ParallelMath::MakeUInt16(2979),     // [12]: 32768 / 11, rounded
+            ParallelMath::MakeUInt16(2731),     // [13]: 32768 / 12, rounded
+            ParallelMath::MakeUInt16(2521),     // [14]: 32768 / 13, rounded
+            ParallelMath::MakeUInt16(2341),     // [15]: 32768 / 14, rounded
+            ParallelMath::MakeUInt16(2185),     // [16]: 32768 / 15, rounded
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelector.h b/thirdparty/cvtt/ConvectionKernels_IndexSelector.h
new file mode 100644
index 0000000000..0f9d209183
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_IndexSelector.h
@@ -0,0 +1,147 @@
+#pragma once
+#ifndef __CVTT_INDEXSELECTOR_H__
+#define __CVTT_INDEXSELECTOR_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        extern const ParallelMath::UInt16 g_weightReciprocals[17];
+
+        template<int TVectorSize>
+        class IndexSelector // Picks interpolation indexes along the line between two endpoints and rebuilds LDR pixels from an index.
+        {
+        public:
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::AInt16 MAInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+            typedef ParallelMath::UInt31 MUInt31;
+
+            // General initializer: keeps interpolationEndPoints for reconstruction and builds the projection origin/axis from colorSpaceEndpoints; range is the number of selectable indexes.
+            template<class TInterpolationEPType, class TColorEPType>
+            void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)
+            {
+                // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.
+                // We need to select indexes using the color-space endpoints.
+
+                m_isUniform = true; // stays true only when every channel weight equals channelWeights[0]
+                for (int ch = 1; ch < TVectorSize; ch++)
+                {
+                    if (channelWeights[ch] != channelWeights[0])
+                        m_isUniform = false;
+                }
+
+                // To work with channel weights, we need something where:
+                // pxDiff = px - ep[0]
+                // epDiff = ep[1] - ep[0]
+                //
+                // weightedEPDiff = epDiff * channelWeights
+                // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)
+                // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)
+                // index = normalizedIndex * maxValue
+                //
+                // Equivalent to:
+                // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)
+                // index = dot(axis, pxDiff)
+
+                for (int ep = 0; ep < 2; ep++)
+                    for (int ch = 0; ch < TVectorSize; ch++)
+                        m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);
+
+                m_range = range;
+                m_maxValue = static_cast<float>(range - 1); // upper clamp bound used by SelectIndexLDR
+
+                MFloat epDiffWeighted[TVectorSize];
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);
+                    MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);
+                    epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];
+                }
+
+                MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];
+                for (int ch = 1; ch < TVectorSize; ch++)
+                    lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];
+
+                ParallelMath::MakeSafeDenominator(lenSquared); // helper defined elsewhere; presumably guards the division below when both endpoints coincide
+
+                MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                    m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;
+            }
+            // LDR convenience overload of Init for UInt15 endpoints.
+            template<bool TSigned>
+            void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)
+            {
+                // The same UInt15 endpoints are passed as both the interpolation endpoints
+                // and the color-space endpoints of the general Init, which performs the
+                // conversion to MAInt16 storage itself, so no local copy is needed here.
+                // TSigned does not affect the result; it is kept so existing call sites compile.
+
+                Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);
+            }
+            // Rebuilds the first numRealChannels channels for `index` using a 0..64 weight: ((64 - w) * ep0 + w * ep1 + 32) >> 6.
+            void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
+            {
+                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
+
+                for (int ch = 0; ch < numRealChannels; ch++)
+                {
+                    MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
+                    MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
+                    pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));
+                }
+            }
+            // Same as ReconstructLDR_BC7 but with a 0..256 weight: ((256 - w) * ep0 + w * ep1 + 128) >> 8.
+            void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
+            {
+                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));
+
+                for (int ch = 0; ch < numRealChannels; ch++)
+                {
+                    MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
+                    MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
+                    pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));
+                }
+            }
+            // Reconstructs all TVectorSize channels with the BC7 weight scale.
+            void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel)
+            {
+                ReconstructLDR_BC7(index, pixel, TVectorSize);
+            }
+            // Reconstructs all TVectorSize channels with the 0..256 weight scale.
+            void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel)
+            {
+                ReconstructLDRPrecise(index, pixel, TVectorSize);
+            }
+            // Projects (pixel - origin) onto the axis built by Init, clamps to [0, m_maxValue] and rounds to an index.
+            MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
+            {
+                MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];
+                for (int ch = 1; ch < TVectorSize; ch++)
+                    dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];
+
+                return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);
+            }
+
+        protected:
+            MAInt16 m_endPoint[2][TVectorSize]; // interpolation endpoints stored by Init; read by the Reconstruct* methods
+
+        private:
+            MFloat m_origin[TVectorSize];  // color-space endpoint 0 as float
+            MFloat m_axis[TVectorSize];    // per-channel projection axis; dot(axis, pixel - origin) gives the unrounded index
+            int m_range;                   // number of index values; also the subscript into g_weightReciprocals
+            float m_maxValue;              // range - 1
+            bool m_isUniform;              // set by Init; not read anywhere in this class
+        };
+    }
+}
+
+#endif
+
diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h b/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h
new file mode 100644
index 0000000000..84795cd689
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h
@@ -0,0 +1,155 @@
+#pragma once
+#ifndef __CVTT_INDEXSELECTORHDR_H__
+#define __CVTT_INDEXSELECTORHDR_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+#include "ConvectionKernels_IndexSelector.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v);
+        ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v);
+
+        template<int TVectorSize>
+        class IndexSelectorHDR : public IndexSelector<TVectorSize> // Adds HDR reconstruction, optional index inversion and an exhaustive index search on top of IndexSelector.
+        {
+        public:
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt31 MUInt31;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+            typedef ParallelMath::Float MFloat;
+
+        private:
+            // Returns (range - 1) - anIndex where m_isInverted is set, and anIndex unchanged elsewhere.
+            MUInt15 InvertSingle(const MUInt15& anIndex) const
+            {
+                MUInt15 inverted = m_maxValueMinusOne - anIndex;
+                return ParallelMath::Select(m_isInverted, inverted, anIndex);
+            }
+            // Interpolates the stored endpoints as signed 16-bit values with a 0..64 weight, then passes each channel through UnscaleHDRValueSigned.
+            void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const
+            {
+                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    MSInt16 ep0 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]);
+                    MSInt16 ep1 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]);
+
+                    MSInt32 pixel32 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1);
+
+                    pixel32 = ParallelMath::RightShift(pixel32 + ParallelMath::MakeSInt32(32), 6); // add 32 before the shift by 6 to round
+
+                    pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(pixel32));
+                }
+            }
+            // Unsigned counterpart: endpoints are read as unsigned 16-bit values and the result goes through UnscaleHDRValueUnsigned.
+            void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const
+            {
+                MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
+
+                for (int ch = 0; ch < TVectorSize; ch++)
+                {
+                    MUInt16 ep0 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]);
+                    MUInt16 ep1 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]);
+
+                    MUInt31 pixel31 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1);
+
+                    pixel31 = ParallelMath::RightShift(pixel31 + ParallelMath::MakeUInt31(32), 6); // add 32 before the shift by 6 to round
+
+                    pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(pixel31)));
+                }
+            }
+            // Squared difference between pixel[ch] and the precomputed interpolator `index` in channel ch.
+            MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const
+            {
+                MFloat diff = pixel[ch] - m_reconstructedInterpolators[index][ch];
+                return diff * diff;
+            }
+            // Sum of the per-channel squared differences for interpolator `index`.
+            MFloat ErrorForInterpolator(int index, const MFloat *pixel) const
+            {
+                MFloat error = ErrorForInterpolatorComponent(index, 0, pixel);
+                for (int ch = 1; ch < TVectorSize; ch++)
+                    error = error + ErrorForInterpolatorComponent(index, ch, pixel);
+                return error;
+            }
+
+        public:
+            // Sets the HDR range and clears inversion; unless fastIndexing, also precomputes every interpolator (scaled by channelWeights) for SelectIndexHDRSlow.
+            void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights)
+            {
+                assert(range <= 16); // m_reconstructedInterpolators has 16 rows
+
+                m_range = range;
+
+                m_isInverted = ParallelMath::MakeBoolInt16(false);
+                m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1));
+
+                if (!fastIndexing)
+                {
+                    for (int i = 0; i < range; i++)
+                    {
+                        MSInt16 recon2CL[TVectorSize];
+                        // Reads this->m_endPoint, so presumably the base-class Init must already have run (verify against callers).
+                        if (isSigned)
+                            ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL);
+                        else
+                            ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL);
+
+                        for (int ch = 0; ch < TVectorSize; ch++)
+                            m_reconstructedInterpolators[i][ch] = ParallelMath::TwosCLHalfToFloat(recon2CL[ch]) * channelWeights[ch];
+                    }
+                }
+            }
+            // Signed reconstruction of `index` after applying the current inversion.
+            void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const
+            {
+                ReconstructHDRSignedUninverted(InvertSingle(index), pixel);
+            }
+            // Unsigned reconstruction of `index` after applying the current inversion.
+            void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const
+            {
+                ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel);
+            }
+            // Replaces the inversion flags consulted by InvertSingle.
+            void ConditionalInvert(const ParallelMath::Int16CompFlag &invert)
+            {
+                m_isInverted = invert;
+            }
+            // Exhaustive search: tries every interpolator built by InitHDR and returns the lowest-error index (inversion applied). The rounding-scope argument is unused.
+            MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const
+            {
+                MUInt15 index = ParallelMath::MakeUInt15(0);
+                // NOTE(review): interpolators are stored multiplied by channelWeights, so pixel is presumably pre-weighted the same way - confirm with callers.
+                MFloat bestError = ErrorForInterpolator(0, pixel);
+                for (int i = 1; i < m_range; i++)
+                {
+                    MFloat error = ErrorForInterpolator(i, pixel);
+                    ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); // strict compare: on a tie the earlier index is kept
+                    ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(static_cast<uint16_t>(i)));
+                    bestError = ParallelMath::Min(bestError, error);
+                }
+
+                return InvertSingle(index);
+            }
+            // Fast path: reuses the base-class projection (SelectIndexLDR) and applies the current inversion.
+            MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
+            {
+                return InvertSingle(this->SelectIndexLDR(pixel, rtn));
+            }
+
+        private:
+            MFloat m_reconstructedInterpolators[16][TVectorSize]; // filled by InitHDR only when fastIndexing is false
+            ParallelMath::Int16CompFlag m_isInverted;             // selects the inverted index in InvertSingle
+            MUInt15 m_maxValueMinusOne;                           // despite the name, InitHDR stores range - 1 here
+            int m_range;                                          // separate from the private m_range of IndexSelector
+        };
+    }
+}
+#endif
+
diff --git a/thirdparty/cvtt/ConvectionKernels_PackedCovarianceMatrix.h b/thirdparty/cvtt/ConvectionKernels_PackedCovarianceMatrix.h
new file mode 100644
index 0000000000..7ac3d4fdda
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_PackedCovarianceMatrix.h
@@ -0,0 +1,68 @@
+#pragma once
+#ifndef __CVTT_COVARIANCEMATRIX_H__
+#define __CVTT_COVARIANCEMATRIX_H__
+
+namespace cvtt
+{
+    namespace Internal
+    {
+
+        template<int TMatrixSize>
+        class PackedCovarianceMatrix // Symmetric TMatrixSize x TMatrixSize accumulator that stores only the lower triangle, row by row.
+        {
+        public:
+            // 0: xx,               (leading number = m_values index where the row starts)
+            // 1: xy, yy
+            // 3: xz, yz, zz
+            // 6: xw, yw, zw, ww
+            // ... etc.
+            static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2; // element count of the lower triangle, diagonal included
+
+            typedef ParallelMath::Float MFloat;
+            // Starts with every stored element set to zero.
+            PackedCovarianceMatrix()
+            {
+                for (int i = 0; i < PyramidSize; i++)
+                    m_values[i] = ParallelMath::MakeFloatZero();
+            }
+            // Accumulates weight * vec[row] * vec[col] into every stored (row, col) element with col <= row.
+            void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight)
+            {
+                int index = 0;
+                for (int row = 0; row < TMatrixSize; row++)
+                {
+                    for (int col = 0; col <= row; col++)
+                    {
+                        m_values[index] = m_values[index] + vec[row] * vec[col] * weight;
+                        index++;
+                    }
+                }
+            }
+            // Writes the matrix-vector product into outVec, reading the mirrored element for entries above the diagonal.
+            void Product(MFloat *outVec, const MFloat *inVec)
+            {
+                for (int row = 0; row < TMatrixSize; row++)
+                {
+                    MFloat sum = ParallelMath::MakeFloatZero();
+
+                    int index = (row * (row + 1)) >> 1; // first stored element of this row
+                    for (int col = 0; col < TMatrixSize; col++)
+                    {
+                        sum = sum + inVec[col] * m_values[index];
+                        if (col >= row)
+                            index += col + 1; // on or past the diagonal: step down to the same column of the next stored row
+                        else
+                            index++;          // before the diagonal: step along the stored row
+                    }
+
+                    outVec[row] = sum;
+                }
+            }
+
+        private:
+            ParallelMath::Float m_values[PyramidSize]; // packed lower triangle in the layout shown above
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_ParallelMath.h b/thirdparty/cvtt/ConvectionKernels_ParallelMath.h
new file mode 100644
index 0000000000..9e25280f45
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_ParallelMath.h
@@ -0,0 +1,1816 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+#pragma once
+#ifndef __CVTT_PARALLELMATH_H__
+#define __CVTT_PARALLELMATH_H__
+
+#include "ConvectionKernels.h"
+#include "ConvectionKernels_Config.h"
+
+#ifdef CVTT_USE_SSE2
+#include <emmintrin.h>
+#endif
+
+#include <float.h>
+#include <assert.h>
+#include <string.h>
+#include <algorithm>
+#include <math.h>
+
+#define UNREFERENCED_PARAMETER(n) ((void)n)
+
+// Parallel math implementation
+//
+// After preprocessor defs are handled, what this should do is expose the following types:
+// SInt16 - Signed 16-bit integer
+// UInt16 - Unsigned 16-bit integer
+// UInt15 - Unsigned 15-bit integer
+// SInt32 - Signed 32-bit integer
+// UInt31 - Unsigned 31-bit integer
+// AInt16 - 16-bit integer of unknown signedness (only used for storage)
+// Int16CompFlag - Comparison flags from comparing 16-bit integers
+// Int32CompFlag - Comparison flags from comparing 32-bit integers
+// FloatCompFlag - Comparison flags from comparing 32-bit floats
+//
+// The reason for these distinctions is that, depending on the instruction set, signed or unsigned versions of certain ops
+// (particularly max, min, compares, and right shift) may not be available.  In cases where ops are not available, it's
+// necessary to do high bit manipulations to accomplish the operation with 16-bit numbers.  The 15-bit and 31-bit uint types
+// can elide the bit flips if unsigned versions are not available.
+
+namespace cvtt
+{
+#ifdef CVTT_USE_SSE2
+    // SSE2 version
+    struct ParallelMath
+    {
+        typedef uint16_t ScalarUInt16;
+        typedef int16_t ScalarSInt16;
+
+        template<unsigned int TRoundingMode>
+        struct RoundForScope // Scope guard: sets the SSE rounding mode to TRoundingMode on construction and restores the saved MXCSR on destruction.
+        {
+            unsigned int m_oldCSR; // MXCSR value captured by the constructor
+
+            RoundForScope()
+            {
+                m_oldCSR = _mm_getcsr();
+                _mm_setcsr((m_oldCSR & ~_MM_ROUND_MASK) | (TRoundingMode)); // replace only the rounding-control bits
+            }
+
+            ~RoundForScope()
+            {
+                _mm_setcsr(m_oldCSR); // put the whole register back as it was found
+            }
+        };
+
+        struct RoundTowardZeroForScope : RoundForScope<_MM_ROUND_TOWARD_ZERO> // scope guard selecting _MM_ROUND_TOWARD_ZERO
+        {
+        };
+
+        struct RoundTowardNearestForScope : RoundForScope<_MM_ROUND_NEAREST> // scope guard selecting _MM_ROUND_NEAREST
+        {
+        };
+
+        struct RoundUpForScope : RoundForScope<_MM_ROUND_UP> // scope guard selecting _MM_ROUND_UP
+        {
+        };
+
+        struct RoundDownForScope : RoundForScope<_MM_ROUND_DOWN> // scope guard selecting _MM_ROUND_DOWN
+        {
+        };
+
+        static const int ParallelSize = 8; // lanes per vector value: one __m128i of 16-bit ints, or two __m128 / __m128i halves for 32-bit types
+
+        enum Int16Subtype // Tag passed to VInt16<> / VInt32<> so differently-interpreted integers are distinct C++ types.
+        {
+            IntSubtype_Signed,            // SInt16 / SInt32
+            IntSubtype_UnsignedFull,      // UInt16 / UInt32
+            IntSubtype_UnsignedTruncated, // UInt15 / UInt31
+            IntSubtype_Abstract,          // AInt16 / AInt32
+        };
+
+        template<int TSubtype>
+        struct VInt16 // Eight 16-bit integer lanes in one SSE2 register; TSubtype only tags how the lanes are interpreted.
+        {
+            __m128i m_value;
+            // Adds the scalar `other` to every lane.
+            inline VInt16 operator+(int16_t other) const
+            {
+                VInt16 result;
+                result.m_value = _mm_add_epi16(m_value, _mm_set1_epi16(static_cast<int16_t>(other)));
+                return result;
+            }
+            // Lane-wise 16-bit addition.
+            inline VInt16 operator+(const VInt16 &other) const
+            {
+                VInt16 result;
+                result.m_value = _mm_add_epi16(m_value, other.m_value);
+                return result;
+            }
+            // Bitwise OR of the two registers.
+            inline VInt16 operator|(const VInt16 &other) const
+            {
+                VInt16 result;
+                result.m_value = _mm_or_si128(m_value, other.m_value);
+                return result;
+            }
+            // Bitwise AND of the two registers.
+            inline VInt16 operator&(const VInt16 &other) const
+            {
+                VInt16 result;
+                result.m_value = _mm_and_si128(m_value, other.m_value);
+                return result;
+            }
+            // Lane-wise 16-bit subtraction.
+            inline VInt16 operator-(const VInt16 &other) const
+            {
+                VInt16 result;
+                result.m_value = _mm_sub_epi16(m_value, other.m_value);
+                return result;
+            }
+            // Shifts every 16-bit lane left by `bits`.
+            inline VInt16 operator<<(int bits) const
+            {
+                VInt16 result;
+                result.m_value = _mm_slli_epi16(m_value, bits);
+                return result;
+            }
+            // Bitwise XOR of the two registers.
+            inline VInt16 operator^(const VInt16 &other) const
+            {
+                VInt16 result;
+                result.m_value = _mm_xor_si128(m_value, other.m_value);
+                return result;
+            }
+        };
+
+        typedef VInt16<IntSubtype_Signed> SInt16;            // signed 16-bit lanes
+        typedef VInt16<IntSubtype_UnsignedFull> UInt16;      // unsigned 16-bit lanes
+        typedef VInt16<IntSubtype_UnsignedTruncated> UInt15; // unsigned 15-bit values held in 16-bit lanes
+        typedef VInt16<IntSubtype_Abstract> AInt16;          // 16-bit lanes of unknown signedness (storage only)
+
+        template<int TSubtype>
+        struct VInt32 // 32-bit integer lanes split across two SSE2 registers; TSubtype only tags how the lanes are interpreted.
+        {
+            __m128i m_values[2];
+            // Lane-wise 32-bit addition on both halves.
+            inline VInt32 operator+(const VInt32& other) const
+            {
+                VInt32 result;
+                result.m_values[0] = _mm_add_epi32(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_add_epi32(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Lane-wise 32-bit subtraction on both halves.
+            inline VInt32 operator-(const VInt32& other) const
+            {
+                VInt32 result;
+                result.m_values[0] = _mm_sub_epi32(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_sub_epi32(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Shifts every 32-bit lane left by `other` bits.
+            inline VInt32 operator<<(const int other) const
+            {
+                VInt32 result;
+                result.m_values[0] = _mm_slli_epi32(m_values[0], other);
+                result.m_values[1] = _mm_slli_epi32(m_values[1], other);
+                return result;
+            }
+            // Bitwise OR on both halves.
+            inline VInt32 operator|(const VInt32& other) const
+            {
+                VInt32 result;
+                result.m_values[0] = _mm_or_si128(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_or_si128(m_values[1], other.m_values[1]);
+                return result;
+            }
+        };
+
+        typedef VInt32<IntSubtype_Signed> SInt32;            // signed 32-bit lanes
+        typedef VInt32<IntSubtype_UnsignedTruncated> UInt31; // unsigned 31-bit values held in 32-bit lanes
+        typedef VInt32<IntSubtype_UnsignedFull> UInt32;      // unsigned 32-bit lanes
+        typedef VInt32<IntSubtype_Abstract> AInt32;          // 32-bit lanes of unknown signedness
+
+        template<class TTargetType>
+        struct LosslessCast // Re-tags a VInt16 / VInt32 as TTargetType without touching the register contents.
+        {
+#ifdef CVTT_PERMIT_ALIASING
+            template<int TSrcSubtype>
+            static const TTargetType& Cast(const VInt32<TSrcSubtype> &src)
+            {
+                return reinterpret_cast<const TTargetType&>(src); // alias in place; target stays const like the source
+            }
+            // 16-bit overload of the aliasing cast; every VInt16<> shares the same single-register layout.
+            template<int TSrcSubtype>
+            static const TTargetType& Cast(const VInt16<TSrcSubtype> &src)
+            {
+                return reinterpret_cast<const TTargetType&>(src); // alias in place; target stays const like the source
+            }
+#else
+            template<int TSrcSubtype>
+            static TTargetType Cast(const VInt32<TSrcSubtype> &src)
+            {
+                TTargetType result;
+                result.m_values[0] = src.m_values[0];
+                result.m_values[1] = src.m_values[1];
+                return result;
+            }
+            // 16-bit overload of the copying cast.
+            template<int TSrcSubtype>
+            static TTargetType Cast(const VInt16<TSrcSubtype> &src)
+            {
+                TTargetType result;
+                result.m_value = src.m_value;
+                return result;
+            }
+#endif
+        };
+
+        struct Int64 // Storage only in this view: four SSE2 registers, no operators defined here.
+        {
+            __m128i m_values[4];
+        };
+
+        struct Float // Single-precision lanes split across two __m128 registers; every operator below is applied per lane to both halves.
+        {
+            __m128 m_values[2];
+            // Lane-wise addition.
+            inline Float operator+(const Float &other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_add_ps(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_add_ps(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Adds the scalar `other` to every lane.
+            inline Float operator+(float other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_add_ps(m_values[0], _mm_set1_ps(other));
+                result.m_values[1] = _mm_add_ps(m_values[1], _mm_set1_ps(other));
+                return result;
+            }
+            // Lane-wise subtraction.
+            inline Float operator-(const Float& other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_sub_ps(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_sub_ps(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Negation, computed as 0 - x in every lane.
+            inline Float operator-() const
+            {
+                Float result;
+                result.m_values[0] = _mm_sub_ps(_mm_setzero_ps(), m_values[0]);
+                result.m_values[1] = _mm_sub_ps(_mm_setzero_ps(), m_values[1]);
+                return result;
+            }
+            // Lane-wise multiplication.
+            inline Float operator*(const Float& other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_mul_ps(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_mul_ps(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Multiplies every lane by the scalar `other`.
+            inline Float operator*(float other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_mul_ps(m_values[0], _mm_set1_ps(other));
+                result.m_values[1] = _mm_mul_ps(m_values[1], _mm_set1_ps(other));
+                return result;
+            }
+            // Lane-wise division.
+            inline Float operator/(const Float &other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_div_ps(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_div_ps(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Divides every lane by the scalar `other`.
+            inline Float operator/(float other) const
+            {
+                Float result;
+                result.m_values[0] = _mm_div_ps(m_values[0], _mm_set1_ps(other));
+                result.m_values[1] = _mm_div_ps(m_values[1], _mm_set1_ps(other));
+                return result;
+            }
+        };
+
+        struct Int16CompFlag // Result of comparing 16-bit lanes; used as the bit mask in Select. Presumably each lane is all-ones or all-zeros.
+        {
+            __m128i m_value;
+            // Bitwise AND of two flag sets.
+            inline Int16CompFlag operator&(const Int16CompFlag &other) const
+            {
+                Int16CompFlag result;
+                result.m_value = _mm_and_si128(m_value, other.m_value);
+                return result;
+            }
+            // Bitwise OR of two flag sets.
+            inline Int16CompFlag operator|(const Int16CompFlag &other) const
+            {
+                Int16CompFlag result;
+                result.m_value = _mm_or_si128(m_value, other.m_value);
+                return result;
+            }
+        };
+
+        struct Int32CompFlag // Result of comparing 32-bit lanes, held in two registers like VInt32.
+        {
+            __m128i m_values[2];
+            // Bitwise AND of two flag sets, both halves.
+            inline Int32CompFlag operator&(const Int32CompFlag &other) const
+            {
+                Int32CompFlag result;
+                result.m_values[0] = _mm_and_si128(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_and_si128(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Bitwise OR of two flag sets, both halves.
+            inline Int32CompFlag operator|(const Int32CompFlag &other) const
+            {
+                Int32CompFlag result;
+                result.m_values[0] = _mm_or_si128(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_or_si128(m_values[1], other.m_values[1]);
+                return result;
+            }
+        };
+
+        struct FloatCompFlag // Result of comparing float lanes, held in two __m128 registers like Float; used as the bit mask in Select.
+        {
+            __m128 m_values[2];
+            // Bitwise AND of two flag sets, both halves.
+            inline FloatCompFlag operator&(const FloatCompFlag &other) const
+            {
+                FloatCompFlag result;
+                result.m_values[0] = _mm_and_ps(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_and_ps(m_values[1], other.m_values[1]);
+                return result;
+            }
+            // Bitwise OR of two flag sets, both halves.
+            inline FloatCompFlag operator|(const FloatCompFlag &other) const
+            {
+                FloatCompFlag result;
+                result.m_values[0] = _mm_or_ps(m_values[0], other.m_values[0]);
+                result.m_values[1] = _mm_or_ps(m_values[1], other.m_values[1]);
+                return result;
+            }
+        };
+
+        // Lane-wise 16-bit addition (_mm_add_epi16) of two vectors of the same subtype.
+        template<int TSubtype>
+        static VInt16<TSubtype> AbstractAdd(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
+        {
+            const __m128i lhs = a.m_value;
+            const __m128i rhs = b.m_value;
+
+            VInt16<TSubtype> sum;
+            sum.m_value = _mm_add_epi16(lhs, rhs);
+            return sum;
+        }
+
+        // Lane-wise 16-bit subtraction (_mm_sub_epi16): a - b.
+        template<int TSubtype>
+        static VInt16<TSubtype> AbstractSubtract(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
+        {
+            const __m128i minuend = a.m_value;
+            const __m128i subtrahend = b.m_value;
+
+            VInt16<TSubtype> difference;
+            difference.m_value = _mm_sub_epi16(minuend, subtrahend);
+            return difference;
+        }
+
+        // Bitwise blend: takes the bits of a where flag is set and the bits of b elsewhere.
+        static Float Select(const FloatCompFlag &flag, const Float &a, const Float &b)
+        {
+            Float blended;
+            for (int half = 0; half < 2; half++)
+            {
+                const __m128 mask = flag.m_values[half];
+                const __m128 fromA = _mm_and_ps(mask, a.m_values[half]);
+                const __m128 fromB = _mm_andnot_ps(mask, b.m_values[half]);
+                blended.m_values[half] = _mm_or_ps(fromA, fromB);
+            }
+            return blended;
+        }
+
+        // Integer counterpart of the blend above for 16-bit vectors.
+        template<int TSubtype>
+        static VInt16<TSubtype> Select(const Int16CompFlag &flag, const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
+        {
+            const __m128i mask = flag.m_value;
+            const __m128i fromA = _mm_and_si128(mask, a.m_value);
+            const __m128i fromB = _mm_andnot_si128(mask, b.m_value);
+
+            VInt16<TSubtype> blended;
+            blended.m_value = _mm_or_si128(fromA, fromB);
+            return blended;
+        }
+
+        // Keeps the bits of a where flag is set; all other bits become zero.
+        template<int TSubtype>
+        static VInt16<TSubtype> SelectOrZero(const Int16CompFlag &flag, const VInt16<TSubtype> &a)
+        {
+            const __m128i mask = flag.m_value;
+
+            VInt16<TSubtype> masked;
+            masked.m_value = _mm_and_si128(mask, a.m_value);
+            return masked;
+        }
+
+        // Overwrites dest with src in the bits where flag is set; the remaining bits of dest are kept.
+        template<int TSubtype>
+        static void ConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src)
+        {
+            const __m128i kept = _mm_andnot_si128(flag.m_value, dest.m_value);
+            const __m128i taken = _mm_and_si128(flag.m_value, src.m_value);
+            dest.m_value = _mm_or_si128(kept, taken);
+        }
+
+        // 32-bit variant. Each 16-bit flag lane is interleaved with itself so that it
+        // spans a full 32-bit lane before the blend is applied to each half.
+        template<int TSubtype>
+        static void ConditionalSet(VInt32<TSubtype> &dest, const Int16CompFlag &flag, const VInt32<TSubtype> &src)
+        {
+            __m128i wideFlags[2];
+            wideFlags[0] = _mm_unpacklo_epi16(flag.m_value, flag.m_value);
+            wideFlags[1] = _mm_unpackhi_epi16(flag.m_value, flag.m_value);
+
+            for (int half = 0; half < 2; half++)
+            {
+                const __m128i kept = _mm_andnot_si128(wideFlags[half], dest.m_values[half]);
+                const __m128i taken = _mm_and_si128(wideFlags[half], src.m_values[half]);
+                dest.m_values[half] = _mm_or_si128(kept, taken);
+            }
+        }
+
+        // Flag-vector variant: same blend, applied to a destination that is itself a flag vector.
+        static void ConditionalSet(ParallelMath::Int16CompFlag &dest, const Int16CompFlag &flag, const ParallelMath::Int16CompFlag &src)
+        {
+            const __m128i kept = _mm_andnot_si128(flag.m_value, dest.m_value);
+            const __m128i taken = _mm_and_si128(flag.m_value, src.m_value);
+            dest.m_value = _mm_or_si128(kept, taken);
+        }
+
+        // Computes (v XOR flag) + (flag >> 15, logical) per 16-bit lane. For a lane whose
+        // flag is all ones this is ~v + 1; for an all-zero flag the lane is unchanged.
+        static SInt16 ConditionalNegate(const Int16CompFlag &flag, const SInt16 &v)
+        {
+            const __m128i flipped = _mm_xor_si128(flag.m_value, v.m_value);
+            const __m128i plusOne = _mm_srli_epi16(flag.m_value, 15);
+
+            SInt16 negated;
+            negated.m_value = _mm_add_epi16(flipped, plusOne);
+            return negated;
+        }
+
+        // Inverse of ConditionalSet: dest keeps its bits where flag is set and takes src elsewhere.
+        template<int TSubtype>
+        static void NotConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src)
+        {
+            const __m128i kept = _mm_and_si128(flag.m_value, dest.m_value);
+            const __m128i taken = _mm_andnot_si128(flag.m_value, src.m_value);
+            dest.m_value = _mm_or_si128(kept, taken);
+        }
+
+        // Float variant: overwrites dest with src in the bits where flag is set.
+        static void ConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src)
+        {
+            for (int half = 0; half < 2; half++)
+            {
+                const __m128 mask = flag.m_values[half];
+                const __m128 kept = _mm_andnot_ps(mask, dest.m_values[half]);
+                const __m128 taken = _mm_and_ps(mask, src.m_values[half]);
+                dest.m_values[half] = _mm_or_ps(kept, taken);
+            }
+        }
+
+        // Float variant: dest keeps its bits where flag is set and takes src elsewhere.
+        static void NotConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src)
+        {
+            for (int half = 0; half < 2; half++)
+            {
+                const __m128 mask = flag.m_values[half];
+                const __m128 kept = _mm_and_ps(mask, dest.m_values[half]);
+                const __m128 taken = _mm_andnot_ps(mask, src.m_values[half]);
+                dest.m_values[half] = _mm_or_ps(kept, taken);
+            }
+        }
+
+        // Replaces every lane of v that compares equal to zero with 1.0f.
+        static void MakeSafeDenominator(Float& v)
+        {
+            const FloatCompFlag isZero = Equal(v, MakeFloatZero());
+            const Float one = MakeFloat(1.0f);
+            ConditionalSet(v, isZero, one);
+        }
+
+        // Shifts each lane left by (16 - precision) bits and arithmetically back right,
+        // which sign-extends from bit (precision - 1). A precision of 16 returns v as-is.
+        static SInt16 TruncateToPrecisionSigned(const SInt16 &v, int precision)
+        {
+            const int droppedBits = 16 - precision;
+            if (droppedBits != 0)
+            {
+                const __m128i shiftedUp = _mm_slli_epi16(v.m_value, droppedBits);
+
+                SInt16 truncated;
+                truncated.m_value = _mm_srai_epi16(shiftedUp, droppedBits);
+                return truncated;
+            }
+
+            return v;
+        }
+
+        // Shifts each lane left by (16 - precision) bits and logically back right,
+        // which clears the bits above the requested precision. A precision of 16 returns v as-is.
+        static UInt16 TruncateToPrecisionUnsigned(const UInt16 &v, int precision)
+        {
+            const int droppedBits = 16 - precision;
+            if (droppedBits != 0)
+            {
+                const __m128i shiftedUp = _mm_slli_epi16(v.m_value, droppedBits);
+
+                UInt16 truncated;
+                truncated.m_value = _mm_srli_epi16(shiftedUp, droppedBits);
+                return truncated;
+            }
+
+            return v;
+        }
+
+        // Unsigned 16-bit minimum. The top bit of both operands is flipped so the signed
+        // _mm_min_epi16 orders them as unsigned values, then flipped back in the result.
+        static UInt16 Min(const UInt16 &a, const UInt16 &b)
+        {
+            const __m128i signFlip = _mm_set1_epi16(-32768);
+            const __m128i biasedA = _mm_xor_si128(a.m_value, signFlip);
+            const __m128i biasedB = _mm_xor_si128(b.m_value, signFlip);
+            const __m128i biasedMin = _mm_min_epi16(biasedA, biasedB);
+
+            UInt16 smallest;
+            smallest.m_value = _mm_xor_si128(biasedMin, signFlip);
+            return smallest;
+        }
+
+        // Signed 16-bit lane-wise minimum.
+        static SInt16 Min(const SInt16 &a, const SInt16 &b)
+        {
+            SInt16 smallest;
+            smallest.m_value = _mm_min_epi16(a.m_value, b.m_value);
+            return smallest;
+        }
+
+        // UInt15 lane-wise minimum, using the signed 16-bit compare directly.
+        static UInt15 Min(const UInt15 &a, const UInt15 &b)
+        {
+            UInt15 smallest;
+            smallest.m_value = _mm_min_epi16(a.m_value, b.m_value);
+            return smallest;
+        }
+
+        // Float lane-wise minimum over both halves.
+        static Float Min(const Float &a, const Float &b)
+        {
+            Float smallest;
+            smallest.m_values[0] = _mm_min_ps(a.m_values[0], b.m_values[0]);
+            smallest.m_values[1] = _mm_min_ps(a.m_values[1], b.m_values[1]);
+            return smallest;
+        }
+
+        // Unsigned 16-bit maximum. The top bit of both operands is flipped so the signed
+        // _mm_max_epi16 orders them as unsigned values, then flipped back in the result.
+        static UInt16 Max(const UInt16 &a, const UInt16 &b)
+        {
+            const __m128i signFlip = _mm_set1_epi16(-32768);
+            const __m128i biasedA = _mm_xor_si128(a.m_value, signFlip);
+            const __m128i biasedB = _mm_xor_si128(b.m_value, signFlip);
+            const __m128i biasedMax = _mm_max_epi16(biasedA, biasedB);
+
+            UInt16 largest;
+            largest.m_value = _mm_xor_si128(biasedMax, signFlip);
+            return largest;
+        }
+
+        // Signed 16-bit lane-wise maximum.
+        static SInt16 Max(const SInt16 &a, const SInt16 &b)
+        {
+            SInt16 largest;
+            largest.m_value = _mm_max_epi16(a.m_value, b.m_value);
+            return largest;
+        }
+
+        // UInt15 lane-wise maximum, using the signed 16-bit compare directly.
+        static UInt15 Max(const UInt15 &a, const UInt15 &b)
+        {
+            UInt15 largest;
+            largest.m_value = _mm_max_epi16(a.m_value, b.m_value);
+            return largest;
+        }
+
+        // Float lane-wise maximum over both halves.
+        static Float Max(const Float &a, const Float &b)
+        {
+            Float largest;
+            largest.m_values[0] = _mm_max_ps(a.m_values[0], b.m_values[0]);
+            largest.m_values[1] = _mm_max_ps(a.m_values[1], b.m_values[1]);
+            return largest;
+        }
+
+        // Clamps each lane: first limits it to at most max, then raises it to at least min.
+        static Float Clamp(const Float &v, float min, float max)
+        {
+            Float clamped;
+            for (int half = 0; half < 2; half++)
+            {
+                const __m128 capped = _mm_min_ps(v.m_values[half], _mm_set1_ps(max));
+                clamped.m_values[half] = _mm_max_ps(capped, _mm_set1_ps(min));
+            }
+            return clamped;
+        }
+
+        // Lane-wise reciprocal via _mm_rcp_ps.
+        static Float Reciprocal(const Float &v)
+        {
+            Float inverse;
+            inverse.m_values[0] = _mm_rcp_ps(v.m_values[0]);
+            inverse.m_values[1] = _mm_rcp_ps(v.m_values[1]);
+            return inverse;
+        }
+
+        // Gathers one channel of one pixel from each of 8 input blocks into the 8 lanes
+        // of chOut; lane i receives the value read from inputBlocks[i].
+        static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, UInt15 &chOut)
+        {
+            int16_t lanes[8];
+            for (int block = 0; block < 8; block++)
+            {
+                lanes[block] = inputBlocks[block].m_pixels[pxOffset][channel];
+            }
+
+            // _mm_set_epi16 takes its arguments from the highest lane down to lane 0.
+            chOut.m_value = _mm_set_epi16(lanes[7], lanes[6], lanes[5], lanes[4], lanes[3], lanes[2], lanes[1], lanes[0]);
+        }
+
+        // Gathers one channel of one pixel from each of 8 input blocks into the 8 lanes
+        // of chOut; lane i receives the value read from inputBlocks[i].
+        static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, SInt16 &chOut)
+        {
+            int16_t lanes[8];
+            for (int block = 0; block < 8; block++)
+            {
+                lanes[block] = inputBlocks[block].m_pixels[pxOffset][channel];
+            }
+
+            // _mm_set_epi16 takes its arguments from the highest lane down to lane 0.
+            chOut.m_value = _mm_set_epi16(lanes[7], lanes[6], lanes[5], lanes[4], lanes[3], lanes[2], lanes[1], lanes[0]);
+        }
+
+        // Builds a Float with every lane set to v.
+        static Float MakeFloat(float v)
+        {
+            const __m128 splat = _mm_set1_ps(v);
+
+            Float broadcast;
+            broadcast.m_values[1] = splat;
+            broadcast.m_values[0] = splat;
+            return broadcast;
+        }
+
+        // Builds a Float with every lane set to zero.
+        static Float MakeFloatZero()
+        {
+            const __m128 zero = _mm_setzero_ps();
+
+            Float cleared;
+            cleared.m_values[1] = zero;
+            cleared.m_values[0] = zero;
+            return cleared;
+        }
+
+        // Broadcasts v to every 16-bit lane.
+        static UInt16 MakeUInt16(uint16_t v)
+        {
+            const short laneBits = static_cast<short>(v);
+
+            UInt16 broadcast;
+            broadcast.m_value = _mm_set1_epi16(laneBits);
+            return broadcast;
+        }
+
+        // Broadcasts v to every 16-bit lane.
+        static SInt16 MakeSInt16(int16_t v)
+        {
+            const short laneBits = static_cast<short>(v);
+
+            SInt16 broadcast;
+            broadcast.m_value = _mm_set1_epi16(laneBits);
+            return broadcast;
+        }
+
+        // Broadcasts v to every 16-bit lane.
+        static AInt16 MakeAInt16(int16_t v)
+        {
+            const short laneBits = static_cast<short>(v);
+
+            AInt16 broadcast;
+            broadcast.m_value = _mm_set1_epi16(laneBits);
+            return broadcast;
+        }
+
+        // Broadcasts v to every 16-bit lane.
+        static UInt15 MakeUInt15(uint16_t v)
+        {
+            const short laneBits = static_cast<short>(v);
+
+            UInt15 broadcast;
+            broadcast.m_value = _mm_set1_epi16(laneBits);
+            return broadcast;
+        }
+
+        // Broadcasts v to every 32-bit lane of both halves.
+        static SInt32 MakeSInt32(int32_t v)
+        {
+            SInt32 broadcast;
+            for (int half = 0; half < 2; half++)
+                broadcast.m_values[half] = _mm_set1_epi32(v);
+            return broadcast;
+        }
+
+        // Broadcasts v to every 32-bit lane of both halves.
+        static UInt31 MakeUInt31(uint32_t v)
+        {
+            UInt31 broadcast;
+            for (int half = 0; half < 2; half++)
+                broadcast.m_values[half] = _mm_set1_epi32(v);
+            return broadcast;
+        }
+
+        // Reads 16-bit lane `offset` of v by viewing the vector's storage as an array.
+        static uint16_t Extract(const UInt16 &v, int offset)
+        {
+            const uint16_t *lanes = reinterpret_cast<const uint16_t*>(&v.m_value);
+            return lanes[offset];
+        }
+
+        // Reads 16-bit lane `offset` of v as a signed value.
+        static int16_t Extract(const SInt16 &v, int offset)
+        {
+            const int16_t *lanes = reinterpret_cast<const int16_t*>(&v.m_value);
+            return lanes[offset];
+        }
+
+        // Reads 16-bit lane `offset` of v.
+        static uint16_t Extract(const UInt15 &v, int offset)
+        {
+            const uint16_t *lanes = reinterpret_cast<const uint16_t*>(&v.m_value);
+            return lanes[offset];
+        }
+
+        // Reads 16-bit lane `offset` of v as a signed value.
+        static int16_t Extract(const AInt16 &v, int offset)
+        {
+            const int16_t *lanes = reinterpret_cast<const int16_t*>(&v.m_value);
+            return lanes[offset];
+        }
+
+        // Reads 32-bit lane `offset`: offset >> 2 picks the half, offset & 3 the lane within it.
+        static int32_t Extract(const SInt32 &v, int offset)
+        {
+            const int half = offset >> 2;
+            const int lane = offset & 3;
+            const int32_t *lanes = reinterpret_cast<const int32_t*>(&v.m_values[half]);
+            return lanes[lane];
+        }
+
+        // Reads float lane `offset`: offset >> 2 picks the half, offset & 3 the lane within it.
+        static float Extract(const Float &v, int offset)
+        {
+            const int half = offset >> 2;
+            const int lane = offset & 3;
+            const float *lanes = reinterpret_cast<const float*>(&v.m_values[half]);
+            return lanes[lane];
+        }
+
+        // Returns true when 16-bit lane `offset` of the flag vector is non-zero.
+        static bool Extract(const ParallelMath::Int16CompFlag &v, int offset)
+        {
+            const int16_t *lanes = reinterpret_cast<const int16_t*>(&v.m_value);
+            return lanes[offset] != 0;
+        }
+
+        // Writes v into 16-bit lane `offset` of dest by viewing dest's storage as an array.
+        static void PutUInt16(UInt16 &dest, int offset, uint16_t v)
+        {
+            uint16_t *lanes = reinterpret_cast<uint16_t*>(&dest);
+            lanes[offset] = v;
+        }
+
+        // Writes v into 16-bit lane `offset` of dest by viewing dest's storage as an array.
+        static void PutUInt15(UInt15 &dest, int offset, uint16_t v)
+        {
+            uint16_t *lanes = reinterpret_cast<uint16_t*>(&dest);
+            lanes[offset] = v;
+        }
+
+        // Writes v into 16-bit lane `offset` of dest by viewing dest's storage as an array.
+        static void PutSInt16(SInt16 &dest, int offset, int16_t v)
+        {
+            int16_t *lanes = reinterpret_cast<int16_t*>(&dest);
+            lanes[offset] = v;
+        }
+
+        // Reads float lane `offset` of v by viewing the whole object as a flat float array.
+        static float ExtractFloat(const Float& v, int offset)
+        {
+            const float *lanes = reinterpret_cast<const float*>(&v);
+            return lanes[offset];
+        }
+
+        // Writes v into float lane `offset` of dest by viewing the whole object as a flat float array.
+        static void PutFloat(Float &dest, int offset, float v)
+        {
+            float *lanes = reinterpret_cast<float*>(&dest);
+            lanes[offset] = v;
+        }
+
+        // Sets 16-bit lane `offset` of the flag vector to -1 (all bits set) for true, or 0 for false.
+        static void PutBoolInt16(Int16CompFlag &dest, int offset, bool v)
+        {
+            int16_t *lanes = reinterpret_cast<int16_t*>(&dest);
+            if (v)
+                lanes[offset] = -1;
+            else
+                lanes[offset] = 0;
+        }
+
+        // Lane-wise a < b on 32-bit lanes, using the signed compare _mm_cmplt_epi32.
+        static Int32CompFlag Less(const UInt31 &a, const UInt31 &b)
+        {
+            Int32CompFlag isLess;
+            for (int half = 0; half < 2; half++)
+                isLess.m_values[half] = _mm_cmplt_epi32(a.m_values[half], b.m_values[half]);
+            return isLess;
+        }
+
+        // Lane-wise signed a < b on 16-bit lanes.
+        static Int16CompFlag Less(const SInt16 &a, const SInt16 &b)
+        {
+            const __m128i lhs = a.m_value;
+            const __m128i rhs = b.m_value;
+
+            Int16CompFlag isLess;
+            isLess.m_value = _mm_cmplt_epi16(lhs, rhs);
+            return isLess;
+        }
+
+        // Lane-wise a < b on 16-bit lanes, using the signed compare _mm_cmplt_epi16.
+        static Int16CompFlag Less(const UInt15 &a, const UInt15 &b)
+        {
+            const __m128i lhs = a.m_value;
+            const __m128i rhs = b.m_value;
+
+            Int16CompFlag isLess;
+            isLess.m_value = _mm_cmplt_epi16(lhs, rhs);
+            return isLess;
+        }
+
+        // Lane-wise a <= b on 16-bit lanes.
+        //
+        // SSE2 has no 16-bit "less or equal" compare, so this uses the identity
+        // a <= b  <=>  min(a, b) == a. The previous implementation used a strict
+        // _mm_cmplt_epi16, which reported false for lanes where a == b.
+        static Int16CompFlag LessOrEqual(const UInt15 &a, const UInt15 &b)
+        {
+            const __m128i smaller = _mm_min_epi16(a.m_value, b.m_value);
+
+            Int16CompFlag result;
+            result.m_value = _mm_cmpeq_epi16(smaller, a.m_value);
+            return result;
+        }
+
+        // Lane-wise a < b on float lanes (_mm_cmplt_ps), over both halves.
+        static FloatCompFlag Less(const Float &a, const Float &b)
+        {
+            FloatCompFlag isLess;
+            isLess.m_values[0] = _mm_cmplt_ps(a.m_values[0], b.m_values[0]);
+            isLess.m_values[1] = _mm_cmplt_ps(a.m_values[1], b.m_values[1]);
+            return isLess;
+        }
+
+        // Lane-wise a <= b on float lanes (_mm_cmple_ps), over both halves.
+        static FloatCompFlag LessOrEqual(const Float &a, const Float &b)
+        {
+            FloatCompFlag isLessOrEqual;
+            isLessOrEqual.m_values[0] = _mm_cmple_ps(a.m_values[0], b.m_values[0]);
+            isLessOrEqual.m_values[1] = _mm_cmple_ps(a.m_values[1], b.m_values[1]);
+            return isLessOrEqual;
+        }
+
+        // Lane-wise equality of two 16-bit vectors of the same subtype.
+        template<int TSubtype>
+        static Int16CompFlag Equal(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b)
+        {
+            const __m128i lhs = a.m_value;
+            const __m128i rhs = b.m_value;
+
+            Int16CompFlag isEqual;
+            isEqual.m_value = _mm_cmpeq_epi16(lhs, rhs);
+            return isEqual;
+        }
+
+        // Lane-wise equality of two float vectors (_mm_cmpeq_ps), over both halves.
+        static FloatCompFlag Equal(const Float &a, const Float &b)
+        {
+            FloatCompFlag isEqual;
+            isEqual.m_values[0] = _mm_cmpeq_ps(a.m_values[0], b.m_values[0]);
+            isEqual.m_values[1] = _mm_cmpeq_ps(a.m_values[1], b.m_values[1]);
+            return isEqual;
+        }
+
+        // Equality of two flag vectors: XOR marks the differing bits, and Not() inverts that.
+        static Int16CompFlag Equal(const Int16CompFlag &a, const Int16CompFlag &b)
+        {
+            Int16CompFlag differs;
+            differs.m_value = _mm_xor_si128(a.m_value, b.m_value);
+            return Not(differs);
+        }
+
+        // Zero-extends each 16-bit lane to 32 bits (interleaving with zero) and converts to float.
+        static Float ToFloat(const UInt16 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+            const __m128i lowLanes = _mm_unpacklo_epi16(v.m_value, zero);
+            const __m128i highLanes = _mm_unpackhi_epi16(v.m_value, zero);
+
+            Float converted;
+            converted.m_values[0] = _mm_cvtepi32_ps(lowLanes);
+            converted.m_values[1] = _mm_cvtepi32_ps(highLanes);
+            return converted;
+        }
+
+        // Zero-extends each 16-bit lane to a 32-bit lane by interleaving with zero.
+        static UInt31 ToUInt31(const UInt16 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+
+            UInt31 widened;
+            widened.m_values[0] = _mm_unpacklo_epi16(v.m_value, zero);
+            widened.m_values[1] = _mm_unpackhi_epi16(v.m_value, zero);
+            return widened;
+        }
+
+        // Zero-extends each 16-bit lane to a 32-bit lane by interleaving with zero.
+        static SInt32 ToInt32(const UInt16 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+
+            SInt32 widened;
+            widened.m_values[0] = _mm_unpacklo_epi16(v.m_value, zero);
+            widened.m_values[1] = _mm_unpackhi_epi16(v.m_value, zero);
+            return widened;
+        }
+
+        // Zero-extends each 16-bit lane to a 32-bit lane by interleaving with zero.
+        static SInt32 ToInt32(const UInt15 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+
+            SInt32 widened;
+            widened.m_values[0] = _mm_unpacklo_epi16(v.m_value, zero);
+            widened.m_values[1] = _mm_unpackhi_epi16(v.m_value, zero);
+            return widened;
+        }
+
+        // Sign-extends each 16-bit lane: the value is placed in the upper half of a
+        // 32-bit lane and then shifted back down arithmetically by 16.
+        static SInt32 ToInt32(const SInt16 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+            const __m128i lowInUpperHalf = _mm_unpacklo_epi16(zero, v.m_value);
+            const __m128i highInUpperHalf = _mm_unpackhi_epi16(zero, v.m_value);
+
+            SInt32 widened;
+            widened.m_values[0] = _mm_srai_epi32(lowInUpperHalf, 16);
+            widened.m_values[1] = _mm_srai_epi32(highInUpperHalf, 16);
+            return widened;
+        }
+
+        // Sign-extends each 16-bit lane to 32 bits (upper-half placement followed by an
+        // arithmetic shift right by 16) and converts the result to float.
+        static Float ToFloat(const SInt16 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+            const __m128i lowLanes = _mm_srai_epi32(_mm_unpacklo_epi16(zero, v.m_value), 16);
+            const __m128i highLanes = _mm_srai_epi32(_mm_unpackhi_epi16(zero, v.m_value), 16);
+
+            Float converted;
+            converted.m_values[0] = _mm_cvtepi32_ps(lowLanes);
+            converted.m_values[1] = _mm_cvtepi32_ps(highLanes);
+            return converted;
+        }
+
+        // Zero-extends each 16-bit lane to 32 bits and converts the result to float.
+        static Float ToFloat(const UInt15 &v)
+        {
+            const __m128i zero = _mm_setzero_si128();
+            const __m128i lowLanes = _mm_unpacklo_epi16(v.m_value, zero);
+            const __m128i highLanes = _mm_unpackhi_epi16(v.m_value, zero);
+
+            Float converted;
+            converted.m_values[0] = _mm_cvtepi32_ps(lowLanes);
+            converted.m_values[1] = _mm_cvtepi32_ps(highLanes);
+            return converted;
+        }
+
+        // Converts each 32-bit lane to float with the signed conversion _mm_cvtepi32_ps.
+        static Float ToFloat(const UInt31 &v)
+        {
+            Float converted;
+            for (int half = 0; half < 2; half++)
+                converted.m_values[half] = _mm_cvtepi32_ps(v.m_values[half]);
+            return converted;
+        }
+
+        // Narrows a float flag vector to a 16-bit flag vector: both halves are reinterpreted
+        // as 32-bit integers and packed with signed saturation (_mm_packs_epi32).
+        static Int16CompFlag FloatFlagToInt16(const FloatCompFlag &v)
+        {
+            const __m128i lowBits = _mm_castps_si128(v.m_values[0]);
+            const __m128i highBits = _mm_castps_si128(v.m_values[1]);
+
+            Int16CompFlag narrowed;
+            narrowed.m_value = _mm_packs_epi32(lowBits, highBits);
+            return narrowed;
+        }
+
+        // Widens a 16-bit flag vector to a float flag vector: each 16-bit lane is interleaved
+        // with itself to fill a 32-bit lane, and the bits are reinterpreted as float.
+        static FloatCompFlag Int16FlagToFloat(const Int16CompFlag &v)
+        {
+            const __m128i flags = v.m_value;
+
+            FloatCompFlag widened;
+            widened.m_values[0] = _mm_castsi128_ps(_mm_unpacklo_epi16(flags, flags));
+            widened.m_values[1] = _mm_castsi128_ps(_mm_unpackhi_epi16(flags, flags));
+            return widened;
+        }
+
+        // Narrows a 32-bit flag vector to a 16-bit flag vector by packing both halves
+        // with signed saturation (_mm_packs_epi32).
+        static Int16CompFlag Int32FlagToInt16(const Int32CompFlag &v)
+        {
+            Int16CompFlag narrowed;
+            narrowed.m_value = _mm_packs_epi32(v.m_values[0], v.m_values[1]);
+            return narrowed;
+        }
+
+        // Builds a flag vector with every lane all-ones (b == true) or all-zero (b == false).
+        static Int16CompFlag MakeBoolInt16(bool b)
+        {
+            Int16CompFlag uniform;
+            uniform.m_value = b ? _mm_set1_epi16(-1) : _mm_setzero_si128();
+            return uniform;
+        }
+
+        // Builds a float flag vector with every lane all-ones (b == true) or all-zero (b == false).
+        static FloatCompFlag MakeBoolFloat(bool b)
+        {
+            const __m128 mask = b ? _mm_castsi128_ps(_mm_set1_epi32(-1)) : _mm_setzero_ps();
+
+            FloatCompFlag uniform;
+            uniform.m_values[1] = mask;
+            uniform.m_values[0] = mask;
+            return uniform;
+        }
+
+        // Returns a AND (NOT b). Note that _mm_andnot_si128 inverts its FIRST operand,
+        // so b is passed first.
+        static Int16CompFlag AndNot(const Int16CompFlag &a, const Int16CompFlag &b)
+        {
+            const __m128i inverted = b.m_value;
+            const __m128i kept = a.m_value;
+
+            Int16CompFlag masked;
+            masked.m_value = _mm_andnot_si128(inverted, kept);
+            return masked;
+        }
+
+        // Bitwise complement of a 16-bit flag vector (XOR with all ones).
+        static Int16CompFlag Not(const Int16CompFlag &b)
+        {
+            const __m128i allOnes = _mm_set1_epi32(-1);
+
+            Int16CompFlag inverted;
+            inverted.m_value = _mm_xor_si128(b.m_value, allOnes);
+            return inverted;
+        }
+
+        // Bitwise complement of a 32-bit flag vector (XOR with all ones), over both halves.
+        static Int32CompFlag Not(const Int32CompFlag &b)
+        {
+            const __m128i allOnes = _mm_set1_epi32(-1);
+
+            Int32CompFlag inverted;
+            for (int half = 0; half < 2; half++)
+                inverted.m_values[half] = _mm_xor_si128(b.m_values[half], allOnes);
+            return inverted;
+        }
+
+        // Converts float lanes to unsigned 16-bit lanes. Each value is biased by -32768 before
+        // the float-to-int conversion so the signed saturating pack covers the whole unsigned
+        // range; flipping the top bit of every lane afterwards removes the bias.
+        // The roundingMode argument is unused here.
+        static UInt16 RoundAndConvertToU16(const Float &v, const void* /*roundingMode*/)
+        {
+            const __m128 bias = _mm_set1_ps(-32768.0f);
+
+            __m128i biased[2];
+            for (int half = 0; half < 2; half++)
+                biased[half] = _mm_cvtps_epi32(_mm_add_ps(v.m_values[half], bias));
+
+            const __m128i narrowed = _mm_packs_epi32(biased[0], biased[1]);
+
+            UInt16 converted;
+            converted.m_value = _mm_xor_si128(narrowed, _mm_set1_epi16(-32768));
+            return converted;
+        }
+
+        // Converts float lanes to 16-bit lanes: each half is converted to 32-bit integers
+        // with _mm_cvtps_epi32 and the two halves are packed with signed saturation.
+        // The roundingMode argument is unused here.
+        //
+        // The pack result is now computed once; previously it was computed into an unused
+        // local and then computed a second time for the return value.
+        static UInt15 RoundAndConvertToU15(const Float &v, const void* /*roundingMode*/)
+        {
+            __m128i lo = _mm_cvtps_epi32(v.m_values[0]);
+            __m128i hi = _mm_cvtps_epi32(v.m_values[1]);
+
+            UInt15 result;
+            result.m_value = _mm_packs_epi32(lo, hi);
+            return result;
+        }
+
+        // Converts float lanes to signed 16-bit lanes: each half is converted to 32-bit
+        // integers with _mm_cvtps_epi32 and the two halves are packed with signed saturation.
+        // The roundingMode argument is unused here.
+        //
+        // The pack result is now computed once; previously it was computed into an unused
+        // local and then computed a second time for the return value.
+        static SInt16 RoundAndConvertToS16(const Float &v, const void* /*roundingMode*/)
+        {
+            __m128i lo = _mm_cvtps_epi32(v.m_values[0]);
+            __m128i hi = _mm_cvtps_epi32(v.m_values[1]);
+
+            SInt16 result;
+            result.m_value = _mm_packs_epi32(lo, hi);
+            return result;
+        }
+
+        // Lane-wise square root via _mm_sqrt_ps, over both halves.
+        static Float Sqrt(const Float &f)
+        {
+            Float root;
+            root.m_values[0] = _mm_sqrt_ps(f.m_values[0]);
+            root.m_values[1] = _mm_sqrt_ps(f.m_values[1]);
+            return root;
+        }
+
+        // Lane-wise absolute value of signed 16-bit lanes: for negative lanes the
+        // arithmetic-shift mask is all ones, so XOR flips the bits and the logical-shift
+        // term adds 1 (two's complement negation); non-negative lanes pass through.
+        static UInt16 Abs(const SInt16 &a)
+        {
+            const __m128i flipMask = _mm_srai_epi16(a.m_value, 15);
+            const __m128i addOne = _mm_srli_epi16(a.m_value, 15);
+            const __m128i flipped = _mm_xor_si128(a.m_value, flipMask);
+
+            UInt16 magnitude;
+            magnitude.m_value = _mm_add_epi16(flipped, addOne);
+            return magnitude;
+        }
+
+        // Lane-wise absolute value of float lanes: -0.0f has only the sign bit set, so
+        // AND-NOT with it clears the sign bit of every lane.
+        static Float Abs(const Float& a)
+        {
+            const __m128 signBit = _mm_set1_ps(-0.0f);
+
+            Float magnitude;
+            for (int half = 0; half < 2; half++)
+                magnitude.m_values[half] = _mm_andnot_ps(signBit, a.m_values[half]);
+            return magnitude;
+        }
+
+        // Squared difference per lane, keeping the low 16 bits of the product (_mm_mullo_epi16).
+        // NOTE(review): the name suggests inputs are 8-bit range so the square fits in 16 bits — confirm at call sites.
+        static UInt16 SqDiffUInt8(const UInt15 &a, const UInt15 &b)
+        {
+            const __m128i delta = _mm_sub_epi16(a.m_value, b.m_value);
+
+            UInt16 squared;
+            squared.m_value = _mm_mullo_epi16(delta, delta);
+            return squared;
+        }
+
+        // Squared difference of signed 16-bit lanes, returned as float.
+        // The difference is formed as max - min; its full 32-bit square is assembled from the
+        // high (_mm_mulhi_epu16) and low (_mm_mullo_epi16) product halves and then converted
+        // with the signed conversion _mm_cvtepi32_ps.
+        static Float SqDiffSInt16(const SInt16 &a, const SInt16 &b)
+        {
+            const __m128i larger = _mm_max_epi16(a.m_value, b.m_value);
+            const __m128i smaller = _mm_min_epi16(a.m_value, b.m_value);
+            const __m128i magnitude = _mm_sub_epi16(larger, smaller);
+
+            const __m128i productHigh = _mm_mulhi_epu16(magnitude, magnitude);
+            const __m128i productLow = _mm_mullo_epi16(magnitude, magnitude);
+
+            Float squared;
+            squared.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(productLow, productHigh));
+            squared.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(productLow, productHigh));
+            return squared;
+        }
+
+        // Converts 16-bit lanes holding a two's-complement-encoded half-float bit pattern
+        // into 32-bit floats.
+        //
+        // The sign comes from the top bit of v; exponent and mantissa are taken from the
+        // magnitude |v|. Previously |v| was computed but never used and the fields were
+        // masked out of the raw two's complement bits, which are not the magnitude bits
+        // for negative lanes.
+        // NOTE(review): this changes results for negative inputs — confirm against the
+        // scalar implementation and the BC6H signed path.
+        static Float TwosCLHalfToFloat(const SInt16 &v)
+        {
+            // |v| per lane: (v XOR sign mask) + sign bit.
+            __m128i absV = _mm_add_epi16(_mm_xor_si128(v.m_value, _mm_srai_epi16(v.m_value, 15)), _mm_srli_epi16(v.m_value, 15));
+
+            __m128i signBits = _mm_and_si128(v.m_value, _mm_set1_epi16(-32768));
+            __m128i mantissa = _mm_and_si128(absV, _mm_set1_epi16(0x03ff));
+            __m128i exponent = _mm_and_si128(absV, _mm_set1_epi16(0x7c00));
+
+            __m128i isDenormal = _mm_cmpeq_epi16(exponent, _mm_setzero_si128());
+
+            // Convert exponent to high-bits: move it into the position it occupies in the
+            // upper 16 bits of a float and add 14336 (0x3800).
+            exponent = _mm_add_epi16(_mm_srli_epi16(exponent, 3), _mm_set1_epi16(14336));
+
+            // For lanes with a zero half exponent, build the float (sign | 0x3800 in the high
+            // half, zero low half) that is subtracted from the result below.
+            __m128i denormalCorrectionHigh = _mm_and_si128(isDenormal, _mm_or_si128(signBits, _mm_set1_epi16(14336)));
+
+            // Upper and lower 16-bit halves of the float bit pattern.
+            __m128i highBits = _mm_or_si128(signBits, _mm_or_si128(exponent, _mm_srli_epi16(mantissa, 3)));
+            __m128i lowBits = _mm_slli_epi16(mantissa, 13);
+
+            __m128i flow = _mm_unpacklo_epi16(lowBits, highBits);
+            __m128i fhigh = _mm_unpackhi_epi16(lowBits, highBits);
+
+            __m128i correctionLow = _mm_unpacklo_epi16(_mm_setzero_si128(), denormalCorrectionHigh);
+            __m128i correctionHigh = _mm_unpackhi_epi16(_mm_setzero_si128(), denormalCorrectionHigh);
+
+            Float result;
+            result.m_values[0] = _mm_sub_ps(_mm_castsi128_ps(flow), _mm_castsi128_ps(correctionLow));
+            result.m_values[1] = _mm_sub_ps(_mm_castsi128_ps(fhigh), _mm_castsi128_ps(correctionHigh));
+
+            return result;
+        }
+
+        // Squared difference between a (converted with TwosCLHalfToFloat) and the float vector b.
+        static Float SqDiff2CLFloat(const SInt16 &a, const Float &b)
+        {
+            const Float delta = TwosCLHalfToFloat(a) - b;
+            return delta * delta;
+        }
+
+        // Squared difference between a and b after both are converted with TwosCLHalfToFloat.
+        static Float SqDiff2CL(const SInt16 &a, const SInt16 &b)
+        {
+            const Float lhs = TwosCLHalfToFloat(a);
+            const Float rhs = TwosCLHalfToFloat(b);
+            const Float delta = lhs - rhs;
+            return delta * delta;
+        }
+
+        // Weighted variant: a is converted with TwosCLHalfToFloat and scaled by aWeight
+        // before the squared difference against b is taken.
+        static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b)
+        {
+            const Float weighted = TwosCLHalfToFloat(a) * aWeight;
+            const Float delta = weighted - b;
+            return delta * delta;
+        }
+
+        // Logical right shift of each 16-bit lane.
+        static UInt16 RightShift(const UInt16 &v, int bits)
+        {
+            UInt16 shifted;
+            shifted.m_value = _mm_srli_epi16(v.m_value, bits);
+            return shifted;
+        }
+
+        // Logical right shift of each 32-bit lane, over both halves.
+        static UInt31 RightShift(const UInt31 &v, int bits)
+        {
+            UInt31 shifted;
+            for (int half = 0; half < 2; half++)
+                shifted.m_values[half] = _mm_srli_epi32(v.m_values[half], bits);
+            return shifted;
+        }
+
+        // Arithmetic (sign-preserving) right shift of each 16-bit lane.
+        static SInt16 RightShift(const SInt16 &v, int bits)
+        {
+            SInt16 shifted;
+            shifted.m_value = _mm_srai_epi16(v.m_value, bits);
+            return shifted;
+        }
+
+        // Logical right shift of each 16-bit lane.
+        static UInt15 RightShift(const UInt15 &v, int bits)
+        {
+            UInt15 shifted;
+            shifted.m_value = _mm_srli_epi16(v.m_value, bits);
+            return shifted;
+        }
+
+        // Arithmetic (sign-preserving) right shift of each 32-bit lane, over both halves.
+        static SInt32 RightShift(const SInt32 &v, int bits)
+        {
+            SInt32 shifted;
+            for (int half = 0; half < 2; half++)
+                shifted.m_values[half] = _mm_srai_epi32(v.m_values[half], bits);
+            return shifted;
+        }
+
+        // Narrows 32-bit lanes to 16-bit lanes with signed saturation (_mm_packs_epi32).
+        static SInt16 ToSInt16(const SInt32 &v)
+        {
+            const __m128i lowHalf = v.m_values[0];
+            const __m128i highHalf = v.m_values[1];
+
+            SInt16 narrowed;
+            narrowed.m_value = _mm_packs_epi32(lowHalf, highHalf);
+            return narrowed;
+        }
+
+        // Reinterprets the lanes as SInt16; the bits are copied unchanged.
+        static SInt16 ToSInt16(const UInt16 &v)
+        {
+            SInt16 reinterpreted;
+            reinterpreted.m_value = v.m_value;
+            return reinterpreted;
+        }
+
+        // Reinterprets the lanes as SInt16; the bits are copied unchanged.
+        static SInt16 ToSInt16(const UInt15 &v)
+        {
+            SInt16 reinterpreted;
+            reinterpreted.m_value = v.m_value;
+            return reinterpreted;
+        }
+
+        // Keeps the low 16 bits of each 32-bit lane. Each lane is first sign-extended from
+        // bit 15 (shift left 16, arithmetic shift right 16) so that the signed saturating
+        // pack passes the low 16 bits through unchanged.
+        static UInt16 ToUInt16(const UInt32 &v)
+        {
+            __m128i signExtended[2];
+            for (int half = 0; half < 2; half++)
+                signExtended[half] = _mm_srai_epi32(_mm_slli_epi32(v.m_values[half], 16), 16);
+
+            UInt16 narrowed;
+            narrowed.m_value = _mm_packs_epi32(signExtended[0], signExtended[1]);
+            return narrowed;
+        }
+
+        // Keeps the low 16 bits of each 32-bit lane, using the same sign-extend-then-pack
+        // sequence as the UInt32 overload.
+        static UInt16 ToUInt16(const UInt31 &v)
+        {
+            __m128i signExtended[2];
+            for (int half = 0; half < 2; half++)
+                signExtended[half] = _mm_srai_epi32(_mm_slli_epi32(v.m_values[half], 16), 16);
+
+            UInt16 narrowed;
+            narrowed.m_value = _mm_packs_epi32(signExtended[0], signExtended[1]);
+            return narrowed;
+        }
+
+        // Narrows 32-bit lanes to 16-bit lanes with signed saturation (_mm_packs_epi32).
+        static UInt15 ToUInt15(const UInt31 &v)
+        {
+            const __m128i lowHalf = v.m_values[0];
+            const __m128i highHalf = v.m_values[1];
+
+            UInt15 narrowed;
+            narrowed.m_value = _mm_packs_epi32(lowHalf, highHalf);
+            return narrowed;
+        }
+
+        // Reinterprets the lanes as UInt15; the bits are copied unchanged.
+        static UInt15 ToUInt15(const SInt16 &v)
+        {
+            UInt15 reinterpreted;
+            reinterpreted.m_value = v.m_value;
+            return reinterpreted;
+        }
+
+        // Reinterprets the lanes as UInt15; the bits are copied unchanged.
+        static UInt15 ToUInt15(const UInt16 &v)
+        {
+            UInt15 reinterpreted;
+            reinterpreted.m_value = v.m_value;
+            return reinterpreted;
+        }
+
+        // Widening signed multiply: the high and low 16-bit halves of each product are
+        // computed separately and interleaved into 32-bit lanes.
+        static SInt32 XMultiply(const SInt16 &a, const SInt16 &b)
+        {
+            const __m128i productLow = _mm_mullo_epi16(a.m_value, b.m_value);
+            const __m128i productHigh = _mm_mulhi_epi16(a.m_value, b.m_value);
+
+            SInt32 product;
+            product.m_values[0] = _mm_unpacklo_epi16(productLow, productHigh);
+            product.m_values[1] = _mm_unpackhi_epi16(productLow, productHigh);
+            return product;
+        }
+
+        // Widening multiply of SInt16 by UInt15, using the signed high-half multiply.
+        static SInt32 XMultiply(const SInt16 &a, const UInt15 &b)
+        {
+            const __m128i productLow = _mm_mullo_epi16(a.m_value, b.m_value);
+            const __m128i productHigh = _mm_mulhi_epi16(a.m_value, b.m_value);
+
+            SInt32 product;
+            product.m_values[0] = _mm_unpacklo_epi16(productLow, productHigh);
+            product.m_values[1] = _mm_unpackhi_epi16(productLow, productHigh);
+            return product;
+        }
+
+        // Argument-swapped convenience overload; forwards to the (SInt16, UInt15) version.
+        static SInt32 XMultiply(const UInt15 &a, const SInt16 &b)
+        {
+            return XMultiply(b, a);
+        }
+
+        // Widening unsigned multiply: uses the unsigned high-half multiply (_mm_mulhi_epu16).
+        static UInt32 XMultiply(const UInt16 &a, const UInt16 &b)
+        {
+            const __m128i productLow = _mm_mullo_epi16(a.m_value, b.m_value);
+            const __m128i productHigh = _mm_mulhi_epu16(a.m_value, b.m_value);
+
+            UInt32 product;
+            product.m_values[0] = _mm_unpacklo_epi16(productLow, productHigh);
+            product.m_values[1] = _mm_unpackhi_epi16(productLow, productHigh);
+            return product;
+        }
+
+        // Lane-wise multiply keeping only the low 16 bits of each product (_mm_mullo_epi16).
+        static UInt16 CompactMultiply(const UInt16 &a, const UInt15 &b)
+        {
+            UInt16 product;
+            product.m_value = _mm_mullo_epi16(a.m_value, b.m_value);
+            return product;
+        }
+
+        // Lane-wise multiply keeping only the low 16 bits of each product.
+        static UInt16 CompactMultiply(const UInt15 &a, const UInt15 &b)
+        {
+            UInt16 product;
+            product.m_value = _mm_mullo_epi16(a.m_value, b.m_value);
+            return product;
+        }
+
+        // Lane-wise multiply keeping only the low 16 bits of each product.
+        static SInt16 CompactMultiply(const SInt16 &a, const UInt15 &b)
+        {
+            SInt16 product;
+            product.m_value = _mm_mullo_epi16(a.m_value, b.m_value);
+            return product;
+        }
+
+        // Lane-wise multiply keeping only the low 16 bits of each product.
+        static SInt16 CompactMultiply(const SInt16 &a, const SInt16 &b)
+        {
+            SInt16 product;
+            product.m_value = _mm_mullo_epi16(a.m_value, b.m_value);
+            return product;
+        }
+
+        // Widening unsigned multiply: the high (_mm_mulhi_epu16) and low (_mm_mullo_epi16)
+        // 16-bit halves of each product are interleaved into 32-bit lanes.
+        static UInt31 XMultiply(const UInt15 &a, const UInt15 &b)
+        {
+            const __m128i productLow = _mm_mullo_epi16(a.m_value, b.m_value);
+            const __m128i productHigh = _mm_mulhi_epu16(a.m_value, b.m_value);
+
+            UInt31 product;
+            product.m_values[0] = _mm_unpacklo_epi16(productLow, productHigh);
+            product.m_values[1] = _mm_unpackhi_epi16(productLow, productHigh);
+            return product;
+        }
+
+        // Widening unsigned multiply of UInt16 by UInt15.
+        static UInt31 XMultiply(const UInt16 &a, const UInt15 &b)
+        {
+            const __m128i productLow = _mm_mullo_epi16(a.m_value, b.m_value);
+            const __m128i productHigh = _mm_mulhi_epu16(a.m_value, b.m_value);
+
+            UInt31 product;
+            product.m_values[0] = _mm_unpacklo_epi16(productLow, productHigh);
+            product.m_values[1] = _mm_unpackhi_epi16(productLow, productHigh);
+            return product;
+        }
+
+        // Argument-swapped convenience overload; forwards to the (UInt16, UInt15) version.
+        static UInt31 XMultiply(const UInt15 &a, const UInt16 &b)
+        {
+            return XMultiply(b, a);
+        }
+
+        // True when the top bit of any byte in the flag vector is set.
+        static bool AnySet(const Int16CompFlag &v)
+        {
+            const int byteMask = _mm_movemask_epi8(v.m_value);
+            return byteMask != 0;
+        }
+
+        // True when the top bit of every one of the 16 bytes in the flag vector is set.
+        static bool AllSet(const Int16CompFlag &v)
+        {
+            const int byteMask = _mm_movemask_epi8(v.m_value);
+            return byteMask == 0xffff;
+        }
+
+        // True when the sign bit of any float lane, in either half, is set.
+        static bool AnySet(const FloatCompFlag &v)
+        {
+            const int lowMask = _mm_movemask_ps(v.m_values[0]);
+            const int highMask = _mm_movemask_ps(v.m_values[1]);
+            return (lowMask | highMask) != 0;
+        }
+
+        // True when the sign bit of every float lane, in both halves, is set.
+        static bool AllSet(const FloatCompFlag &v)
+        {
+            const int lowMask = _mm_movemask_ps(v.m_values[0]);
+            const int highMask = _mm_movemask_ps(v.m_values[1]);
+            return (lowMask & highMask) == 0xf;
+        }
+    };
+
+#else
+    // Scalar version
+    struct ParallelMath
+    {
+        struct RoundTowardZeroForScope // empty tag types: in this build they only select a RoundAndConvertToInt overload
+        {
+        };
+
+        struct RoundTowardNearestForScope
+        {
+        };
+
+        struct RoundUpForScope
+        {
+        };
+
+        struct RoundDownForScope
+        {
+        };
+
+        static const int ParallelSize = 1; // the scalar build handles one value per "vector"
+
+        enum Int16Subtype
+        {
+            IntSubtype_Signed,
+            IntSubtype_UnsignedFull,
+            IntSubtype_UnsignedTruncated,
+            IntSubtype_Abstract,
+        };
+        // Every integer "vector" type is a plain int32_t in the scalar build.
+        typedef int32_t SInt16;
+        typedef int32_t UInt15;
+        typedef int32_t UInt16;
+        typedef int32_t AInt16;
+
+        typedef int32_t SInt32;
+        typedef int32_t UInt31;
+        typedef int32_t UInt32;
+        typedef int32_t AInt32;
+
+        typedef int32_t ScalarUInt16;
+        typedef int32_t ScalarSInt16;
+
+        typedef float Float;
+        // No-op cast: source and target are both int32_t here, so the reference is returned unchanged.
+        template<class TTargetType>
+        struct LosslessCast
+        {
+            static const int32_t& Cast(const int32_t &src)
+            {
+                return src;
+            }
+        };
+        // Comparison results are plain bools in the scalar build.
+        typedef bool Int16CompFlag;
+        typedef bool FloatCompFlag;
+
+        static int32_t AbstractAdd(const int32_t &a, const int32_t &b)
+        {
+            return a + b;
+        }
+
+        static int32_t AbstractSubtract(const int32_t &a, const int32_t &b)
+        {
+            return a - b;
+        }
+
+        static float Select(bool flag, float a, float b)
+        {
+            return flag ? a : b;
+        }
+
+        static int32_t Select(bool flag, int32_t a, int32_t b)
+        {
+            return flag ? a : b;
+        }
+
+        static int32_t SelectOrZero(bool flag, int32_t a)
+        {
+            return flag ? a : 0;
+        }
+        // ConditionalSet writes src to dest only when flag is true; NotConditionalSet only when it is false.
+        static void ConditionalSet(int32_t& dest, bool flag, int32_t src)
+        {
+            if (flag)
+                dest = src;
+        }
+
+        static void ConditionalSet(bool& dest, bool flag, bool src)
+        {
+            if (flag)
+                dest = src;
+        }
+
+        static int32_t ConditionalNegate(bool flag, int32_t v)
+        {
+            return (flag) ? -v : v;
+        }
+
+        static void NotConditionalSet(int32_t& dest, bool flag, int32_t src)
+        {
+            if (!flag)
+                dest = src;
+        }
+
+        static void ConditionalSet(float& dest, bool flag, float src)
+        {
+            if (flag)
+                dest = src;
+        }
+
+        static void NotConditionalSet(float& dest, bool flag, float src)
+        {
+            if (!flag)
+                dest = src;
+        }
+        // Replaces an exactly-zero denominator with 1 so a later division cannot divide by zero.
+        static void MakeSafeDenominator(float& v)
+        {
+            if (v == 0.0f)
+                v = 1.0f;
+        }
+
+        static int32_t SignedRightShift(int32_t v, int bits)
+        {
+            return v >> bits;
+        }
+        // Sign-extends the low `precision` bits of v: shift them to the top of the word, then shift back down.
+        static int32_t TruncateToPrecisionSigned(int32_t v, int precision)
+        {
+            v = static_cast<int32_t>(static_cast<uint32_t>(v) << (32 - precision)); // shift as unsigned: left-shifting a negative int is undefined before C++20
+            return SignedRightShift(v, 32 - precision);
+        }
+        // Keeps only the low `precision` bits of v.
+        static int32_t TruncateToPrecisionUnsigned(int32_t v, int precision)
+        {
+            return v & ((1 << precision) - 1);
+        }
+
+        static int32_t Min(int32_t a, int32_t b)
+        {
+            if (a < b)
+                return a;
+            return b;
+        }
+
+        static float Min(float a, float b)
+        {
+            if (a < b)
+                return a;
+            return b;
+        }
+
+        static int32_t Max(int32_t a, int32_t b)
+        {
+            if (a > b)
+                return a;
+            return b;
+        }
+
+        static float Max(float a, float b)
+        {
+            if (a > b)
+                return a;
+            return b;
+        }
+
+        static float Abs(float a)
+        {
+            return fabsf(a);
+        }
+
+        static int32_t Abs(int32_t a)
+        {
+            if (a < 0)
+                return -a;
+            return a;
+        }
+
+        static float Clamp(float v, float min, float max)
+        {
+            if (v < min)
+                return min;
+            if (v > max)
+                return max;
+            return v;
+        }
+
+        static float Reciprocal(float v)
+        {
+            return 1.0f / v;
+        }
+        // Input loaders: with ParallelSize == 1 only inputBlocks[0] is read.
+        static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, int32_t& chOut)
+        {
+            chOut = inputBlocks[0].m_pixels[pxOffset][channel];
+        }
+
+        static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, int32_t& chOut)
+        {
+            chOut = inputBlocks[0].m_pixels[pxOffset][channel];
+        }
+
+        static float MakeFloat(float v)
+        {
+            return v;
+        }
+
+        static float MakeFloatZero()
+        {
+            return 0.0f;
+        }
+
+        static int32_t MakeUInt16(uint16_t v)
+        {
+            return v;
+        }
+
+        static int32_t MakeSInt16(int16_t v)
+        {
+            return v;
+        }
+
+        static int32_t MakeAInt16(int16_t v)
+        {
+            return v;
+        }
+
+        static int32_t MakeUInt15(uint16_t v)
+        {
+            return v;
+        }
+
+        static int32_t MakeSInt32(int32_t v)
+        {
+            return v;
+        }
+
+        static int32_t MakeUInt31(int32_t v)
+        {
+            return v;
+        }
+        // Lane accessors: the lane offset is ignored because there is only one lane.
+        static int32_t Extract(int32_t v, int offset)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            return v;
+        }
+
+        static bool Extract(bool v, int offset)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            return v;
+        }
+
+        static float Extract(float v, int offset)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            return v;
+        }
+
+        static void PutUInt16(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            dest = v;
+        }
+
+        static void PutUInt15(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            dest = v;
+        }
+
+        static void PutSInt16(int32_t &dest, int offset, ParallelMath::ScalarSInt16 v)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            dest = v;
+        }
+
+        static float ExtractFloat(float v, int offset)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            return v;
+        }
+
+        static void PutFloat(float &dest, int offset, float v)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            dest = v;
+        }
+
+        static void PutBoolInt16(bool &dest, int offset, bool v)
+        {
+            UNREFERENCED_PARAMETER(offset);
+            dest = v;
+        }
+
+        static bool Less(int32_t a, int32_t b)
+        {
+            return a < b;
+        }
+
+        static bool Less(float a, float b)
+        {
+            return a < b;
+        }
+        // Inclusive comparison: unlike Less(), this must be true when a == b.
+        static bool LessOrEqual(int32_t a, int32_t b)
+        {
+            return a <= b;
+        }
+
+        static bool LessOrEqual(float a, float b)
+        {
+            return a <= b;
+        }
+
+        static bool Equal(int32_t a, int32_t b)
+        {
+            return a == b;
+        }
+
+        static bool Equal(float a, float b)
+        {
+            return a == b;
+        }
+
+        static float ToFloat(int32_t v)
+        {
+            return static_cast<float>(v);
+        }
+
+        static int32_t ToUInt31(int32_t v)
+        {
+            return v;
+        }
+
+        static int32_t ToInt32(int32_t v)
+        {
+            return v;
+        }
+        // Flag conversions are identities because every flag type is bool.
+        static bool FloatFlagToInt16(bool v)
+        {
+            return v;
+        }
+
+        static bool Int32FlagToInt16(bool v)
+        {
+            return v;
+        }
+
+        static bool Int16FlagToFloat(bool v)
+        {
+            return v;
+        }
+
+        static bool MakeBoolInt16(bool b)
+        {
+            return b;
+        }
+
+        static bool MakeBoolFloat(bool b)
+        {
+            return b;
+        }
+
+        static bool AndNot(bool a, bool b)
+        {
+            return a && !b;
+        }
+
+        static bool Not(bool b)
+        {
+            return !b;
+        }
+        // Float-to-int conversions; the tag pointer's type picks the rounding rule and its value is unused.
+        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardZeroForScope *rtz)
+        {
+            UNREFERENCED_PARAMETER(rtz);
+            return static_cast<int>(v);
+        }
+
+        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundUpForScope *ru)
+        {
+            UNREFERENCED_PARAMETER(ru);
+            return static_cast<int>(ceilf(v));
+        }
+
+        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundDownForScope *rd)
+        {
+            UNREFERENCED_PARAMETER(rd);
+            return static_cast<int>(floorf(v));
+        }
+        // "Nearest" is implemented as floorf(v + 0.5f), so exact halves round upward.
+        static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardNearestForScope *rtn)
+        {
+            UNREFERENCED_PARAMETER(rtn);
+            return static_cast<int>(floorf(v + 0.5f));
+        }
+
+        template<class TRoundMode>
+        static int32_t RoundAndConvertToU16(float v, const TRoundMode *roundingMode)
+        {
+            return RoundAndConvertToInt(v, roundingMode);
+        }
+
+        template<class TRoundMode>
+        static int32_t RoundAndConvertToU15(float v, const TRoundMode *roundingMode)
+        {
+            return RoundAndConvertToInt(v, roundingMode);
+        }
+
+        template<class TRoundMode>
+        static int32_t RoundAndConvertToS16(float v, const TRoundMode *roundingMode)
+        {
+            return RoundAndConvertToInt(v, roundingMode);
+        }
+
+        static float Sqrt(float f)
+        {
+            return sqrtf(f);
+        }
+        // The three SqDiff* integer helpers share one implementation: (a - b) squared.
+        static int32_t SqDiffUInt8(int32_t a, int32_t b)
+        {
+            int32_t delta = a - b;
+            return delta * delta;
+        }
+
+        static int32_t SqDiffInt16(int32_t a, int32_t b)
+        {
+            int32_t delta = a - b;
+            return delta * delta;
+        }
+
+        static int32_t SqDiffSInt16(int32_t a, int32_t b)
+        {
+            int32_t delta = a - b;
+            return delta * delta;
+        }
+        // Builds float bits from the mantissa/exponent fields of |v|; a zero exponent field gets a subtractive correction.
+        static float TwosCLHalfToFloat(int32_t v)
+        {
+            int32_t absV = (v < 0) ? -v : v;
+            // NOTE(review): signBits comes from absV, so it is zero whenever |v| < 32768 and negative inputs decode as positive floats - confirm against the SIMD implementation.
+            int32_t signBits = (absV & -32768);
+            int32_t mantissa = (absV & 0x03ff);
+            int32_t exponent = (absV & 0x7c00);
+
+            bool isDenormal = (exponent == 0);
+
+            // Convert exponent to high-bits
+            exponent = (exponent >> 3) + 14336;
+
+            int32_t denormalCorrection = (isDenormal ? (signBits | 14336) : 0) << 16;
+
+            int32_t fBits = ((exponent | signBits) << 16) | (mantissa << 13);
+
+            float f, correction;
+            memcpy(&f, &fBits, 4);
+            memcpy(&correction, &denormalCorrection, 4);
+
+            return f - correction;
+        }
+        // Squared difference between the float decoded from a and the float b.
+        static Float SqDiff2CLFloat(const SInt16 &a, const Float &b)
+        {
+            Float fa = TwosCLHalfToFloat(a);
+
+            Float diff = fa - b;
+            return diff * diff;
+        }
+        // Squared difference between the floats decoded from a and from b.
+        static Float SqDiff2CL(const SInt16 &a, const SInt16 &b)
+        {
+            Float fa = TwosCLHalfToFloat(a);
+            Float fb = TwosCLHalfToFloat(b);
+
+            Float diff = fa - fb;
+            return diff * diff;
+        }
+        // As the two-argument form, but the decoded a is scaled by aWeight before subtracting b.
+        static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b)
+        {
+            Float fa = TwosCLHalfToFloat(a) * aWeight;
+
+            Float diff = fa - b;
+            return diff * diff;
+        }
+        // In the scalar build RightShift simply forwards to SignedRightShift.
+        static int32_t RightShift(int32_t v, int bits)
+        {
+            return SignedRightShift(v, bits);
+        }
+
+        static int32_t ToSInt16(int32_t v)
+        {
+            return v;
+        }
+
+        static int32_t ToUInt16(int32_t v)
+        {
+            return v;
+        }
+
+        static int32_t ToUInt15(int32_t v)
+        {
+            return v;
+        }
+        // XMultiply and CompactMultiply are both a plain int32_t product here.
+        static int32_t XMultiply(int32_t a, int32_t b)
+        {
+            return a * b;
+        }
+
+        static int32_t CompactMultiply(int32_t a, int32_t b)
+        {
+            return a * b;
+        }
+        // With a single lane, "any set" and "all set" are both just the flag itself.
+        static bool AnySet(bool v)
+        {
+            return v;
+        }
+
+        static bool AllSet(bool v)
+        {
+            return v;
+        }
+    };
+
+#endif
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_S3TC.cpp b/thirdparty/cvtt/ConvectionKernels_S3TC.cpp
new file mode 100644
index 0000000000..23f1bd3314
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_S3TC.cpp
@@ -0,0 +1,1054 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels_S3TC.h"
+
+#include "ConvectionKernels_AggregatedError.h"
+#include "ConvectionKernels_BCCommon.h"
+#include "ConvectionKernels_EndpointRefiner.h"
+#include "ConvectionKernels_EndpointSelector.h"
+#include "ConvectionKernels_IndexSelector.h"
+#include "ConvectionKernels_UnfinishedEndpoints.h"
+#include "ConvectionKernels_S3TC_SingleColor.h"
+// Seeds an error accumulator with FLT_MAX in every lane.
+void cvtt::Internal::S3TCComputer::Init(MFloat& error)
+{
+    error = ParallelMath::MakeFloat(FLT_MAX);
+}
+// Computes (v * 253 + 512) >> 10 as a 6-bit code (255 maps to 63), then rewrites v as (code << 2) | (code >> 4).
+void cvtt::Internal::S3TCComputer::QuantizeTo6Bits(MUInt15& v)
+{
+    MUInt15 code6 = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(253)) + ParallelMath::MakeUInt16(512), 10));
+    v = (code6 << 2) | ParallelMath::RightShift(code6, 4);
+}
+// Computes (v * 249 + 1024) >> 11 as a 5-bit code (255 maps to 31), then rewrites v as (code << 3) | (code >> 2).
+void cvtt::Internal::S3TCComputer::QuantizeTo5Bits(MUInt15& v)
+{
+    MUInt15 code5 = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(249)) + ParallelMath::MakeUInt16(1024), 11));
+    v = (code5 << 3) | ParallelMath::RightShift(code5, 2);
+}
+// Quantizes a 3-channel endpoint in place: 6 bits for channel 1, 5 bits for channels 0 and 2.
+void cvtt::Internal::S3TCComputer::QuantizeTo565(MUInt15 endPoint[3])
+{
+    QuantizeTo6Bits(endPoint[1]);
+    QuantizeTo5Bits(endPoint[0]);
+    QuantizeTo5Bits(endPoint[2]);
+}
+// Returns 3% of the absolute endpoint span; callers pass it as the padding term d of ParanoidDiff.
+cvtt::ParallelMath::Float cvtt::Internal::S3TCComputer::ParanoidFactorForSpan(const MSInt16& span)
+{
+    return ParallelMath::Abs(ParallelMath::ToFloat(span)) * 0.03f;
+}
+// Padded squared error: (|a - b| + d) squared, with the subtraction done on the signed views of a and b.
+cvtt::ParallelMath::Float cvtt::Internal::S3TCComputer::ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d)
+{
+    MFloat magnitude = ParallelMath::Abs(ParallelMath::ToFloat(ParallelMath::LosslessCast<MSInt16>::Cast(a) - ParallelMath::LosslessCast<MSInt16>::Cast(b)));
+    MFloat padded = magnitude + d;
+    return padded * padded;
+}
+// Tries encoding the block as a single color looked up from the rounded channel averages; lanes whose error beats bestError adopt it. floatPixels and rtn are not referenced in the body.
+void cvtt::Internal::S3TCComputer::TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights,
+    MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    float channelWeightsSq[3];
+
+    for (int ch = 0; ch < 3; ch++)
+        channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
+
+    MUInt15 totals[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) };
+
+    for (int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+            totals[ch] = totals[ch] + pixels[px][ch];
+    }
+    // Rounded mean over the 16 pixels: (sum + 8) >> 4.
+    MUInt15 average[3];
+    for (int ch = 0; ch < 3; ch++)
+        average[ch] = ParallelMath::RightShift(totals[ch] + ParallelMath::MakeUInt15(8), 4);
+    // Choose the lookup tables: one for channels 0 and 2, one for channel 1; the _p variants are used when S3TC_Paranoid is set.
+    const Tables::S3TCSC::TableEntry* rbTable = NULL;
+    const Tables::S3TCSC::TableEntry* gTable = NULL;
+    if (flags & cvtt::Flags::S3TC_Paranoid)
+    {
+        if (range == 4)
+        {
+            rbTable = Tables::S3TCSC::g_singleColor5_3_p;
+            gTable = Tables::S3TCSC::g_singleColor6_3_p;
+        }
+        else
+        {
+            assert(range == 3);
+            rbTable = Tables::S3TCSC::g_singleColor5_2_p;
+            gTable = Tables::S3TCSC::g_singleColor6_2_p;
+        }
+    }
+    else
+    {
+        if (range == 4)
+        {
+            rbTable = Tables::S3TCSC::g_singleColor5_3;
+            gTable = Tables::S3TCSC::g_singleColor6_3;
+        }
+        else
+        {
+            assert(range == 3);
+            rbTable = Tables::S3TCSC::g_singleColor5_2;
+            gTable = Tables::S3TCSC::g_singleColor6_2;
+        }
+    }
+    // The table lookup is done one lane at a time: extract the average, index the table, write the entry's fields back.
+    MUInt15 interpolated[3];
+    MUInt15 eps[2][3];
+    MSInt16 spans[3];
+    for (int i = 0; i < ParallelMath::ParallelSize; i++)
+    {
+        for (int ch = 0; ch < 3; ch++)
+        {
+            uint16_t avg = ParallelMath::Extract(average[ch], i);
+            const Tables::S3TCSC::TableEntry& tableEntry = ((ch == 1) ? gTable[avg] : rbTable[avg]);
+            ParallelMath::PutUInt15(eps[0][ch], i, tableEntry.m_min);
+            ParallelMath::PutUInt15(eps[1][ch], i, tableEntry.m_max);
+            ParallelMath::PutUInt15(interpolated[ch], i, tableEntry.m_actualColor);
+            ParallelMath::PutSInt16(spans[ch], i, tableEntry.m_span);
+        }
+    }
+    // Total error of representing every pixel by the looked-up color, weighted by the squared channel weights.
+    MFloat error = ParallelMath::MakeFloatZero();
+    if (flags & cvtt::Flags::S3TC_Paranoid)
+    {
+        MFloat spanParanoidFactors[3];
+        for (int ch = 0; ch < 3; ch++)
+            spanParanoidFactors[ch] = ParanoidFactorForSpan(spans[ch]);
+
+        for (int px = 0; px < 16; px++)
+        {
+            for (int ch = 0; ch < 3; ch++)
+                error = error + ParanoidDiff(interpolated[ch], pixels[px][ch], spanParanoidFactors[ch]) * channelWeightsSq[ch];
+        }
+    }
+    else
+    {
+        for (int px = 0; px < 16; px++)
+        {
+            for (int ch = 0; ch < 3; ch++)
+                error = error + ParallelMath::ToFloat(ParallelMath::SqDiffUInt8(interpolated[ch], pixels[px][ch])) * channelWeightsSq[ch];
+        }
+    }
+
+    ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError);
+    ParallelMath::Int16CompFlag better16 = ParallelMath::FloatFlagToInt16(better);
+    // Only lanes flagged in better16 have their best-so-far outputs overwritten.
+    if (ParallelMath::AnySet(better16))
+    {
+        bestError = ParallelMath::Min(bestError, error);
+        for (int epi = 0; epi < 2; epi++)
+            for (int ch = 0; ch < 3; ch++)
+                ParallelMath::ConditionalSet(bestEndpoints[epi][ch], better16, eps[epi][ch]);
+
+        MUInt15 vindexes = ParallelMath::MakeUInt15(1); // every pixel is assigned index 1
+        for (int px = 0; px < 16; px++)
+            ParallelMath::ConditionalSet(bestIndexes[px], better16, vindexes);
+
+        ParallelMath::ConditionalSet(bestRange, better16, ParallelMath::MakeUInt15(range));
+    }
+}
+// Quantizes the candidate endpoints to 5:6:5, selects an index per pixel, totals the error, and updates the best-so-far outputs in lanes where it improves. A non-null refiner also receives each pixel's pre-weighted value and index.
+void cvtt::Internal::S3TCComputer::TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights,
+    MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn)
+{
+    float channelWeightsSq[3];
+
+    for (int ch = 0; ch < 3; ch++)
+        channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
+    // Quantize a local copy so the caller's unquantized endpoints stay untouched.
+    MUInt15 endPoints[2][3];
+
+    for (int ep = 0; ep < 2; ep++)
+        for (int ch = 0; ch < 3; ch++)
+            endPoints[ep][ch] = unquantizedEndPoints[ep][ch];
+
+    QuantizeTo565(endPoints[0]);
+    QuantizeTo565(endPoints[1]);
+
+    IndexSelector<3> selector;
+    selector.Init<false>(channelWeights, endPoints, range);
+
+    MUInt15 indexes[16];
+    // Per-channel padding for ParanoidDiff, derived from the span between the quantized endpoints.
+    MFloat paranoidFactors[3];
+    for (int ch = 0; ch < 3; ch++)
+        paranoidFactors[ch] = ParanoidFactorForSpan(ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[0][ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[1][ch]));
+
+    MFloat error = ParallelMath::MakeFloatZero();
+    AggregatedError<3> aggError;
+    for (int px = 0; px < 16; px++)
+    {
+        MUInt15 index = selector.SelectIndexLDR(floatPixels[px], rtn);
+        indexes[px] = index;
+
+        if (refiner)
+            refiner->ContributeUnweightedPW(preWeightedPixels[px], index);
+
+        MUInt15 reconstructed[3];
+        selector.ReconstructLDRPrecise(index, reconstructed);
+
+        if (flags & Flags::S3TC_Paranoid)
+        {
+            for (int ch = 0; ch < 3; ch++)
+                error = error + ParanoidDiff(reconstructed[ch], pixels[px][ch], paranoidFactors[ch]) * channelWeightsSq[ch];
+        }
+        else
+            BCCommon::ComputeErrorLDR<3>(flags, reconstructed, pixels[px], aggError);
+    }
+    // Outside paranoid mode the per-pixel error went into aggError, so the total is taken from it here.
+    if (!(flags & Flags::S3TC_Paranoid))
+        error = aggError.Finalize(flags, channelWeightsSq);
+
+    ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError);
+    // Only lanes flagged in better have their best-so-far outputs overwritten.
+    if (ParallelMath::AnySet(better))
+    {
+        ParallelMath::Int16CompFlag betterInt16 = ParallelMath::FloatFlagToInt16(better);
+
+        ParallelMath::ConditionalSet(bestError, better, error);
+
+        for (int ep = 0; ep < 2; ep++)
+            for (int ch = 0; ch < 3; ch++)
+                ParallelMath::ConditionalSet(bestEndpoints[ep][ch], betterInt16, endPoints[ep][ch]);
+
+        for (int px = 0; px < 16; px++)
+            ParallelMath::ConditionalSet(bestIndexes[px], betterInt16, indexes[px]);
+
+        ParallelMath::ConditionalSet(bestRange, betterInt16, ParallelMath::MakeUInt15(static_cast<uint16_t>(range)));
+    }
+}
+// Feeds the sorted inputs to an EndpointRefiner, giving index i to counts[i] consecutive entries, then evaluates the refined endpoints through TestEndpoints with range nCounts.
+void cvtt::Internal::S3TCComputer::TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest,
+    const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange,
+    const ParallelMath::RoundTowardNearestForScope* rtn)
+{
+    UNREFERENCED_PARAMETER(alphaTest);
+    UNREFERENCED_PARAMETER(flags);
+    // NOTE(review): flags is marked unreferenced above but is still forwarded to TestEndpoints at the end.
+    EndpointRefiner<3> refiner;
+
+    refiner.Init(nCounts, channelWeights);
+    // e is the running position in the sorted inputs; escape leaves both loops once no lane is valid.
+    bool escape = false;
+    int e = 0;
+    for (int i = 0; i < nCounts; i++)
+    {
+        for (int n = 0; n < counts[i]; n++)
+        {
+            ParallelMath::Int16CompFlag valid = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>(n)), numElements);
+            if (!ParallelMath::AnySet(valid))
+            {
+                escape = true;
+                break;
+            }
+            // NOTE(review): valid compares the per-bucket counter n, not the running index e, with numElements - confirm this is intended.
+            if (ParallelMath::AllSet(valid))
+                refiner.ContributeUnweightedPW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i)));
+            else
+            {
+                MFloat weight = ParallelMath::Select(ParallelMath::Int16FlagToFloat(valid), ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloat(0.0f));
+                refiner.ContributePW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), weight);
+            }
+        }
+
+        if (escape)
+            break;
+    }
+    // Evaluate the refined endpoints; NULL is passed as the refiner, so TestEndpoints feeds nothing back.
+    MUInt15 endPoints[2][3];
+    refiner.GetRefinedEndpointsLDR(endPoints, rtn);
+
+    TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, nCounts, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, NULL, rtn);
+}
+// Selects an index for each pixel of inputChannel from an IndexSelector set up with endpoints 0 and 255 and range 16, then packs two indexes per output byte.
+void cvtt::Internal::S3TCComputer::PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride)
+{
+    UNREFERENCED_PARAMETER(flags);
+    ParallelMath::RoundTowardNearestForScope rtn;
+
+    float unitWeight[1] = { 1.0f };
+
+    MUInt15 alpha[16];
+    MFloat alphaF[16];
+
+    for (int px = 0; px < 16; px++)
+    {
+        ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, alpha[px]);
+        alphaF[px] = ParallelMath::ToFloat(alpha[px]);
+    }
+
+    MUInt15 endpoints[2][1] = { { ParallelMath::MakeUInt15(0) },{ ParallelMath::MakeUInt15(255) } };
+
+    IndexSelector<1> quantizer;
+    quantizer.Init<false>(unitWeight, endpoints, 16);
+
+    MUInt15 levels[16];
+
+    for (int px = 0; px < 16; px++)
+        levels[px] = quantizer.SelectIndexLDR(&alphaF[px], &rtn);
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        for (int byteIdx = 0; byteIdx < 8; byteIdx++)
+        {
+            int lowNibble = ParallelMath::Extract(levels[byteIdx * 2], block);
+            int highNibble = ParallelMath::Extract(levels[byteIdx * 2 + 1], block);
+            // Even pixel goes in bits 0-3, odd pixel in bits 4-7.
+            packedBlocks[byteIdx] = static_cast<uint8_t>(lowNibble | (highNibble << 4));
+        }
+
+        packedBlocks += packedBlockStride;
+    }
+}
+
+void cvtt::Internal::S3TCComputer::PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds)
+{
+    if (maxTweakRounds < 1)
+        maxTweakRounds = 1;
+
+    if (numRefineRounds < 1)
+        numRefineRounds = 1;
+
+    ParallelMath::RoundTowardNearestForScope rtn;
+
+    float oneWeight[1] = { 1.0f };
+
+    MUInt15 pixels[16];
+    MFloat floatPixels[16];
+
+    MUInt15 highTerminal = isSigned ? ParallelMath::MakeUInt15(254) : ParallelMath::MakeUInt15(255);
+    MUInt15 highTerminalMinusOne = highTerminal - ParallelMath::MakeUInt15(1);
+
+    for (int px = 0; px < 16; px++)
+    {
+        ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]);
+
+        if (isSigned)
+            pixels[px] = ParallelMath::Min(pixels[px], highTerminal);
+
+        floatPixels[px] = ParallelMath::ToFloat(pixels[px]);
+    }
+
+    MUInt15 sortedPixels[16];
+    for (int px = 0; px < 16; px++)
+        sortedPixels[px] = pixels[px];
+
+    for (int sortEnd = 15; sortEnd > 0; sortEnd--)
+    {
+        for (int sortOffset = 0; sortOffset < sortEnd; sortOffset++)
+        {
+            MUInt15 a = sortedPixels[sortOffset];
+            MUInt15 b = sortedPixels[sortOffset + 1];
+
+            sortedPixels[sortOffset] = ParallelMath::Min(a, b);
+            sortedPixels[sortOffset + 1] = ParallelMath::Max(a, b);
+        }
+    }
+
+    MUInt15 zero = ParallelMath::MakeUInt15(0);
+    MUInt15 one = ParallelMath::MakeUInt15(1);
+
+    MUInt15 bestIsFullRange = zero;
+    MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+    MUInt15 bestEP[2] = { zero, zero };
+    MUInt15 bestIndexes[16] = {
+        zero, zero, zero, zero,
+        zero, zero, zero, zero,
+        zero, zero, zero, zero,
+        zero, zero, zero, zero
+    };
+
+    // Full-precision
+    {
+        MUInt15 minEP = sortedPixels[0];
+        MUInt15 maxEP = sortedPixels[15];
+
+        MFloat base[1] = { ParallelMath::ToFloat(minEP) };
+        MFloat offset[1] = { ParallelMath::ToFloat(maxEP - minEP) };
+
+        UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset);
+
+        int numTweakRounds = BCCommon::TweakRoundsForRange(8);
+        if (numTweakRounds > maxTweakRounds)
+            numTweakRounds = maxTweakRounds;
+
+        for (int tweak = 0; tweak < numTweakRounds; tweak++)
+        {
+            MUInt15 ep[2][1];
+
+            ufep.FinishLDR(tweak, 8, ep[0], ep[1]);
+
+            for (int refinePass = 0; refinePass < numRefineRounds; refinePass++)
+            {
+                EndpointRefiner<1> refiner;
+                refiner.Init(8, oneWeight);
+
+                if (isSigned)
+                    for (int epi = 0; epi < 2; epi++)
+                        ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal);
+
+                IndexSelector<1> indexSelector;
+                indexSelector.Init<false>(oneWeight, ep, 8);
+
+                MUInt15 indexes[16];
+
+                AggregatedError<1> aggError;
+                for (int px = 0; px < 16; px++)
+                {
+                    MUInt15 index = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn);
+
+                    MUInt15 reconstructedPixel;
+
+                    indexSelector.ReconstructLDRPrecise(index, &reconstructedPixel);
+                    BCCommon::ComputeErrorLDR<1>(flags, &reconstructedPixel, &pixels[px], aggError);
+
+                    if (refinePass != numRefineRounds - 1)
+                        refiner.ContributeUnweightedPW(&floatPixels[px], index);
+
+                    indexes[px] = index;
+                }
+                MFloat error = aggError.Finalize(flags | Flags::Uniform, oneWeight);
+
+                ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
+                ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
+
+                if (ParallelMath::AnySet(errorBetter16))
+                {
+                    bestError = ParallelMath::Min(error, bestError);
+                    ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, one);
+                    for (int px = 0; px < 16; px++)
+                        ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]);
+
+                    for (int epi = 0; epi < 2; epi++)
+                        ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]);
+                }
+
+                if (refinePass != numRefineRounds - 1)
+                    refiner.GetRefinedEndpointsLDR(ep, &rtn);
+            }
+        }
+    }
+
+    // Reduced precision with special endpoints
+    {
+        MUInt15 bestHeuristicMin = sortedPixels[0];
+        MUInt15 bestHeuristicMax = sortedPixels[15];
+
+        ParallelMath::Int16CompFlag canTryClipping;
+
+        // In reduced precision, we want try putting endpoints at the reserved indexes at the ends.
+        // The heuristic we use is to assign indexes to the end as long as they aren't off by more than half of the index range.
+        // This will usually not find anything, but it's cheap to check.
+
+        {
+            MUInt15 largestPossibleRange = bestHeuristicMax - bestHeuristicMin; // Max: 255
+            MUInt15 lowestPossibleClearance = ParallelMath::Min(bestHeuristicMin, static_cast<MUInt15>(highTerminal - bestHeuristicMax));
+
+            MUInt15 lowestPossibleClearanceTimes10 = (lowestPossibleClearance << 2) + (lowestPossibleClearance << 4);
+            canTryClipping = ParallelMath::LessOrEqual(lowestPossibleClearanceTimes10, largestPossibleRange);
+        }
+
+        if (ParallelMath::AnySet(canTryClipping))
+        {
+            MUInt15 lowClearances[16];
+            MUInt15 highClearances[16];
+            MUInt15 bestSkipCount = ParallelMath::MakeUInt15(0);
+
+            lowClearances[0] = highClearances[0] = ParallelMath::MakeUInt15(0);
+
+            for (int px = 1; px < 16; px++)
+            {
+                lowClearances[px] = sortedPixels[px - 1];
+                highClearances[px] = highTerminal - sortedPixels[16 - px];
+            }
+
+            for (uint16_t firstIndex = 0; firstIndex < 16; firstIndex++)
+            {
+                uint16_t numSkippedLow = firstIndex;
+
+                MUInt15 lowClearance = lowClearances[firstIndex];
+
+                for (uint16_t lastIndex = firstIndex; lastIndex < 16; lastIndex++)
+                {
+                    uint16_t numSkippedHigh = 15 - lastIndex;
+                    uint16_t numSkipped = numSkippedLow + numSkippedHigh;
+
+                    MUInt15 numSkippedV = ParallelMath::MakeUInt15(numSkipped);
+
+                    ParallelMath::Int16CompFlag areMoreSkipped = ParallelMath::Less(bestSkipCount, numSkippedV);
+
+                    if (!ParallelMath::AnySet(areMoreSkipped))
+                        continue;
+
+                    MUInt15 clearance = ParallelMath::Max(highClearances[numSkippedHigh], lowClearance);
+                    MUInt15 clearanceTimes10 = (clearance << 2) + (clearance << 4);
+
+                    MUInt15 range = sortedPixels[lastIndex] - sortedPixels[firstIndex];
+
+                    ParallelMath::Int16CompFlag isBetter = (areMoreSkipped & ParallelMath::LessOrEqual(clearanceTimes10, range));
+                    ParallelMath::ConditionalSet(bestHeuristicMin, isBetter, sortedPixels[firstIndex]);
+                    ParallelMath::ConditionalSet(bestHeuristicMax, isBetter, sortedPixels[lastIndex]);
+                }
+            }
+        }
+
+        MUInt15 bestSimpleMin = one;
+        MUInt15 bestSimpleMax = highTerminalMinusOne;
+
+        for (int px = 0; px < 16; px++)
+        {
+            ParallelMath::ConditionalSet(bestSimpleMin, ParallelMath::Less(zero, sortedPixels[15 - px]), sortedPixels[15 - px]);
+            ParallelMath::ConditionalSet(bestSimpleMax, ParallelMath::Less(sortedPixels[px], highTerminal), sortedPixels[px]);
+        }
+
+        MUInt15 minEPs[2] = { bestSimpleMin, bestHeuristicMin };
+        MUInt15 maxEPs[2] = { bestSimpleMax, bestHeuristicMax };
+
+        int minEPRange = 2;
+        if (ParallelMath::AllSet(ParallelMath::Equal(minEPs[0], minEPs[1])))
+            minEPRange = 1;
+
+        int maxEPRange = 2;
+        if (ParallelMath::AllSet(ParallelMath::Equal(maxEPs[0], maxEPs[1])))
+            maxEPRange = 1;
+
+        for (int minEPIndex = 0; minEPIndex < minEPRange; minEPIndex++)
+        {
+            for (int maxEPIndex = 0; maxEPIndex < maxEPRange; maxEPIndex++)
+            {
+                MFloat base[1] = { ParallelMath::ToFloat(minEPs[minEPIndex]) };
+                MFloat offset[1] = { ParallelMath::ToFloat(maxEPs[maxEPIndex] - minEPs[minEPIndex]) };
+
+                UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset);
+
+                int numTweakRounds = BCCommon::TweakRoundsForRange(6);
+                if (numTweakRounds > maxTweakRounds)
+                    numTweakRounds = maxTweakRounds;
+
+                for (int tweak = 0; tweak < numTweakRounds; tweak++)
+                {
+                    MUInt15 ep[2][1];
+
+                    ufep.FinishLDR(tweak, 8, ep[0], ep[1]);
+
+                    for (int refinePass = 0; refinePass < numRefineRounds; refinePass++)
+                    {
+                        EndpointRefiner<1> refiner;
+                        refiner.Init(6, oneWeight);
+
+                        if (isSigned)
+                            for (int epi = 0; epi < 2; epi++)
+                                ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal);
+
+                        IndexSelector<1> indexSelector;
+                        indexSelector.Init<false>(oneWeight, ep, 6);
+
+                        MUInt15 indexes[16];
+                        MFloat error = ParallelMath::MakeFloatZero();
+
+                        for (int px = 0; px < 16; px++)
+                        {
+                            MUInt15 selectedIndex = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn);
+
+                            MUInt15 reconstructedPixel;
+
+                            indexSelector.ReconstructLDRPrecise(selectedIndex, &reconstructedPixel);
+
+                            MFloat zeroError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &zero, &pixels[px], 1, oneWeight);
+                            MFloat highTerminalError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &highTerminal, &pixels[px], 1, oneWeight);
+                            MFloat selectedIndexError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &reconstructedPixel, &pixels[px], 1, oneWeight);
+
+                            MFloat bestPixelError = zeroError;
+                            MUInt15 index = ParallelMath::MakeUInt15(6);
+
+                            ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(ParallelMath::Less(highTerminalError, bestPixelError)), ParallelMath::MakeUInt15(7));
+                            bestPixelError = ParallelMath::Min(bestPixelError, highTerminalError);
+
+                            ParallelMath::FloatCompFlag selectedIndexBetter = ParallelMath::Less(selectedIndexError, bestPixelError);
+
+                            if (ParallelMath::AllSet(selectedIndexBetter))
+                            {
+                                if (refinePass != numRefineRounds - 1)
+                                    refiner.ContributeUnweightedPW(&floatPixels[px], selectedIndex);
+                            }
+                            else
+                            {
+                                MFloat refineWeight = ParallelMath::Select(selectedIndexBetter, ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloatZero());
+
+                                if (refinePass != numRefineRounds - 1)
+                                    refiner.ContributePW(&floatPixels[px], selectedIndex, refineWeight);
+                            }
+
+                            ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(selectedIndexBetter), selectedIndex);
+                            bestPixelError = ParallelMath::Min(bestPixelError, selectedIndexError);
+
+                            error = error + bestPixelError;
+
+                            indexes[px] = index;
+                        }
+
+                        ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError);
+                        ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
+
+                        if (ParallelMath::AnySet(errorBetter16))
+                        {
+                            bestError = ParallelMath::Min(error, bestError);
+                            ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, zero);
+                            for (int px = 0; px < 16; px++)
+                                ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]);
+
+                            for (int epi = 0; epi < 2; epi++)
+                                ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]);
+                        }
+
+                        if (refinePass != numRefineRounds - 1)
+                            refiner.GetRefinedEndpointsLDR(ep, &rtn);
+                    }
+                }
+            }
+        }
+    }
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++)
+    {
+        int ep0 = ParallelMath::Extract(bestEP[0], block);
+        int ep1 = ParallelMath::Extract(bestEP[1], block);
+        int isFullRange = ParallelMath::Extract(bestIsFullRange, block);
+
+        if (isSigned)
+        {
+            ep0 -= 127;
+            ep1 -= 127;
+
+            assert(ep0 >= -127 && ep0 <= 127);
+            assert(ep1 >= -127 && ep1 <= 127);
+        }
+
+
+        bool swapEndpoints = (isFullRange != 0) != (ep0 > ep1);
+
+        if (swapEndpoints)
+            std::swap(ep0, ep1);
+
+        uint16_t dumpBits = 0;
+        int dumpBitsOffset = 0;
+        int dumpByteOffset = 2;
+        packedBlocks[0] = static_cast<uint8_t>(ep0 & 0xff);
+        packedBlocks[1] = static_cast<uint8_t>(ep1 & 0xff);
+
+        int maxValue = (isFullRange != 0) ? 7 : 5;
+
+        for (int px = 0; px < 16; px++)
+        {
+            int index = ParallelMath::Extract(bestIndexes[px], block);
+
+            if (swapEndpoints && index <= maxValue)
+                index = maxValue - index;
+
+            if (index != 0)
+            {
+                if (index == maxValue)
+                    index = 1;
+                else if (index < maxValue)
+                    index++;
+            }
+
+            assert(index >= 0 && index < 8);
+
+            dumpBits |= static_cast<uint16_t>(index << dumpBitsOffset);
+            dumpBitsOffset += 3;
+
+            if (dumpBitsOffset >= 8)
+            {
+                assert(dumpByteOffset < 8);
+                packedBlocks[dumpByteOffset] = static_cast<uint8_t>(dumpBits & 0xff);
+                dumpBits >>= 8;
+                dumpBitsOffset -= 8;
+                dumpByteOffset++;
+            }
+        }
+
+        assert(dumpBitsOffset == 0);
+        assert(dumpByteOffset == 8);
+
+        packedBlocks += packedBlockStride;
+    }
+}
+
+void cvtt::Internal::S3TCComputer::PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds)
+{ // Encodes ParallelSize 16-pixel blocks from `inputs`; each result is 8 bytes (two packed 5:6:5 endpoints + sixteen 2-bit indexes) written at `packedBlocks`.
+    ParallelMath::RoundTowardNearestForScope rtn; // Scope object whose address is handed to the selector/refiner/test helpers below.
+
+    if (numRefineRounds < 1)
+        numRefineRounds = 1; // At least one refinement round is always run.
+
+    if (maxTweakRounds < 1)
+        maxTweakRounds = 1; // At least one endpoint tweak is always tried.
+
+    EndpointSelector<3, 8> endpointSelector;
+
+    MUInt15 pixels[16][4]; // Indexed [pixel][channel]; channel 3 is used as alpha below.
+    MFloat floatPixels[16][4];
+
+    MFloat preWeightedPixels[16][4];
+
+    for (int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 4; ch++)
+            ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);
+    }
+
+    for (int px = 0; px < 16; px++)
+    {
+        for (int ch = 0; ch < 4; ch++)
+            floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); // Taken before the alpha binarization below, so channel 3 still holds the input alpha here.
+    }
+
+    if (alphaTest)
+    {
+        MUInt15 threshold = ParallelMath::MakeUInt15(static_cast<uint16_t>(floor(alphaThreshold * 255.0f + 0.5f))); // alphaThreshold * 255 rounded to nearest (presumably alphaThreshold is in [0, 1] - confirm with callers).
+
+        for (int px = 0; px < 16; px++)
+        {
+            ParallelMath::Int16CompFlag belowThreshold = ParallelMath::Less(pixels[px][3], threshold);
+            pixels[px][3] = ParallelMath::Select(belowThreshold, ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(255)); // Binarize alpha to 0 or 255; 0 where alpha < threshold (assuming Select(flag, a, b) yields a where flag is set).
+        }
+    }
+
+    BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); // Runs after the binarization, so it sees the 0/255 alpha when alphaTest is set.
+
+    MUInt15 minAlpha = ParallelMath::MakeUInt15(255);
+
+    for (int px = 0; px < 16; px++)
+        minAlpha = ParallelMath::Min(minAlpha, pixels[px][3]); // NOTE(review): minAlpha is not read again anywhere in this function.
+
+    MFloat pixelWeights[16]; // Per-pixel weights passed to the endpoint selector passes below.
+    for (int px = 0; px < 16; px++)
+    {
+        pixelWeights[px] = ParallelMath::MakeFloat(1.0f);
+        if (alphaTest)
+        {
+            ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255));
+
+            ParallelMath::ConditionalSet(pixelWeights[px], ParallelMath::Int16FlagToFloat(isTransparent), ParallelMath::MakeFloatZero()); // Pixels with alpha < 255 get weight 0.
+        }
+    }
+
+    for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
+    {
+        for (int px = 0; px < 16; px++)
+            endpointSelector.ContributePass(preWeightedPixels[px], pass, pixelWeights[px]);
+
+        endpointSelector.FinishPass(pass);
+    }
+
+    UnfinishedEndpoints<3> ufep = endpointSelector.GetEndpoints(channelWeights); // Starting endpoints; finished per tweak/range via FinishLDR below.
+
+    MUInt15 bestEndpoints[2][3]; // Best-so-far state; passed to every Test* helper and read by the packing loop at the end.
+    MUInt15 bestIndexes[16];
+    MUInt15 bestRange = ParallelMath::MakeUInt15(0); // 0 is a placeholder; the packing loop asserts the final value is 3 or 4.
+    MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
+
+    for (int px = 0; px < 16; px++)
+        bestIndexes[px] = ParallelMath::MakeUInt15(0);
+
+    for (int ep = 0; ep < 2; ep++)
+        for (int ch = 0; ch < 3; ch++)
+            bestEndpoints[ep][ch] = ParallelMath::MakeUInt15(0);
+
+    if (exhaustive)
+    {
+        MSInt16 sortBins[16]; // Sort keys: selector index in the high bits, original pixel index in the low 4 bits.
+
+        {
+            // Compute an 11-bit index, change it to signed, stuff it in the high bits of the sort bins,
+            // and pack the original indexes into the low bits.
+
+            MUInt15 sortEP[2][3];
+            ufep.FinishLDR(0, 11, sortEP[0], sortEP[1]);
+
+            IndexSelector<3> sortSelector;
+            sortSelector.Init<false>(channelWeights, sortEP, 1 << 11);
+
+            for (int16_t px = 0; px < 16; px++)
+            {
+                MSInt16 sortBin = ParallelMath::LosslessCast<MSInt16>::Cast(sortSelector.SelectIndexLDR(floatPixels[px], &rtn) << 4);
+
+                if (alphaTest)
+                {
+                    ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255));
+
+                    ParallelMath::ConditionalSet(sortBin, isTransparent, ParallelMath::MakeSInt16(-16)); // 0xfff0; stays negative after px (0..15) is added below
+                }
+
+                sortBin = sortBin + ParallelMath::MakeSInt16(px); // Low 4 bits now carry the original pixel index.
+
+                sortBins[px] = sortBin;
+            }
+        }
+
+        // Sort bins
+        for (int sortEnd = 1; sortEnd < 16; sortEnd++)
+        {
+            for (int sortLoc = sortEnd; sortLoc > 0; sortLoc--)
+            {
+                MSInt16 a = sortBins[sortLoc];
+                MSInt16 b = sortBins[sortLoc - 1];
+
+                sortBins[sortLoc] = ParallelMath::Max(a, b); // Compare-exchange: larger key moves to the higher slot, giving ascending order.
+                sortBins[sortLoc - 1] = ParallelMath::Min(a, b);
+            }
+        }
+
+        MUInt15 firstElement = ParallelMath::MakeUInt15(0); // Per lane: index of the first bin whose key is not negative.
+        for (uint16_t e = 0; e < 16; e++)
+        {
+            ParallelMath::Int16CompFlag isInvalid = ParallelMath::Less(sortBins[e], ParallelMath::MakeSInt16(0));
+            ParallelMath::ConditionalSet(firstElement, isInvalid, ParallelMath::MakeUInt15(e + 1));
+            if (!ParallelMath::AnySet(isInvalid))
+                break; // Bins are sorted ascending, so no later bin can be negative in any lane.
+        }
+
+        MUInt15 numElements = ParallelMath::MakeUInt15(16) - firstElement; // Per lane: number of bins with non-negative keys.
+
+        MUInt15 sortedInputs[16][4];
+        MFloat floatSortedInputs[16][4];
+        MFloat pwFloatSortedInputs[16][4];
+
+        for (int e = 0; e < 16; e++)
+        {
+            for (int ch = 0; ch < 4; ch++)
+                sortedInputs[e][ch] = ParallelMath::MakeUInt15(0); // Slots not filled by the gather below stay zero.
+        }
+
+        for (int block = 0; block < ParallelMath::ParallelSize; block++)
+        {
+            for (int e = ParallelMath::Extract(firstElement, block); e < 16; e++)
+            {
+                ParallelMath::ScalarUInt16 sortBin = ParallelMath::Extract(sortBins[e], block);
+                int originalIndex = (sortBin & 15); // Recover the pixel index stored in the low 4 bits of the key.
+
+                for (int ch = 0; ch < 4; ch++)
+                    ParallelMath::PutUInt15(sortedInputs[15 - e][ch], block, ParallelMath::Extract(pixels[originalIndex][ch], block)); // 15 - e: the largest key lands in slot 0.
+            }
+        }
+
+        for (int e = 0; e < 16; e++)
+        {
+            for (int ch = 0; ch < 4; ch++)
+            {
+                MFloat f = ParallelMath::ToFloat(sortedInputs[e][ch]);
+                floatSortedInputs[e][ch] = f;
+                pwFloatSortedInputs[e][ch] = f * channelWeights[ch]; // Channel-weighted copy of the sorted value.
+            }
+        }
+
+        for (int n0 = 0; n0 <= 15; n0++) // Enumerates every (n0, n1, n2, n3) with sum 16 in which no single count is 16.
+        {
+            int remainingFor1 = 16 - n0;
+            if (remainingFor1 == 16)
+                remainingFor1 = 15;
+
+            for (int n1 = 0; n1 <= remainingFor1; n1++)
+            {
+                int remainingFor2 = 16 - n1 - n0;
+                if (remainingFor2 == 16)
+                    remainingFor2 = 15;
+
+                for (int n2 = 0; n2 <= remainingFor2; n2++)
+                {
+                    int n3 = 16 - n2 - n1 - n0;
+
+                    if (n3 == 16)
+                        continue;
+
+                    int counts[4] = { n0, n1, n2, n3 };
+
+                    TestCounts(flags, counts, 4, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
+                }
+            }
+        }
+
+        TestSingleColor(flags, pixels, floatPixels, 4, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); // Single-color candidate with range 4.
+
+        if (alphaTest) // Repeat the enumeration with three counts and range 3.
+        {
+            for (int n0 = 0; n0 <= 15; n0++)
+            {
+                int remainingFor1 = 16 - n0;
+                if (remainingFor1 == 16)
+                    remainingFor1 = 15;
+
+                for (int n1 = 0; n1 <= remainingFor1; n1++)
+                {
+                    int n2 = 16 - n1 - n0;
+
+                    if (n2 == 16)
+                        continue;
+
+                    int counts[3] = { n0, n1, n2 };
+
+                    TestCounts(flags, counts, 3, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
+                }
+            }
+
+            TestSingleColor(flags, pixels, floatPixels, 3, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn);
+        }
+    }
+    else
+    {
+        int minRange = alphaTest ? 3 : 4; // Range 3 is only tried when alphaTest is set.
+
+        for (int range = minRange; range <= 4; range++)
+        {
+            int tweakRounds = BCCommon::TweakRoundsForRange(range);
+            if (tweakRounds > maxTweakRounds)
+                tweakRounds = maxTweakRounds; // Caller-supplied cap on the number of tweaks.
+
+            for (int tweak = 0; tweak < tweakRounds; tweak++)
+            {
+                MUInt15 endPoints[2][3];
+
+                ufep.FinishLDR(tweak, range, endPoints[0], endPoints[1]);
+
+                for (int refine = 0; refine < numRefineRounds; refine++)
+                {
+                    EndpointRefiner<3> refiner; // A fresh refiner is built for every round.
+                    refiner.Init(range, channelWeights);
+
+                    TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, range, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &refiner, &rtn);
+
+                    if (refine != numRefineRounds - 1)
+                        refiner.GetRefinedEndpointsLDR(endPoints, &rtn); // Skipped on the final round, where the refined endpoints would go unused.
+                }
+            }
+        }
+    }
+
+    for (int block = 0; block < ParallelMath::ParallelSize; block++) // Scalar packing: one output block per lane.
+    {
+        ParallelMath::ScalarUInt16 range = ParallelMath::Extract(bestRange, block);
+        assert(range == 3 || range == 4);
+
+        ParallelMath::ScalarUInt16 compressedEP[2];
+        for (int ep = 0; ep < 2; ep++)
+        {
+            ParallelMath::ScalarUInt16 endPoint[3];
+            for (int ch = 0; ch < 3; ch++)
+                endPoint[ch] = ParallelMath::Extract(bestEndpoints[ep][ch], block);
+
+            int compressed = (endPoint[0] & 0xf8) << 8; // Top 5 bits of channel 0 -> bits 15..11.
+            compressed |= (endPoint[1] & 0xfc) << 3; // Top 6 bits of channel 1 -> bits 10..5.
+            compressed |= (endPoint[2] & 0xf8) >> 3; // Top 5 bits of channel 2 -> bits 4..0.
+
+            compressedEP[ep] = static_cast<ParallelMath::ScalarUInt16>(compressed);
+        }
+
+        int indexOrder[4]; // Maps the index values held in bestIndexes to the 2-bit codes stored in the block.
+
+        if (range == 4)
+        {
+            if (compressedEP[0] == compressedEP[1])
+            {
+                indexOrder[0] = 0; // Identical packed endpoints: every pixel is stored as code 0.
+                indexOrder[1] = 0;
+                indexOrder[2] = 0;
+                indexOrder[3] = 0;
+            }
+            else if (compressedEP[0] < compressedEP[1])
+            {
+                std::swap(compressedEP[0], compressedEP[1]); // Larger packed value goes first (NOTE(review): presumably the BC1 four-color ordering rule - confirm against the format spec).
+                indexOrder[0] = 1;
+                indexOrder[1] = 3;
+                indexOrder[2] = 2;
+                indexOrder[3] = 0;
+            }
+            else
+            {
+                indexOrder[0] = 0;
+                indexOrder[1] = 2;
+                indexOrder[2] = 3;
+                indexOrder[3] = 1;
+            }
+        }
+        else
+        {
+            assert(range == 3);
+
+            if (compressedEP[0] > compressedEP[1])
+            {
+                std::swap(compressedEP[0], compressedEP[1]); // Smaller-or-equal packed value goes first for range 3.
+                indexOrder[0] = 1;
+                indexOrder[1] = 2;
+                indexOrder[2] = 0;
+            }
+            else
+            {
+                indexOrder[0] = 0;
+                indexOrder[1] = 2;
+                indexOrder[2] = 1;
+            }
+            indexOrder[3] = 3; // Index 3 passes through unchanged in both orderings.
+        }
+
+        packedBlocks[0] = static_cast<uint8_t>(compressedEP[0] & 0xff); // Each 16-bit endpoint is written low byte first.
+        packedBlocks[1] = static_cast<uint8_t>((compressedEP[0] >> 8) & 0xff);
+        packedBlocks[2] = static_cast<uint8_t>(compressedEP[1] & 0xff);
+        packedBlocks[3] = static_cast<uint8_t>((compressedEP[1] >> 8) & 0xff);
+
+        for (int i = 0; i < 16; i += 4) // Four pixels per output byte, 2 bits each, first pixel in the lowest bits.
+        {
+            int packedIndexes = 0;
+            for (int subi = 0; subi < 4; subi++)
+            {
+                ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[i + subi], block);
+                packedIndexes |= (indexOrder[index] << (subi * 2)); // Assumes index < 4; indexOrder has exactly four entries.
+            }
+
+            packedBlocks[4 + i / 4] = static_cast<uint8_t>(packedIndexes);
+        }
+
+        packedBlocks += packedBlockStride; // Step to the next lane's output block.
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_S3TC.h b/thirdparty/cvtt/ConvectionKernels_S3TC.h
new file mode 100644
index 0000000000..aa197229c2
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_S3TC.h
@@ -0,0 +1,51 @@
+#pragma once
+#ifndef __CVTT_S3TC_H__
+#define __CVTT_S3TC_H__
+
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        template<int TVectorSize>
+        class EndpointRefiner; // Forward declaration only; S3TCComputer::TestEndpoints below takes an EndpointRefiner<3>*.
+    }
+
+    struct PixelBlockU8; // Forward declaration only; used below as the `const PixelBlockU8* inputs` parameter type.
+}
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        class S3TCComputer // Groups the S3TC packing routines; only typedefs and static member functions are declared here.
+        {
+        public:
+            typedef ParallelMath::Float MFloat; // Short aliases for the ParallelMath value types used in the signatures below.
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+
+            static void Init(MFloat& error); // Takes `error` by non-const reference (in/out).
+            static void QuantizeTo6Bits(MUInt15& v); // Modifies `v` in place.
+            static void QuantizeTo5Bits(MUInt15& v); // Modifies `v` in place.
+            static void QuantizeTo565(MUInt15 endPoint[3]); // Operates in place on a 3-channel endpoint.
+            static MFloat ParanoidFactorForSpan(const MSInt16& span);
+            static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d);
+            static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights,
+                MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn); // best* are non-const in/out state shared across Test* calls; PackRGB passes range 3 or 4.
+            static void TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights,
+                MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn); // Same best* in/out state; PackRGB reads refined endpoints back from `refiner` after the call.
+            static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest,
+                const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange,
+                const ParallelMath::RoundTowardNearestForScope* rtn); // `counts` holds nCounts entries; PackRGB's exhaustive path passes 3 or 4 counts that sum to 16.
+            static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride);
+            static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds); // Writes 8 bytes per block: two endpoint bytes, then sixteen 3-bit indexes.
+            static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds); // Writes 8 bytes per block: two packed 5:6:5 endpoints, then sixteen 2-bit indexes; maxTweakRounds and numRefineRounds are raised to at least 1.
+        };
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_S3TC_SingleColor.h b/thirdparty/cvtt/ConvectionKernels_S3TC_SingleColor.h
new file mode 100644
index 0000000000..c772b163c2
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_S3TC_SingleColor.h
@@ -0,0 +1,304 @@
+#pragma once
+#include <stdint.h>
+
+// This file is generated by the MakeTables app.  Do not edit this file manually.
+
+namespace cvtt { namespace Tables { namespace S3TCSC {
+
+struct TableEntry
+{
+    uint8_t m_min;
+    uint8_t m_max;
+    uint8_t m_actualColor;
+    uint8_t m_span;
+};
+
+TableEntry g_singleColor5_3[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 2, 8 }, { 0, 8, 2, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 10, 8 }, { 0, 33, 11, 33 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 18, 8 }, { 8, 41, 19, 33 }, { 24, 16, 21, 8 }, { 24, 16, 21, 8 }, { 33, 0, 22, 33 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 41, 29, 17 }, { 33, 24, 30, 9 }, { 33, 24, 30, 9 },
+    { 24, 49, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 33, 41, 35, 8 }, { 33, 41, 35, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 },
+    { 49, 24, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 43, 8 }, { 33, 66, 44, 33 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 },
+    { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 51, 8 }, { 41, 74, 52, 33 }, { 57, 49, 54, 8 }, { 57, 49, 54, 8 }, { 66, 33, 55, 33 },
+    { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 74, 62, 17 }, { 66, 57, 63, 9 },
+    { 66, 57, 63, 9 }, { 57, 82, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 66, 74, 68, 8 }, { 66, 74, 68, 8 }, { 74, 66, 71, 8 }, { 74, 66, 71, 8 },
+    { 74, 66, 71, 8 }, { 82, 57, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 76, 8 }, { 66, 99, 77, 33 }, { 82, 74, 79, 8 }, { 82, 74, 79, 8 },
+    { 82, 74, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 84, 8 }, { 74, 107, 85, 33 }, { 90, 82, 87, 8 }, { 90, 82, 87, 8 },
+    { 99, 66, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 107, 95, 17 },
+    { 99, 90, 96, 9 }, { 99, 90, 96, 9 }, { 90, 115, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 99, 107, 101, 8 }, { 99, 107, 101, 8 }, { 107, 99, 104, 8 },
+    { 107, 99, 104, 8 }, { 107, 99, 104, 8 }, { 115, 90, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 109, 8 }, { 99, 132, 110, 33 }, { 115, 107, 112, 8 },
+    { 115, 107, 112, 8 }, { 115, 107, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 117, 8 }, { 107, 140, 118, 33 }, { 123, 115, 120, 8 },
+    { 123, 115, 120, 8 }, { 132, 99, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 },
+    { 123, 140, 128, 17 }, { 132, 123, 129, 9 }, { 132, 123, 129, 9 }, { 123, 148, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 132, 140, 134, 8 }, { 132, 140, 134, 8 },
+    { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 148, 123, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 142, 8 }, { 132, 165, 143, 33 },
+    { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 150, 8 }, { 140, 173, 151, 33 },
+    { 156, 148, 153, 8 }, { 156, 148, 153, 8 }, { 165, 132, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 159, 9 }, { 156, 165, 159, 9 },
+    { 156, 165, 159, 9 }, { 156, 173, 161, 17 }, { 165, 156, 162, 9 }, { 165, 156, 162, 9 }, { 156, 181, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 165, 173, 167, 8 },
+    { 165, 173, 167, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 181, 156, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 181, 175, 8 },
+    { 165, 198, 176, 33 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 189, 183, 8 },
+    { 173, 206, 184, 33 }, { 189, 181, 186, 8 }, { 189, 181, 186, 8 }, { 198, 165, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 198, 192, 9 },
+    { 189, 198, 192, 9 }, { 189, 198, 192, 9 }, { 189, 206, 194, 17 }, { 198, 189, 195, 9 }, { 198, 189, 195, 9 }, { 189, 214, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
+    { 198, 206, 200, 8 }, { 198, 206, 200, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 214, 189, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
+    { 206, 214, 208, 8 }, { 198, 231, 209, 33 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
+    { 214, 222, 216, 8 }, { 206, 239, 217, 33 }, { 222, 214, 219, 8 }, { 222, 214, 219, 8 }, { 231, 198, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
+    { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 239, 227, 17 }, { 231, 222, 228, 9 }, { 231, 222, 228, 9 }, { 222, 247, 230, 25 }, { 231, 231, 231, 0 },
+    { 231, 231, 231, 0 }, { 231, 239, 233, 8 }, { 231, 239, 233, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 247, 222, 238, 25 }, { 239, 239, 239, 0 },
+    { 239, 239, 239, 0 }, { 239, 247, 241, 8 }, { 239, 247, 241, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 247, 247, 0 }, { 247, 255, 249, 8 }, { 247, 255, 249, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor6_3[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 4, 1, 4 }, { 4, 0, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 5, 4 }, { 8, 4, 6, 4 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 12, 9, 4 }, { 12, 8, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 13, 4 }, { 16, 12, 14, 4 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 20, 17, 4 }, { 20, 16, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 21, 4 }, { 24, 20, 22, 4 }, { 0, 69, 23, 69 },
+    { 24, 24, 24, 0 }, { 24, 28, 25, 4 }, { 28, 24, 26, 4 }, { 8, 65, 27, 57 }, { 28, 28, 28, 0 }, { 28, 32, 29, 4 }, { 32, 28, 30, 4 }, { 12, 69, 31, 57 },
+    { 32, 32, 32, 0 }, { 32, 36, 33, 4 }, { 36, 32, 34, 4 }, { 20, 65, 35, 45 }, { 36, 36, 36, 0 }, { 36, 40, 37, 4 }, { 40, 36, 38, 4 }, { 24, 69, 39, 45 },
+    { 40, 40, 40, 0 }, { 40, 44, 41, 4 }, { 44, 40, 42, 4 }, { 32, 65, 43, 33 }, { 44, 44, 44, 0 }, { 44, 48, 45, 4 }, { 48, 44, 46, 4 }, { 36, 69, 47, 33 },
+    { 48, 48, 48, 0 }, { 48, 52, 49, 4 }, { 52, 48, 50, 4 }, { 44, 65, 51, 21 }, { 52, 52, 52, 0 }, { 52, 56, 53, 4 }, { 56, 52, 54, 4 }, { 48, 69, 55, 21 },
+    { 56, 56, 56, 0 }, { 56, 60, 57, 4 }, { 60, 56, 58, 4 }, { 56, 65, 59, 9 }, { 60, 60, 60, 0 }, { 60, 65, 61, 5 }, { 65, 56, 62, 9 }, { 65, 60, 63, 5 },
+    { 60, 73, 64, 13 }, { 65, 65, 65, 0 }, { 65, 69, 66, 4 }, { 69, 65, 67, 4 }, { 73, 60, 68, 13 }, { 69, 69, 69, 0 }, { 69, 73, 70, 4 }, { 73, 69, 71, 4 },
+    { 81, 56, 72, 25 }, { 73, 73, 73, 0 }, { 73, 77, 74, 4 }, { 77, 73, 75, 4 }, { 85, 60, 76, 25 }, { 77, 77, 77, 0 }, { 77, 81, 78, 4 }, { 81, 77, 79, 4 },
+    { 93, 56, 80, 37 }, { 81, 81, 81, 0 }, { 81, 85, 82, 4 }, { 85, 81, 83, 4 }, { 97, 60, 84, 37 }, { 85, 85, 85, 0 }, { 85, 89, 86, 4 }, { 89, 85, 87, 4 },
+    { 105, 56, 88, 49 }, { 89, 89, 89, 0 }, { 89, 93, 90, 4 }, { 93, 89, 91, 4 }, { 109, 60, 92, 49 }, { 93, 93, 93, 0 }, { 93, 97, 94, 4 }, { 97, 93, 95, 4 },
+    { 77, 134, 96, 57 }, { 97, 97, 97, 0 }, { 97, 101, 98, 4 }, { 101, 97, 99, 4 }, { 85, 130, 100, 45 }, { 101, 101, 101, 0 }, { 101, 105, 102, 4 }, { 105, 101, 103, 4 },
+    { 89, 134, 104, 45 }, { 105, 105, 105, 0 }, { 105, 109, 106, 4 }, { 109, 105, 107, 4 }, { 97, 130, 108, 33 }, { 109, 109, 109, 0 }, { 109, 113, 110, 4 }, { 113, 109, 111, 4 },
+    { 101, 134, 112, 33 }, { 113, 113, 113, 0 }, { 113, 117, 114, 4 }, { 117, 113, 115, 4 }, { 109, 130, 116, 21 }, { 117, 117, 117, 0 }, { 117, 121, 118, 4 }, { 121, 117, 119, 4 },
+    { 113, 134, 120, 21 }, { 121, 121, 121, 0 }, { 121, 125, 122, 4 }, { 125, 121, 123, 4 }, { 121, 130, 124, 9 }, { 125, 125, 125, 0 }, { 125, 130, 126, 5 }, { 130, 121, 127, 9 },
+    { 130, 125, 128, 5 }, { 125, 138, 129, 13 }, { 130, 130, 130, 0 }, { 130, 134, 131, 4 }, { 134, 130, 132, 4 }, { 138, 125, 133, 13 }, { 134, 134, 134, 0 }, { 134, 138, 135, 4 },
+    { 138, 134, 136, 4 }, { 146, 121, 137, 25 }, { 138, 138, 138, 0 }, { 138, 142, 139, 4 }, { 142, 138, 140, 4 }, { 150, 125, 141, 25 }, { 142, 142, 142, 0 }, { 142, 146, 143, 4 },
+    { 146, 142, 144, 4 }, { 158, 121, 145, 37 }, { 146, 146, 146, 0 }, { 146, 150, 147, 4 }, { 150, 146, 148, 4 }, { 162, 125, 149, 37 }, { 150, 150, 150, 0 }, { 150, 154, 151, 4 },
+    { 154, 150, 152, 4 }, { 170, 121, 153, 49 }, { 154, 154, 154, 0 }, { 154, 158, 155, 4 }, { 158, 154, 156, 4 }, { 174, 125, 157, 49 }, { 158, 158, 158, 0 }, { 158, 162, 159, 4 },
+    { 162, 158, 160, 4 }, { 142, 199, 161, 57 }, { 162, 162, 162, 0 }, { 162, 166, 163, 4 }, { 166, 162, 164, 4 }, { 150, 195, 165, 45 }, { 166, 166, 166, 0 }, { 166, 170, 167, 4 },
+    { 170, 166, 168, 4 }, { 154, 199, 169, 45 }, { 170, 170, 170, 0 }, { 170, 174, 171, 4 }, { 174, 170, 172, 4 }, { 162, 195, 173, 33 }, { 174, 174, 174, 0 }, { 174, 178, 175, 4 },
+    { 178, 174, 176, 4 }, { 166, 199, 177, 33 }, { 178, 178, 178, 0 }, { 178, 182, 179, 4 }, { 182, 178, 180, 4 }, { 174, 195, 181, 21 }, { 182, 182, 182, 0 }, { 182, 186, 183, 4 },
+    { 186, 182, 184, 4 }, { 178, 199, 185, 21 }, { 186, 186, 186, 0 }, { 186, 190, 187, 4 }, { 190, 186, 188, 4 }, { 186, 195, 189, 9 }, { 190, 190, 190, 0 }, { 190, 195, 191, 5 },
+    { 195, 186, 192, 9 }, { 195, 190, 193, 5 }, { 190, 203, 194, 13 }, { 195, 195, 195, 0 }, { 195, 199, 196, 4 }, { 199, 195, 197, 4 }, { 203, 190, 198, 13 }, { 199, 199, 199, 0 },
+    { 199, 203, 200, 4 }, { 203, 199, 201, 4 }, { 211, 186, 202, 25 }, { 203, 203, 203, 0 }, { 203, 207, 204, 4 }, { 207, 203, 205, 4 }, { 215, 190, 206, 25 }, { 207, 207, 207, 0 },
+    { 207, 211, 208, 4 }, { 211, 207, 209, 4 }, { 223, 186, 210, 37 }, { 211, 211, 211, 0 }, { 211, 215, 212, 4 }, { 215, 211, 213, 4 }, { 227, 190, 214, 37 }, { 215, 215, 215, 0 },
+    { 215, 219, 216, 4 }, { 219, 215, 217, 4 }, { 235, 186, 218, 49 }, { 219, 219, 219, 0 }, { 219, 223, 220, 4 }, { 223, 219, 221, 4 }, { 239, 190, 222, 49 }, { 223, 223, 223, 0 },
+    { 223, 227, 224, 4 }, { 227, 223, 225, 4 }, { 247, 186, 226, 61 }, { 227, 227, 227, 0 }, { 227, 231, 228, 4 }, { 231, 227, 229, 4 }, { 251, 190, 230, 61 }, { 231, 231, 231, 0 },
+    { 231, 235, 232, 4 }, { 235, 231, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 236, 4 }, { 239, 235, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
+    { 239, 243, 240, 4 }, { 243, 239, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 244, 4 }, { 247, 243, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 251, 248, 4 }, { 251, 247, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 252, 4 }, { 255, 251, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor5_2[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 },
+    { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 },
+    { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 },
+    { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 },
+    { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 },
+    { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 },
+    { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 },
+    { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 },
+    { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 },
+    { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 },
+    { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 },
+    { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 },
+    { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 },
+    { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 },
+    { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 },
+    { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 },
+    { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 },
+    { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 },
+    { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 },
+    { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 },
+    { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 },
+    { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
+    { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
+    { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
+    { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
+    { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 },
+    { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 },
+    { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor6_2[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
+    { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
+    { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 },
+    { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 },
+    { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 },
+    { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 },
+    { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 60, 97, 78, 37 }, { 77, 81, 79, 4 },
+    { 60, 101, 80, 41 }, { 81, 81, 81, 0 }, { 60, 105, 82, 45 }, { 81, 85, 83, 4 }, { 60, 109, 84, 49 }, { 85, 85, 85, 0 }, { 60, 113, 86, 53 }, { 85, 89, 87, 4 },
+    { 60, 117, 88, 57 }, { 89, 89, 89, 0 }, { 60, 121, 90, 61 }, { 89, 93, 91, 4 }, { 60, 125, 92, 65 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 },
+    { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 },
+    { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 },
+    { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 },
+    { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 },
+    { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 },
+    { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 125, 162, 143, 37 },
+    { 142, 146, 144, 4 }, { 125, 166, 145, 41 }, { 146, 146, 146, 0 }, { 125, 170, 147, 45 }, { 146, 150, 148, 4 }, { 125, 174, 149, 49 }, { 150, 150, 150, 0 }, { 125, 178, 151, 53 },
+    { 150, 154, 152, 4 }, { 125, 182, 153, 57 }, { 154, 154, 154, 0 }, { 125, 186, 155, 61 }, { 154, 158, 156, 4 }, { 125, 190, 157, 65 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 },
+    { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 },
+    { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 },
+    { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 },
+    { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 },
+    { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 },
+    { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 },
+    { 190, 227, 208, 37 }, { 207, 211, 209, 4 }, { 190, 231, 210, 41 }, { 211, 211, 211, 0 }, { 190, 235, 212, 45 }, { 211, 215, 213, 4 }, { 190, 239, 214, 49 }, { 215, 215, 215, 0 },
+    { 190, 243, 216, 53 }, { 215, 219, 217, 4 }, { 190, 247, 218, 57 }, { 219, 219, 219, 0 }, { 190, 251, 220, 61 }, { 219, 223, 221, 4 }, { 190, 255, 222, 65 }, { 223, 223, 223, 0 },
+    { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
+    { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
+    { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor5_3_p[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 2, 8 }, { 0, 8, 2, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 10, 8 }, { 0, 33, 11, 33 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 18, 8 }, { 8, 41, 19, 33 }, { 24, 16, 21, 8 }, { 24, 16, 21, 8 }, { 33, 0, 22, 33 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 41, 29, 17 }, { 33, 24, 30, 9 }, { 33, 24, 30, 9 },
+    { 24, 49, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 33, 41, 35, 8 }, { 33, 41, 35, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 },
+    { 49, 24, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 43, 8 }, { 33, 66, 44, 33 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 },
+    { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 51, 8 }, { 41, 74, 52, 33 }, { 57, 49, 54, 8 }, { 57, 49, 54, 8 }, { 66, 33, 55, 33 },
+    { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 74, 62, 17 }, { 66, 57, 63, 9 },
+    { 66, 57, 63, 9 }, { 57, 82, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 66, 74, 68, 8 }, { 66, 74, 68, 8 }, { 74, 66, 71, 8 }, { 74, 66, 71, 8 },
+    { 74, 66, 71, 8 }, { 82, 57, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 76, 8 }, { 66, 99, 77, 33 }, { 82, 74, 79, 8 }, { 82, 74, 79, 8 },
+    { 82, 74, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 84, 8 }, { 74, 107, 85, 33 }, { 90, 82, 87, 8 }, { 90, 82, 87, 8 },
+    { 99, 66, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 107, 95, 17 },
+    { 99, 90, 96, 9 }, { 99, 90, 96, 9 }, { 90, 115, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 99, 107, 101, 8 }, { 99, 107, 101, 8 }, { 107, 99, 104, 8 },
+    { 107, 99, 104, 8 }, { 107, 99, 104, 8 }, { 115, 90, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 109, 8 }, { 99, 132, 110, 33 }, { 115, 107, 112, 8 },
+    { 115, 107, 112, 8 }, { 115, 107, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 117, 8 }, { 107, 140, 118, 33 }, { 123, 115, 120, 8 },
+    { 123, 115, 120, 8 }, { 132, 99, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 },
+    { 123, 140, 128, 17 }, { 132, 123, 129, 9 }, { 132, 123, 129, 9 }, { 123, 148, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 132, 140, 134, 8 }, { 132, 140, 134, 8 },
+    { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 148, 123, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 142, 8 }, { 132, 165, 143, 33 },
+    { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 150, 8 }, { 140, 173, 151, 33 },
+    { 156, 148, 153, 8 }, { 156, 148, 153, 8 }, { 165, 132, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 159, 9 }, { 156, 165, 159, 9 },
+    { 156, 165, 159, 9 }, { 156, 173, 161, 17 }, { 165, 156, 162, 9 }, { 165, 156, 162, 9 }, { 156, 181, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 165, 173, 167, 8 },
+    { 165, 173, 167, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 181, 156, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 181, 175, 8 },
+    { 165, 198, 176, 33 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 189, 183, 8 },
+    { 173, 206, 184, 33 }, { 189, 181, 186, 8 }, { 189, 181, 186, 8 }, { 198, 165, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 198, 192, 9 },
+    { 189, 198, 192, 9 }, { 189, 198, 192, 9 }, { 189, 206, 194, 17 }, { 198, 189, 195, 9 }, { 198, 189, 195, 9 }, { 189, 214, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
+    { 198, 206, 200, 8 }, { 198, 206, 200, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 214, 189, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
+    { 206, 214, 208, 8 }, { 198, 231, 209, 33 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
+    { 214, 222, 216, 8 }, { 206, 239, 217, 33 }, { 222, 214, 219, 8 }, { 222, 214, 219, 8 }, { 231, 198, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
+    { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 239, 227, 17 }, { 231, 222, 228, 9 }, { 231, 222, 228, 9 }, { 222, 247, 230, 25 }, { 231, 231, 231, 0 },
+    { 231, 231, 231, 0 }, { 231, 239, 233, 8 }, { 231, 239, 233, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 247, 222, 238, 25 }, { 239, 239, 239, 0 },
+    { 239, 239, 239, 0 }, { 239, 247, 241, 8 }, { 239, 247, 241, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 247, 247, 0 }, { 247, 255, 249, 8 }, { 247, 255, 249, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor6_3_p[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 4, 1, 4 }, { 4, 0, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 5, 4 }, { 8, 4, 6, 4 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 12, 9, 4 }, { 12, 8, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 13, 4 }, { 16, 12, 14, 4 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 20, 17, 4 }, { 20, 16, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 21, 4 }, { 24, 20, 22, 4 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 28, 25, 4 }, { 28, 24, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 29, 4 }, { 32, 28, 30, 4 }, { 32, 32, 32, 0 },
+    { 32, 32, 32, 0 }, { 32, 36, 33, 4 }, { 36, 32, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 37, 4 }, { 40, 36, 38, 4 }, { 40, 40, 40, 0 },
+    { 40, 40, 40, 0 }, { 40, 44, 41, 4 }, { 44, 40, 42, 4 }, { 32, 65, 43, 33 }, { 44, 44, 44, 0 }, { 44, 48, 45, 4 }, { 48, 44, 46, 4 }, { 36, 69, 47, 33 },
+    { 48, 48, 48, 0 }, { 48, 52, 49, 4 }, { 52, 48, 50, 4 }, { 44, 65, 51, 21 }, { 52, 52, 52, 0 }, { 52, 56, 53, 4 }, { 56, 52, 54, 4 }, { 48, 69, 55, 21 },
+    { 56, 56, 56, 0 }, { 56, 60, 57, 4 }, { 60, 56, 58, 4 }, { 56, 65, 59, 9 }, { 60, 60, 60, 0 }, { 60, 65, 61, 5 }, { 65, 56, 62, 9 }, { 65, 60, 63, 5 },
+    { 60, 73, 64, 13 }, { 65, 65, 65, 0 }, { 65, 69, 66, 4 }, { 69, 65, 67, 4 }, { 73, 60, 68, 13 }, { 69, 69, 69, 0 }, { 69, 73, 70, 4 }, { 73, 69, 71, 4 },
+    { 81, 56, 72, 25 }, { 73, 73, 73, 0 }, { 73, 77, 74, 4 }, { 77, 73, 75, 4 }, { 85, 60, 76, 25 }, { 77, 77, 77, 0 }, { 77, 81, 78, 4 }, { 81, 77, 79, 4 },
+    { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 82, 4 }, { 85, 81, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 86, 4 }, { 89, 85, 87, 4 },
+    { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 90, 4 }, { 93, 89, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 94, 4 }, { 97, 93, 95, 4 },
+    { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 98, 4 }, { 101, 97, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 102, 4 }, { 105, 101, 103, 4 },
+    { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 106, 4 }, { 109, 105, 107, 4 }, { 97, 130, 108, 33 }, { 109, 109, 109, 0 }, { 109, 113, 110, 4 }, { 113, 109, 111, 4 },
+    { 101, 134, 112, 33 }, { 113, 113, 113, 0 }, { 113, 117, 114, 4 }, { 117, 113, 115, 4 }, { 109, 130, 116, 21 }, { 117, 117, 117, 0 }, { 117, 121, 118, 4 }, { 121, 117, 119, 4 },
+    { 113, 134, 120, 21 }, { 121, 121, 121, 0 }, { 121, 125, 122, 4 }, { 125, 121, 123, 4 }, { 121, 130, 124, 9 }, { 125, 125, 125, 0 }, { 125, 130, 126, 5 }, { 130, 121, 127, 9 },
+    { 130, 125, 128, 5 }, { 125, 138, 129, 13 }, { 130, 130, 130, 0 }, { 130, 134, 131, 4 }, { 134, 130, 132, 4 }, { 138, 125, 133, 13 }, { 134, 134, 134, 0 }, { 134, 138, 135, 4 },
+    { 138, 134, 136, 4 }, { 146, 121, 137, 25 }, { 138, 138, 138, 0 }, { 138, 142, 139, 4 }, { 142, 138, 140, 4 }, { 150, 125, 141, 25 }, { 142, 142, 142, 0 }, { 142, 146, 143, 4 },
+    { 146, 142, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 147, 4 }, { 150, 146, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 154, 151, 4 },
+    { 154, 150, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 155, 4 }, { 158, 154, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 162, 159, 4 },
+    { 162, 158, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 163, 4 }, { 166, 162, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 170, 167, 4 },
+    { 170, 166, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 171, 4 }, { 174, 170, 172, 4 }, { 162, 195, 173, 33 }, { 174, 174, 174, 0 }, { 174, 178, 175, 4 },
+    { 178, 174, 176, 4 }, { 166, 199, 177, 33 }, { 178, 178, 178, 0 }, { 178, 182, 179, 4 }, { 182, 178, 180, 4 }, { 174, 195, 181, 21 }, { 182, 182, 182, 0 }, { 182, 186, 183, 4 },
+    { 186, 182, 184, 4 }, { 178, 199, 185, 21 }, { 186, 186, 186, 0 }, { 186, 190, 187, 4 }, { 190, 186, 188, 4 }, { 186, 195, 189, 9 }, { 190, 190, 190, 0 }, { 190, 195, 191, 5 },
+    { 195, 186, 192, 9 }, { 195, 190, 193, 5 }, { 190, 203, 194, 13 }, { 195, 195, 195, 0 }, { 195, 199, 196, 4 }, { 199, 195, 197, 4 }, { 203, 190, 198, 13 }, { 199, 199, 199, 0 },
+    { 199, 203, 200, 4 }, { 203, 199, 201, 4 }, { 211, 186, 202, 25 }, { 203, 203, 203, 0 }, { 203, 207, 204, 4 }, { 207, 203, 205, 4 }, { 215, 190, 206, 25 }, { 207, 207, 207, 0 },
+    { 207, 211, 208, 4 }, { 211, 207, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 212, 4 }, { 215, 211, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 },
+    { 215, 219, 216, 4 }, { 219, 215, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 220, 4 }, { 223, 219, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 },
+    { 223, 227, 224, 4 }, { 227, 223, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 228, 4 }, { 231, 227, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
+    { 231, 235, 232, 4 }, { 235, 231, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 236, 4 }, { 239, 235, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
+    { 239, 243, 240, 4 }, { 243, 239, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 244, 4 }, { 247, 243, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 251, 248, 4 }, { 251, 247, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 252, 4 }, { 255, 251, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor5_2_p[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 },
+    { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 },
+    { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 },
+    { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 },
+    { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 },
+    { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 },
+    { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 },
+    { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 },
+    { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 },
+    { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 },
+    { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 },
+    { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 },
+    { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 },
+    { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 },
+    { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 },
+    { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 },
+    { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 },
+    { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 },
+    { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 },
+    { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 },
+    { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 },
+    { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
+    { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
+    { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
+    { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
+    { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 },
+    { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 },
+    { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+TableEntry g_singleColor6_2_p[256] =
+{
+    { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
+    { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
+    { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
+    { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
+    { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
+    { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 },
+    { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 },
+    { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 },
+    { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 },
+    { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 77, 77, 77, 0 }, { 77, 81, 79, 4 },
+    { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 87, 4 },
+    { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 },
+    { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 },
+    { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 },
+    { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 },
+    { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 },
+    { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 },
+    { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 142, 142, 142, 0 },
+    { 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 },
+    { 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 },
+    { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 },
+    { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 },
+    { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 },
+    { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 },
+    { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 },
+    { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 },
+    { 207, 207, 207, 0 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 },
+    { 215, 215, 215, 0 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 },
+    { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
+    { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
+    { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
+    { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
+};
+
+}}}
diff --git a/thirdparty/cvtt/ConvectionKernels_SingleFile.cpp b/thirdparty/cvtt/ConvectionKernels_SingleFile.cpp
new file mode 100644
index 0000000000..ad59988655
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_SingleFile.cpp
@@ -0,0 +1,48 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if defined(CVTT_SINGLE_FILE)
+#define CVTT_SINGLE_FILE_IMPL
+
+#include "ConvectionKernels_API.cpp"
+#include "ConvectionKernels_BC67.cpp"
+#include "ConvectionKernels_BC6H_IO.cpp"
+#include "ConvectionKernels_BC7_PrioData.cpp"
+#include "ConvectionKernels_BCCommon.cpp"
+#include "ConvectionKernels_ETC.cpp"
+#include "ConvectionKernels_IndexSelector.cpp"
+#include "ConvectionKernels_S3TC.cpp"
+#include "ConvectionKernels_Util.cpp"
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_UnfinishedEndpoints.h b/thirdparty/cvtt/ConvectionKernels_UnfinishedEndpoints.h
new file mode 100644
index 0000000000..371cbe54bf
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_UnfinishedEndpoints.h
@@ -0,0 +1,121 @@
+#pragma once
+
+#include "ConvectionKernels_Util.h"
+
+namespace cvtt
+{
+    namespace Internal
+    {
+        // Endpoint pair kept as an unquantized per-channel base plus offset. Each Finish* member
+        // scales the offset by the tweak factors, clamps, and rounds to the requested format.
+        template<int TVectorSize>
+        class UnfinishedEndpoints
+        {
+        public:
+            typedef ParallelMath::Float MFloat;
+            typedef ParallelMath::UInt16 MUInt16;
+            typedef ParallelMath::UInt15 MUInt15;
+            typedef ParallelMath::SInt16 MSInt16;
+            typedef ParallelMath::SInt32 MSInt32;
+
+            UnfinishedEndpoints() {}
+
+            UnfinishedEndpoints(const MFloat *base, const MFloat *offset)
+            {
+                for (int c = 0; c < TVectorSize; c++)
+                {
+                    m_base[c] = base[c];
+                    m_offset[c] = offset[c];
+                }
+            }
+
+            UnfinishedEndpoints(const UnfinishedEndpoints& other)
+            {
+                for (int c = 0; c < TVectorSize; c++)
+                {
+                    m_base[c] = other.m_base[c];
+                    m_offset[c] = other.m_offset[c];
+                }
+            }
+
+            // Per channel: clamp to [0, 31743], round with RoundAndConvertToU15, store as MSInt16.
+            void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode)
+            {
+                float factors[2];
+                Util::ComputeTweakFactors(tweak, range, factors);
+
+                for (int c = 0; c < TVectorSize; c++)
+                {
+                    MUInt15 quantized[2];
+                    MFloat lo = ParallelMath::Clamp(m_base[c] + m_offset[c] * factors[0], 0.0f, 31743.0f);
+                    quantized[0] = ParallelMath::RoundAndConvertToU15(lo, roundingMode);
+                    MFloat hi = ParallelMath::Clamp(m_base[c] + m_offset[c] * factors[1], 0.0f, 31743.0f);
+                    quantized[1] = ParallelMath::RoundAndConvertToU15(hi, roundingMode);
+                    outEP0[c] = ParallelMath::LosslessCast<MSInt16>::Cast(quantized[0]);
+                    outEP1[c] = ParallelMath::LosslessCast<MSInt16>::Cast(quantized[1]);
+                }
+            }
+
+            // Per channel: clamp to [-31743, 31743] and round with RoundAndConvertToS16.
+            void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode)
+            {
+                float factors[2];
+                Util::ComputeTweakFactors(tweak, range, factors);
+
+                for (int c = 0; c < TVectorSize; c++)
+                {
+                    MFloat lo = ParallelMath::Clamp(m_base[c] + m_offset[c] * factors[0], -31743.0f, 31743.0f);
+                    outEP0[c] = ParallelMath::RoundAndConvertToS16(lo, roundingMode);
+                    MFloat hi = ParallelMath::Clamp(m_base[c] + m_offset[c] * factors[1], -31743.0f, 31743.0f);
+                    outEP1[c] = ParallelMath::RoundAndConvertToS16(hi, roundingMode);
+                }
+            }
+
+            // Per channel: clamp to [0, 255] and round with RoundAndConvertToU15.
+            void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1)
+            {
+                // Declared first so it stays in scope for all of the work below.
+                ParallelMath::RoundTowardNearestForScope roundingMode;
+
+                float factors[2];
+                Util::ComputeTweakFactors(tweak, range, factors);
+
+                for (int c = 0; c < TVectorSize; c++)
+                {
+                    MFloat lo = ParallelMath::Clamp(m_base[c] + m_offset[c] * factors[0], 0.0f, 255.0f);
+                    MFloat hi = ParallelMath::Clamp(m_base[c] + m_offset[c] * factors[1], 0.0f, 255.0f);
+                    outEP0[c] = ParallelMath::RoundAndConvertToU15(lo, &roundingMode);
+                    outEP1[c] = ParallelMath::RoundAndConvertToU15(hi, &roundingMode);
+                }
+            }
+
+            // Returns TNewVectorSize-channel endpoints; channels at index TVectorSize and above
+            // get base = filler and a zero offset.
+            template<int TNewVectorSize>
+            UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler)
+            {
+                MFloat expandedBase[TNewVectorSize];
+                MFloat expandedOffset[TNewVectorSize];
+
+                for (int c = 0; c < TNewVectorSize && c < TVectorSize; c++)
+                {
+                    expandedBase[c] = m_base[c];
+                    expandedOffset[c] = m_offset[c];
+                }
+
+                MFloat fillerV = ParallelMath::MakeFloat(filler);
+                for (int c = TVectorSize; c < TNewVectorSize; c++)
+                {
+                    expandedBase[c] = fillerV;
+                    expandedOffset[c] = ParallelMath::MakeFloatZero();
+                }
+
+                return UnfinishedEndpoints<TNewVectorSize>(expandedBase, expandedOffset);
+            }
+
+        private:
+            MFloat m_base[TVectorSize];
+            MFloat m_offset[TVectorSize];
+        };
+    }
+}
diff --git a/thirdparty/cvtt/ConvectionKernels_Util.cpp b/thirdparty/cvtt/ConvectionKernels_Util.cpp
new file mode 100644
index 0000000000..d9c25c7845
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_Util.cpp
@@ -0,0 +1,88 @@
+/*
+Convection Texture Tools
+Copyright (c) 2018-2019 Eric Lasota
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject
+to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+-------------------------------------------------------------------------------------
+
+Portions based on DirectX Texture Library (DirectXTex)
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+Licensed under the MIT License.
+
+http://go.microsoft.com/fwlink/?LinkId=248926
+*/
+#include "ConvectionKernels_Config.h"
+
+#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
+
+#include "ConvectionKernels.h"
+#include "ConvectionKernels_ParallelMath.h"
+
+#include <algorithm>
+
+namespace cvtt
+{
+    namespace Util
+    {
+        // Rebiases every signed channel value v to max(v, -127) + 127, so the signed maximum maps to 254.
+        void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize])
+        {
+            for (size_t b = 0; b < ParallelMath::ParallelSize; b++)
+            {
+                for (size_t p = 0; p < 16; p++)
+                {
+                    for (size_t c = 0; c < 4; c++)
+                    {
+                        const int clamped = std::max<int>(inputSigned[b].m_pixels[p][c], -127);
+                        inputNormalized[b].m_pixels[p][c] = static_cast<uint8_t>(clamped + 127);
+                    }
+                }
+            }
+        }
+
+        // Fills channelWeights in R, G, B, A order. When options.flags has Flags::Uniform set,
+        // every weight is 1.0f; otherwise the four weights are copied from options.
+        void FillWeights(const Options &options, float channelWeights[4])
+        {
+            const bool uniform = (options.flags & Flags::Uniform) != 0;
+
+            // The option weights are only read when the flag is clear.
+            channelWeights[0] = uniform ? 1.0f : options.redWeight;
+            channelWeights[1] = uniform ? 1.0f : options.greenWeight;
+            channelWeights[2] = uniform ? 1.0f : options.blueWeight;
+            channelWeights[3] = uniform ? 1.0f : options.alphaWeight;
+        }
+
+        // Bit 1 of tweak counts one unit outside the minimum end, bit 0 one unit outside the maximum end.
+        // Both are divided by the units left inside (range - 1 minus the outside units); no zero check is made.
+        void ComputeTweakFactors(int tweak, int range, float *outFactors)
+        {
+            const int belowUnits = (tweak >> 1) & 1;
+            const int aboveUnits = tweak & 1;
+            const float insideSpan = static_cast<float>((range - 1) - belowUnits - aboveUnits);
+            outFactors[0] = -static_cast<float>(belowUnits) / insideSpan;
+            outFactors[1] = static_cast<float>(aboveUnits) / insideSpan + 1.0f;
+        }
+    }
+}
+
+#endif
diff --git a/thirdparty/cvtt/ConvectionKernels_Util.h b/thirdparty/cvtt/ConvectionKernels_Util.h
new file mode 100644
index 0000000000..c07b9bf2aa
--- /dev/null
+++ b/thirdparty/cvtt/ConvectionKernels_Util.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "ConvectionKernels_ParallelMath.h"
+
+namespace cvtt
+{
+    struct PixelBlockU8;
+    struct PixelBlockS8;
+    struct Options;
+}
+
+namespace cvtt
+{
+    namespace Util
+    {
+        // Signed input blocks are converted into unsigned space, with the maximum value being 254
+        void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]);
+        void FillWeights(const Options &options, float channelWeights[4]);
+        void ComputeTweakFactors(int tweak, int range, float *outFactors);
+    }
+}
diff --git a/thirdparty/cvtt/etc_notes.txt b/thirdparty/cvtt/etc_notes.txt
new file mode 100644
index 0000000000..bb041a8435
--- /dev/null
+++ b/thirdparty/cvtt/etc_notes.txt
@@ -0,0 +1,27 @@
+The ETC1 compressor uses modified cluster fit:
+
+Assume that there exists an ideal base color and set of selectors for a given table.
+For a given table and set of selectors, the ideal base color can be determined by subtracting the offsets from each pixel and averaging them.
+Doing that is equivalent to subtracting the average offset from the average color.
+Because positive and negative selectors of the same magnitude cancel out, the search space of possible average offsets is reduced: 57 unique offsets for the first table and 81 for the others.
+Most of the offsets result in the same color as another average offset due to quantization of the base color, so those can be de-duplicated.
+So:
+- Start with a high-precision average color.
+- Apply precomputed luma offsets to it.
+- Quantize and de-duplicate the base colors.
+- Find the ideal selectors for each base color.
+
+Differential mode is solved by just finding the best legal combination from those attempts.
+
+There are several scenarios where this is not ideal:
+- Clamping behavior can sometimes be leveraged for a more accurate block.
+- Differentials can sometimes be moved slightly closer to become legal.
+- This only works when MSE is the error metric (i.e., not for normal maps).
+- This only works when all pixels are weighted equally (i.e., not when weighting by alpha or using edge deblocking).
+
+T and H modes generate cluster assignments by computing a chrominance line and splitting the block in half at the chrominance midpoint; those assignments are then used to determine the averages.
+
+Planar mode is just solved algebraically.
+
+If you want to emulate etc2comp's default settings, add the flag ETC_UseFakeBT709 to use its modified Rec. 709 error coefficients.
+Doing that will significantly slow down encoding because it requires much more complicated quantization math.
+\ No newline at end of file
diff --git a/thirdparty/misc/patches/polypartition-godot-types.patch b/thirdparty/misc/patches/polypartition-godot-types.patch
index 782f02e8dc..61737f9fd2 100644
--- a/thirdparty/misc/patches/polypartition-godot-types.patch
+++ b/thirdparty/misc/patches/polypartition-godot-types.patch
@@ -1,19 +1,16 @@
 diff --git a/thirdparty/misc/polypartition.cpp b/thirdparty/misc/polypartition.cpp
-index 3a8a6efa83..5e94793b79 100644
+index 3a8a6efa83..8c5409bf24 100644
 --- a/thirdparty/misc/polypartition.cpp
 +++ b/thirdparty/misc/polypartition.cpp
-@@ -23,10 +23,7 @@
- 
- #include "polypartition.h"
- 
--#include <math.h>
--#include <string.h>
+@@ -26,7 +26,6 @@
+ #include <math.h>
+ #include <string.h>
  #include <algorithm>
 -#include <vector>
  
  TPPLPoly::TPPLPoly() {
    hole = false;
-@@ -186,7 +183,7 @@ int TPPLPartition::Intersects(TPPLPoint &p11, TPPLPoint &p12, TPPLPoint &p21, TP
+@@ -186,7 +185,7 @@ int TPPLPartition::Intersects(TPPLPoint &p11, TPPLPoint &p12, TPPLPoint &p21, TP
  // Removes holes from inpolys by merging them with non-holes.
  int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
    TPPLPolyList polys;
@@ -22,7 +19,7 @@ index 3a8a6efa83..5e94793b79 100644
    long i, i2, holepointindex, polypointindex;
    TPPLPoint holepoint, polypoint, bestpolypoint;
    TPPLPoint linep1, linep2;
-@@ -198,15 +195,15 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -198,15 +197,15 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
  
    // Check for the trivial case of no holes.
    hasholes = false;
@@ -42,7 +39,7 @@ index 3a8a6efa83..5e94793b79 100644
      }
      return 1;
    }
-@@ -216,8 +213,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -216,8 +215,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
    while (1) {
      // Find the hole point with the largest x.
      hasholes = false;
@@ -53,7 +50,7 @@ index 3a8a6efa83..5e94793b79 100644
          continue;
        }
  
-@@ -227,8 +224,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -227,8 +226,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
          holepointindex = 0;
        }
  
@@ -64,7 +61,7 @@ index 3a8a6efa83..5e94793b79 100644
            holeiter = iter;
            holepointindex = i;
          }
-@@ -237,24 +234,24 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -237,24 +236,24 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
      if (!hasholes) {
        break;
      }
@@ -98,7 +95,7 @@ index 3a8a6efa83..5e94793b79 100644
          if (pointfound) {
            v1 = Normalize(polypoint - holepoint);
            v2 = Normalize(bestpolypoint - holepoint);
-@@ -263,13 +260,13 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -263,13 +262,13 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
            }
          }
          pointvisible = true;
@@ -117,7 +114,7 @@ index 3a8a6efa83..5e94793b79 100644
              if (Intersects(holepoint, polypoint, linep1, linep2)) {
                pointvisible = false;
                break;
-@@ -292,18 +289,18 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -292,18 +291,18 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
        return 0;
      }
  
@@ -142,7 +139,7 @@ index 3a8a6efa83..5e94793b79 100644
        i2++;
      }
  
-@@ -312,8 +309,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
+@@ -312,8 +311,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) {
      polys.push_back(newpoly);
    }
  
@@ -153,7 +150,7 @@ index 3a8a6efa83..5e94793b79 100644
    }
  
    return 1;
-@@ -524,13 +521,13 @@ int TPPLPartition::Triangulate_EC(TPPLPoly *poly, TPPLPolyList *triangles) {
+@@ -524,13 +523,13 @@ int TPPLPartition::Triangulate_EC(TPPLPoly *poly, TPPLPolyList *triangles) {
  
  int TPPLPartition::Triangulate_EC(TPPLPolyList *inpolys, TPPLPolyList *triangles) {
    TPPLPolyList outpolys;
@@ -170,7 +167,7 @@ index 3a8a6efa83..5e94793b79 100644
        return 0;
      }
    }
-@@ -543,7 +540,7 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -543,7 +542,7 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
    }
  
    TPPLPolyList triangles;
@@ -179,7 +176,7 @@ index 3a8a6efa83..5e94793b79 100644
    TPPLPoly *poly1 = NULL, *poly2 = NULL;
    TPPLPoly newpoly;
    TPPLPoint d1, d2, p1, p2, p3;
-@@ -578,19 +575,19 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -578,19 +577,19 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
      return 0;
    }
  
@@ -203,7 +200,7 @@ index 3a8a6efa83..5e94793b79 100644
  
          for (i21 = 0; i21 < poly2->GetNumPoints(); i21++) {
            if ((d2.x != poly2->GetPoint(i21).x) || (d2.y != poly2->GetPoint(i21).y)) {
-@@ -660,16 +657,16 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -660,16 +659,16 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
        }
  
        triangles.erase(iter2);
@@ -224,7 +221,7 @@ index 3a8a6efa83..5e94793b79 100644
    }
  
    return 1;
-@@ -677,13 +674,13 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -677,13 +676,13 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) {
  
  int TPPLPartition::ConvexPartition_HM(TPPLPolyList *inpolys, TPPLPolyList *parts) {
    TPPLPolyList outpolys;
@@ -241,7 +238,7 @@ index 3a8a6efa83..5e94793b79 100644
        return 0;
      }
    }
-@@ -824,8 +821,8 @@ int TPPLPartition::Triangulate_OPT(TPPLPoly *poly, TPPLPolyList *triangles) {
+@@ -824,8 +823,8 @@ int TPPLPartition::Triangulate_OPT(TPPLPoly *poly, TPPLPolyList *triangles) {
    newdiagonal.index1 = 0;
    newdiagonal.index2 = n - 1;
    diagonals.push_back(newdiagonal);
@@ -252,7 +249,7 @@ index 3a8a6efa83..5e94793b79 100644
      diagonals.pop_front();
      bestvertex = dpstates[diagonal.index2][diagonal.index1].bestvertex;
      if (bestvertex == -1) {
-@@ -873,10 +870,10 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2
+@@ -873,10 +872,10 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2
      pairs->push_front(newdiagonal);
      dpstates[a][b].weight = w;
    } else {
@@ -265,7 +262,7 @@ index 3a8a6efa83..5e94793b79 100644
        pairs->pop_front();
      }
      pairs->push_front(newdiagonal);
-@@ -885,7 +882,7 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2
+@@ -885,7 +884,7 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2
  
  void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPState2 **dpstates) {
    DiagonalList *pairs = NULL;
@@ -274,7 +271,7 @@ index 3a8a6efa83..5e94793b79 100644
    long top;
    long w;
  
-@@ -902,23 +899,23 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS
+@@ -902,23 +901,23 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS
    }
    if (j - i > 1) {
      pairs = &(dpstates[i][j].pairs);
@@ -305,7 +302,7 @@ index 3a8a6efa83..5e94793b79 100644
        }
      }
    }
-@@ -927,7 +924,7 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS
+@@ -927,7 +926,7 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS
  
  void TPPLPartition::TypeB(long i, long j, long k, PartitionVertex *vertices, DPState2 **dpstates) {
    DiagonalList *pairs = NULL;
@@ -314,7 +311,7 @@ index 3a8a6efa83..5e94793b79 100644
    long top;
    long w;
  
-@@ -946,21 +943,21 @@ void TPPLPartition::TypeB(long i, long j, long k, PartitionVertex *vertices, DPS
+@@ -946,21 +945,21 @@ void TPPLPartition::TypeB(long i, long j, long k, PartitionVertex *vertices, DPS
    if (k - j > 1) {
      pairs = &(dpstates[j][k].pairs);
  
@@ -343,7 +340,7 @@ index 3a8a6efa83..5e94793b79 100644
        }
      } else {
        w++;
-@@ -981,11 +978,11 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -981,11 +980,11 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
    DiagonalList diagonals, diagonals2;
    Diagonal diagonal, newdiagonal;
    DiagonalList *pairs = NULL, *pairs2 = NULL;
@@ -358,7 +355,7 @@ index 3a8a6efa83..5e94793b79 100644
    bool ijreal, jkreal;
  
    n = poly->GetNumPoints();
-@@ -1110,35 +1107,35 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1110,35 +1109,35 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
    newdiagonal.index1 = 0;
    newdiagonal.index2 = n - 1;
    diagonals.push_front(newdiagonal);
@@ -403,7 +400,7 @@ index 3a8a6efa83..5e94793b79 100644
                pairs2->pop_back();
              } else {
                break;
-@@ -1153,21 +1150,21 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1153,21 +1152,21 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
          diagonals.push_front(newdiagonal);
        }
      } else {
@@ -431,7 +428,7 @@ index 3a8a6efa83..5e94793b79 100644
                pairs2->pop_front();
              } else {
                break;
-@@ -1197,8 +1194,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1197,8 +1196,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
    newdiagonal.index1 = 0;
    newdiagonal.index2 = n - 1;
    diagonals.push_front(newdiagonal);
@@ -442,7 +439,7 @@ index 3a8a6efa83..5e94793b79 100644
      diagonals.pop_front();
      if ((diagonal.index2 - diagonal.index1) <= 1) {
        continue;
-@@ -1210,8 +1207,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1210,8 +1209,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
      indices.push_back(diagonal.index2);
      diagonals2.push_front(diagonal);
  
@@ -453,7 +450,7 @@ index 3a8a6efa83..5e94793b79 100644
        diagonals2.pop_front();
        if ((diagonal.index2 - diagonal.index1) <= 1) {
          continue;
-@@ -1220,16 +1217,16 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1220,16 +1219,16 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
        jkreal = true;
        pairs = &(dpstates[diagonal.index1][diagonal.index2].pairs);
        if (!vertices[diagonal.index1].isConvex) {
@@ -476,7 +473,7 @@ index 3a8a6efa83..5e94793b79 100644
            jkreal = false;
          }
        }
-@@ -1253,11 +1250,12 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1253,11 +1252,12 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
        indices.push_back(j);
      }
  
@@ -492,7 +489,7 @@ index 3a8a6efa83..5e94793b79 100644
        k++;
      }
      parts->push_back(newpoly);
-@@ -1281,7 +1279,7 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
+@@ -1281,7 +1281,7 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) {
  // "Computational Geometry: Algorithms and Applications"
  // by Mark de Berg, Otfried Cheong, Marc van Kreveld, and Mark Overmars.
  int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monotonePolys) {
@@ -501,7 +498,7 @@ index 3a8a6efa83..5e94793b79 100644
    MonotoneVertex *vertices = NULL;
    long i, numvertices, vindex, vindex2, newnumvertices, maxnumvertices;
    long polystartindex, polyendindex;
-@@ -1291,11 +1289,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1291,11 +1291,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
    bool error = false;
  
    numvertices = 0;
@@ -515,7 +512,7 @@ index 3a8a6efa83..5e94793b79 100644
    }
  
    maxnumvertices = numvertices * 3;
-@@ -1303,8 +1298,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1303,8 +1300,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
    newnumvertices = numvertices;
  
    polystartindex = 0;
@@ -526,7 +523,7 @@ index 3a8a6efa83..5e94793b79 100644
      polyendindex = polystartindex + poly->GetNumPoints() - 1;
      for (i = 0; i < poly->GetNumPoints(); i++) {
        vertices[i + polystartindex].p = poly->GetPoint(i);
-@@ -1360,14 +1355,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1360,14 +1357,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
    // Note that while set doesn't actually have to be implemented as
    // a tree, complexity requirements for operations are the same as
    // for the balanced binary search tree.
@@ -546,7 +543,7 @@ index 3a8a6efa83..5e94793b79 100644
    }
  
    // For each vertex.
-@@ -1387,13 +1382,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1387,13 +1384,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
          newedge.p1 = v->p;
          newedge.p2 = vertices[v->next].p;
          newedge.index = vindex;
@@ -564,7 +561,7 @@ index 3a8a6efa83..5e94793b79 100644
            error = true;
            break;
          }
-@@ -1412,29 +1408,30 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1412,29 +1410,30 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
          newedge.p1 = v->p;
          newedge.p2 = v->p;
          edgeIter = edgeTree.lower_bound(newedge);
@@ -601,7 +598,7 @@ index 3a8a6efa83..5e94793b79 100644
            error = true;
            break;
          }
-@@ -1452,25 +1449,25 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1452,25 +1451,25 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
          newedge.p1 = v->p;
          newedge.p2 = v->p;
          edgeIter = edgeTree.lower_bound(newedge);
@@ -632,7 +629,7 @@ index 3a8a6efa83..5e94793b79 100644
              error = true;
              break;
            }
-@@ -1488,27 +1485,28 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1488,27 +1487,28 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
            newedge.p1 = v2->p;
            newedge.p2 = vertices[v2->next].p;
            newedge.index = vindex2;
@@ -668,7 +665,7 @@ index 3a8a6efa83..5e94793b79 100644
          }
          break;
      }
-@@ -1569,8 +1567,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
+@@ -1569,8 +1569,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto
  
  // Adds a diagonal to the doubly-connected list of vertices.
  void TPPLPartition::AddDiagonal(MonotoneVertex *vertices, long *numvertices, long index1, long index2,
@@ -679,7 +676,7 @@ index 3a8a6efa83..5e94793b79 100644
    long newindex1, newindex2;
  
    newindex1 = *numvertices;
-@@ -1597,14 +1595,14 @@ void TPPLPartition::AddDiagonal(MonotoneVertex *vertices, long *numvertices, lon
+@@ -1597,14 +1597,14 @@ void TPPLPartition::AddDiagonal(MonotoneVertex *vertices, long *numvertices, lon
    vertextypes[newindex1] = vertextypes[index1];
    edgeTreeIterators[newindex1] = edgeTreeIterators[index1];
    helpers[newindex1] = helpers[index1];
@@ -698,7 +695,7 @@ index 3a8a6efa83..5e94793b79 100644
    }
  }
  
-@@ -1830,13 +1828,13 @@ int TPPLPartition::TriangulateMonotone(TPPLPoly *inPoly, TPPLPolyList *triangles
+@@ -1830,13 +1830,13 @@ int TPPLPartition::TriangulateMonotone(TPPLPoly *inPoly, TPPLPolyList *triangles
  
  int TPPLPartition::Triangulate_MONO(TPPLPolyList *inpolys, TPPLPolyList *triangles) {
    TPPLPolyList monotone;
diff --git a/thirdparty/misc/polypartition.cpp b/thirdparty/misc/polypartition.cpp
index 5e94793b79..8c5409bf24 100644
--- a/thirdparty/misc/polypartition.cpp
+++ b/thirdparty/misc/polypartition.cpp
@@ -23,6 +23,8 @@
 
 #include "polypartition.h"
 
+#include <math.h>
+#include <string.h>
 #include <algorithm>
 
 TPPLPoly::TPPLPoly() {